diff options
Diffstat (limited to 'third_party/libwebrtc/audio')
66 files changed, 15754 insertions, 0 deletions
diff --git a/third_party/libwebrtc/audio/BUILD.gn b/third_party/libwebrtc/audio/BUILD.gn new file mode 100644 index 0000000000..ec09e5a350 --- /dev/null +++ b/third_party/libwebrtc/audio/BUILD.gn @@ -0,0 +1,250 @@ +# Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../webrtc.gni") +if (is_android) { + import("//build/config/android/config.gni") + import("//build/config/android/rules.gni") +} + +rtc_library("audio") { + sources = [ + "audio_level.cc", + "audio_level.h", + "audio_receive_stream.cc", + "audio_receive_stream.h", + "audio_send_stream.cc", + "audio_send_stream.h", + "audio_state.cc", + "audio_state.h", + "audio_transport_impl.cc", + "audio_transport_impl.h", + "channel_receive.cc", + "channel_receive.h", + "channel_receive_frame_transformer_delegate.cc", + "channel_receive_frame_transformer_delegate.h", + "channel_send.cc", + "channel_send.h", + "channel_send_frame_transformer_delegate.cc", + "channel_send_frame_transformer_delegate.h", + "conversion.h", + "remix_resample.cc", + "remix_resample.h", + ] + + deps = [ + "../api:array_view", + "../api:call_api", + "../api:field_trials_view", + "../api:frame_transformer_interface", + "../api:function_view", + "../api:rtp_headers", + "../api:rtp_parameters", + "../api:scoped_refptr", + "../api:sequence_checker", + "../api:transport_api", + "../api/audio:aec3_factory", + "../api/audio:audio_frame_api", + "../api/audio:audio_frame_processor", + "../api/audio:audio_mixer_api", + "../api/audio_codecs:audio_codecs_api", + "../api/crypto:frame_decryptor_interface", + "../api/crypto:frame_encryptor_interface", + "../api/crypto:options", + "../api/neteq:neteq_api", + 
"../api/rtc_event_log", + "../api/task_queue", + "../api/task_queue:pending_task_safety_flag", + "../api/transport/rtp:rtp_source", + "../api/units:time_delta", + "../call:audio_sender_interface", + "../call:bitrate_allocator", + "../call:call_interfaces", + "../call:rtp_interfaces", + "../common_audio", + "../common_audio:common_audio_c", + "../logging:rtc_event_audio", + "../logging:rtc_stream_config", + "../media:media_channel", + "../media:rtc_media_base", + "../modules/async_audio_processing", + "../modules/audio_coding", + "../modules/audio_coding:audio_coding_module_typedefs", + "../modules/audio_coding:audio_encoder_cng", + "../modules/audio_coding:audio_network_adaptor_config", + "../modules/audio_coding:red", + "../modules/audio_device", + "../modules/audio_processing", + "../modules/audio_processing:api", + "../modules/audio_processing:audio_frame_proxies", + "../modules/audio_processing:rms_level", + "../modules/pacing", + "../modules/rtp_rtcp", + "../modules/rtp_rtcp:rtp_rtcp_format", + "../rtc_base:audio_format_to_string", + "../rtc_base:buffer", + "../rtc_base:checks", + "../rtc_base:event_tracer", + "../rtc_base:logging", + "../rtc_base:macromagic", + "../rtc_base:race_checker", + "../rtc_base:rate_limiter", + "../rtc_base:refcount", + "../rtc_base:rtc_event", + "../rtc_base:rtc_numerics", + "../rtc_base:rtc_task_queue", + "../rtc_base:safe_conversions", + "../rtc_base:safe_minmax", + "../rtc_base:stringutils", + "../rtc_base:threading", + "../rtc_base:timeutils", + "../rtc_base/containers:flat_set", + "../rtc_base/experiments:field_trial_parser", + "../rtc_base/synchronization:mutex", + "../rtc_base/system:no_unique_address", + "../rtc_base/task_utils:repeating_task", + "../system_wrappers", + "../system_wrappers:field_trial", + "../system_wrappers:metrics", + "utility:audio_frame_operations", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/functional:any_invocable", + "//third_party/abseil-cpp/absl/memory", + 
"//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} +if (rtc_include_tests) { + rtc_library("audio_end_to_end_test") { + testonly = true + + sources = [ + "test/audio_end_to_end_test.cc", + "test/audio_end_to_end_test.h", + ] + deps = [ + ":audio", + "../api:simulated_network_api", + "../api/task_queue", + "../call:fake_network", + "../call:simulated_network", + "../modules/audio_device:audio_device_api", + "../modules/audio_device:test_audio_device_module", + "../system_wrappers", + "../test:test_common", + "../test:test_support", + "../test:video_test_constants", + ] + } + + rtc_library("audio_tests") { + testonly = true + + sources = [ + "audio_receive_stream_unittest.cc", + "audio_send_stream_tests.cc", + "audio_send_stream_unittest.cc", + "audio_state_unittest.cc", + "channel_receive_frame_transformer_delegate_unittest.cc", + "channel_send_frame_transformer_delegate_unittest.cc", + "channel_send_unittest.cc", + "mock_voe_channel_proxy.h", + "remix_resample_unittest.cc", + "test/audio_stats_test.cc", + "test/nack_test.cc", + "test/non_sender_rtt_test.cc", + ] + deps = [ + ":audio", + ":audio_end_to_end_test", + ":channel_receive_unittest", + "../api:libjingle_peerconnection_api", + "../api:mock_audio_mixer", + "../api:mock_frame_decryptor", + "../api:mock_frame_encryptor", + "../api:scoped_refptr", + "../api/audio:audio_frame_api", + "../api/audio_codecs:audio_codecs_api", + "../api/audio_codecs:builtin_audio_encoder_factory", + "../api/audio_codecs/opus:audio_decoder_opus", + "../api/audio_codecs/opus:audio_encoder_opus", + "../api/crypto:frame_decryptor_interface", + "../api/rtc_event_log", + "../api/task_queue:default_task_queue_factory", + "../api/task_queue/test:mock_task_queue_base", + "../api/units:time_delta", + "../api/units:timestamp", + "../call:mock_bitrate_allocator", + "../call:mock_call_interfaces", + "../call:mock_rtp_interfaces", + "../call:rtp_interfaces", + "../call:rtp_receiver", + 
"../call:rtp_sender", + "../common_audio", + "../logging:mocks", + "../modules/audio_device:audio_device_api", + "../modules/audio_device:audio_device_impl", # For TestAudioDeviceModule + "../modules/audio_device:mock_audio_device", + "../modules/audio_mixer:audio_mixer_impl", + "../modules/audio_mixer:audio_mixer_test_utils", + "../modules/audio_processing:audio_processing_statistics", + "../modules/audio_processing:mocks", + "../modules/pacing", + "../modules/rtp_rtcp:mock_rtp_rtcp", + "../modules/rtp_rtcp:rtp_rtcp_format", + "../rtc_base:checks", + "../rtc_base:gunit_helpers", + "../rtc_base:macromagic", + "../rtc_base:refcount", + "../rtc_base:rtc_base_tests_utils", + "../rtc_base:safe_compare", + "../rtc_base:task_queue_for_test", + "../rtc_base:threading", + "../rtc_base:timeutils", + "../system_wrappers", + "../test:audio_codec_mocks", + "../test:field_trial", + "../test:mock_frame_transformer", + "../test:mock_transformable_frame", + "../test:mock_transport", + "../test:rtp_test_utils", + "../test:run_loop", + "../test:scoped_key_value_config", + "../test:test_common", + "../test:test_support", + "../test:video_test_constants", + "../test/time_controller:time_controller", + "utility:utility_tests", + "//testing/gtest", + ] + } + + rtc_library("channel_receive_unittest") { + testonly = true + sources = [ "channel_receive_unittest.cc" ] + deps = [ + ":audio", + "../api/audio_codecs:builtin_audio_decoder_factory", + "../api/crypto:frame_decryptor_interface", + "../api/task_queue:default_task_queue_factory", + "../logging:mocks", + "../modules/audio_device:audio_device_api", + "../modules/audio_device:mock_audio_device", + "../modules/rtp_rtcp", + "../modules/rtp_rtcp:rtp_rtcp_format", + "../rtc_base:logging", + "../rtc_base:threading", + "../test:audio_codec_mocks", + "../test:mock_transport", + "../test:test_support", + "../test/time_controller", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } +} diff --git a/third_party/libwebrtc/audio/DEPS 
b/third_party/libwebrtc/audio/DEPS new file mode 100644 index 0000000000..7a0c7e7ce6 --- /dev/null +++ b/third_party/libwebrtc/audio/DEPS @@ -0,0 +1,27 @@ +include_rules = [ + "+call", + "+common_audio", + "+logging/rtc_event_log", + "+media/base", + "+modules/async_audio_processing", + "+modules/audio_coding", + "+modules/audio_device", + "+modules/audio_mixer", + "+modules/audio_processing", + "+modules/audio_processing/include", + "+modules/bitrate_controller", + "+modules/congestion_controller", + "+modules/pacing", + "+modules/rtp_rtcp", + "+modules/utility", + "+system_wrappers", +] + +specific_include_rules = { + "audio_send_stream.cc": [ + "+modules/audio_coding/codecs/cng/audio_encoder_cng.h", + ], + "audio_transport_impl.h": [ + "+modules/audio_processing/typing_detection.h", + ] +} diff --git a/third_party/libwebrtc/audio/OWNERS b/third_party/libwebrtc/audio/OWNERS new file mode 100644 index 0000000000..e629bc1815 --- /dev/null +++ b/third_party/libwebrtc/audio/OWNERS @@ -0,0 +1,5 @@ +alessiob@webrtc.org +gustaf@webrtc.org +henrik.lundin@webrtc.org +jakobi@webrtc.org +peah@webrtc.org diff --git a/third_party/libwebrtc/audio/audio_gn/moz.build b/third_party/libwebrtc/audio/audio_gn/moz.build new file mode 100644 index 0000000000..e81a4f673b --- /dev/null +++ b/third_party/libwebrtc/audio/audio_gn/moz.build @@ -0,0 +1,249 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/audio/audio_level.cc", + "/third_party/libwebrtc/audio/audio_receive_stream.cc", + "/third_party/libwebrtc/audio/audio_send_stream.cc", + "/third_party/libwebrtc/audio/audio_state.cc", + "/third_party/libwebrtc/audio/audio_transport_impl.cc", + "/third_party/libwebrtc/audio/channel_receive.cc", + "/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.cc", + "/third_party/libwebrtc/audio/channel_send.cc", + "/third_party/libwebrtc/audio/remix_resample.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + 
"GLESv2", + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = 
True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and 
CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_gn") diff --git a/third_party/libwebrtc/audio/audio_level.cc b/third_party/libwebrtc/audio/audio_level.cc new file mode 100644 index 0000000000..7874b73f1c --- /dev/null +++ b/third_party/libwebrtc/audio/audio_level.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/audio_level.h" + +#include "api/audio/audio_frame.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { +namespace voe { + +AudioLevel::AudioLevel() + : abs_max_(0), count_(0), current_level_full_range_(0) {} + +AudioLevel::~AudioLevel() {} + +void AudioLevel::Reset() { + MutexLock lock(&mutex_); + abs_max_ = 0; + count_ = 0; + current_level_full_range_ = 0; + total_energy_ = 0.0; + total_duration_ = 0.0; +} + +int16_t AudioLevel::LevelFullRange() const { + MutexLock lock(&mutex_); + return current_level_full_range_; +} + +void AudioLevel::ResetLevelFullRange() { + MutexLock lock(&mutex_); + abs_max_ = 0; + count_ = 0; + current_level_full_range_ = 0; +} + +double AudioLevel::TotalEnergy() const { + MutexLock lock(&mutex_); + return total_energy_; +} + +double AudioLevel::TotalDuration() const { + MutexLock lock(&mutex_); + return total_duration_; +} + +void AudioLevel::ComputeLevel(const AudioFrame& audioFrame, double duration) { + 
// Check speech level (works for 2 channels as well) + // A muted frame is treated as silence (peak value 0). The peak is computed + // before taking the lock; only the member updates below are guarded. + int16_t abs_value = + audioFrame.muted() + ? 0 + : WebRtcSpl_MaxAbsValueW16( + audioFrame.data(), + audioFrame.samples_per_channel_ * audioFrame.num_channels_); + + // Protect member access using a lock since this method is called on a + // dedicated audio thread in the RecordedDataIsAvailable() callback. + MutexLock lock(&mutex_); + + if (abs_value > abs_max_) + abs_max_ = abs_value; + + // Update level approximately 9 times per second, assuming audio frame + // duration is approximately 10 ms. (The update frequency is every + // 11th (= |kUpdateFrequency+1|) call: 1000/(11*10)=9.09..., we should + // probably change this behavior, see https://crbug.com/webrtc/10784). + if (count_++ == kUpdateFrequency) { + current_level_full_range_ = abs_max_; + + count_ = 0; + + // Decay the absolute maximum (divide by 4) + abs_max_ >>= 2; + } + + // See the description for "totalAudioEnergy" in the WebRTC stats spec + // (https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy) + // for an explanation of these formulas. In short, we need a value that can + // be used to compute RMS audio levels over different time intervals, by + // taking the difference between the results from two getStats calls. To do + // this, the value needs to be of units "squared sample value * time". + // Note that the energy term is derived from current_level_full_range_ (the + // most recently published level, normalized by INT16_MAX), not from this + // frame's abs_value. + double additional_energy = + static_cast<double>(current_level_full_range_) / INT16_MAX; + additional_energy *= additional_energy; + total_energy_ += additional_energy * duration; + total_duration_ += duration; +} + +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_level.h b/third_party/libwebrtc/audio/audio_level.h new file mode 100644 index 0000000000..acd1231fe2 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_level.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_AUDIO_LEVEL_H_ +#define AUDIO_AUDIO_LEVEL_H_ + +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class AudioFrame; +namespace voe { + +// This class is thread-safe. However, TotalEnergy() and TotalDuration() are +// related, so if you call ComputeLevel() on a different thread than you read +// these values, you still need to use a lock to read them as a pair. +class AudioLevel { + public: + AudioLevel(); + ~AudioLevel(); + // Clears the level state as well as the accumulated TotalEnergy() and + // TotalDuration(). + void Reset(); + + // Returns the current audio level linearly [0,32767], which gets updated + // every "kUpdateFrequency+1" call to ComputeLevel() based on the maximum + // audio level of any audio frame, decaying by a factor of 1/4 each time + // LevelFullRange() gets updated. + // Called on "API thread(s)" from APIs like VoEBase::CreateChannel(), + // VoEBase::StopSend(). + int16_t LevelFullRange() const; + // Clears the level state like Reset(), but leaves TotalEnergy() and + // TotalDuration() untouched. + void ResetLevelFullRange(); + // See the description for "totalAudioEnergy" in the WebRTC stats spec + // (https://w3c.github.io/webrtc-stats/#dom-rtcaudiohandlerstats-totalaudioenergy) + // In our implementation, the total audio energy increases by the + // energy-equivalent of LevelFullRange() at the time of ComputeLevel(), rather + // than the energy of the samples in that specific audio frame. As a result, + // we may report a higher audio energy and audio level than the spec mandates. + // TODO(https://crbug.com/webrtc/10784): We should either do what the spec + // says or update the spec to match our implementation. 
If we want to have a + // decaying audio level we should probably update both the spec and the + // implementation to reduce the complexity of the definition. If we want to + // continue to have decaying audio we should have unittests covering the + // behavior of the decay. + double TotalEnergy() const; + // Sum of the `duration` arguments passed to ComputeLevel() since + // construction or the last Reset(). + double TotalDuration() const; + + // Called on a native capture audio thread (platform dependent) from the + // AudioTransport::RecordedDataIsAvailable() callback. + // In Chrome, this method is called on the AudioInputDevice thread. + // `duration` is added to TotalDuration() and weights this call's + // contribution to TotalEnergy(); it is presumably the length of + // `audioFrame` in seconds (TODO: confirm against callers). + void ComputeLevel(const AudioFrame& audioFrame, double duration); + + private: + // The published level is refreshed when the internal call counter reaches + // this value, i.e. on every (kUpdateFrequency + 1)-th ComputeLevel() call. + enum { kUpdateFrequency = 10 }; + + mutable Mutex mutex_; + + int16_t abs_max_ RTC_GUARDED_BY(mutex_); + int16_t count_ RTC_GUARDED_BY(mutex_); + int16_t current_level_full_range_ RTC_GUARDED_BY(mutex_); + + double total_energy_ RTC_GUARDED_BY(mutex_) = 0.0; + double total_duration_ RTC_GUARDED_BY(mutex_) = 0.0; +}; + +} // namespace voe +} // namespace webrtc + +#endif // AUDIO_AUDIO_LEVEL_H_ diff --git a/third_party/libwebrtc/audio/audio_receive_stream.cc b/third_party/libwebrtc/audio/audio_receive_stream.cc new file mode 100644 index 0000000000..c49b83f95f --- /dev/null +++ b/third_party/libwebrtc/audio/audio_receive_stream.cc @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/audio_receive_stream.h" + +#include <string> +#include <utility> + +#include "absl/memory/memory.h" +#include "api/array_view.h" +#include "api/audio_codecs/audio_format.h" +#include "api/call/audio_sink.h" +#include "api/rtp_parameters.h" +#include "api/sequence_checker.h" +#include "audio/audio_send_stream.h" +#include "audio/audio_state.h" +#include "audio/channel_receive.h" +#include "audio/conversion.h" +#include "call/rtp_config.h" +#include "call/rtp_stream_receiver_controller_interface.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +std::string AudioReceiveStreamInterface::Config::Rtp::ToString() const { + char ss_buf[1024]; + rtc::SimpleStringBuilder ss(ss_buf); + ss << "{remote_ssrc: " << remote_ssrc; + ss << ", local_ssrc: " << local_ssrc; + ss << ", nack: " << nack.ToString(); + ss << ", rtcp_event_observer: " + << (rtcp_event_observer ? "(rtcp_event_observer)" : "nullptr"); + ss << '}'; + return ss.str(); +} + +std::string AudioReceiveStreamInterface::Config::ToString() const { + char ss_buf[1024]; + rtc::SimpleStringBuilder ss(ss_buf); + ss << "{rtp: " << rtp.ToString(); + ss << ", rtcp_send_transport: " + << (rtcp_send_transport ? 
"(Transport)" : "null"); + if (!sync_group.empty()) { + ss << ", sync_group: " << sync_group; + } + ss << '}'; + return ss.str(); +} + +namespace { +std::unique_ptr<voe::ChannelReceiveInterface> CreateChannelReceive( + Clock* clock, + webrtc::AudioState* audio_state, + NetEqFactory* neteq_factory, + const webrtc::AudioReceiveStreamInterface::Config& config, + RtcEventLog* event_log) { + RTC_DCHECK(audio_state); + internal::AudioState* internal_audio_state = + static_cast<internal::AudioState*>(audio_state); + return voe::CreateChannelReceive( + clock, neteq_factory, internal_audio_state->audio_device_module(), + config.rtcp_send_transport, event_log, config.rtp.local_ssrc, + config.rtp.remote_ssrc, config.jitter_buffer_max_packets, + config.jitter_buffer_fast_accelerate, config.jitter_buffer_min_delay_ms, + config.enable_non_sender_rtt, config.decoder_factory, + config.codec_pair_id, std::move(config.frame_decryptor), + config.crypto_options, std::move(config.frame_transformer), + config.rtp.rtcp_event_observer); +} +} // namespace + +AudioReceiveStreamImpl::AudioReceiveStreamImpl( + Clock* clock, + PacketRouter* packet_router, + NetEqFactory* neteq_factory, + const webrtc::AudioReceiveStreamInterface::Config& config, + const rtc::scoped_refptr<webrtc::AudioState>& audio_state, + webrtc::RtcEventLog* event_log) + : AudioReceiveStreamImpl(clock, + packet_router, + config, + audio_state, + event_log, + CreateChannelReceive(clock, + audio_state.get(), + neteq_factory, + config, + event_log)) {} + +AudioReceiveStreamImpl::AudioReceiveStreamImpl( + Clock* clock, + PacketRouter* packet_router, + const webrtc::AudioReceiveStreamInterface::Config& config, + const rtc::scoped_refptr<webrtc::AudioState>& audio_state, + webrtc::RtcEventLog* event_log, + std::unique_ptr<voe::ChannelReceiveInterface> channel_receive) + : config_(config), + audio_state_(audio_state), + source_tracker_(clock), + channel_receive_(std::move(channel_receive)) { + RTC_LOG(LS_INFO) << 
"AudioReceiveStreamImpl: " << config.rtp.remote_ssrc; + RTC_DCHECK(config.decoder_factory); + RTC_DCHECK(config.rtcp_send_transport); + RTC_DCHECK(audio_state_); + RTC_DCHECK(channel_receive_); + + RTC_DCHECK(packet_router); + // Configure bandwidth estimation. + channel_receive_->RegisterReceiverCongestionControlObjects(packet_router); + + // When output is muted, ChannelReceive will directly notify the source + // tracker of "delivered" frames, so RtpReceiver information will continue to + // be updated. + channel_receive_->SetSourceTracker(&source_tracker_); + + // Complete configuration. + // TODO(solenberg): Config NACK history window (which is a packet count), + // using the actual packet size for the configured codec. + channel_receive_->SetNACKStatus(config.rtp.nack.rtp_history_ms != 0, + config.rtp.nack.rtp_history_ms / 20); + channel_receive_->SetReceiveCodecs(config.decoder_map); + // `frame_transformer` and `frame_decryptor` have been given to + // `channel_receive_` already. +} + +AudioReceiveStreamImpl::~AudioReceiveStreamImpl() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_LOG(LS_INFO) << "~AudioReceiveStreamImpl: " << remote_ssrc(); + Stop(); + channel_receive_->SetAssociatedSendChannel(nullptr); + channel_receive_->ResetReceiverCongestionControlObjects(); +} + +void AudioReceiveStreamImpl::RegisterWithTransport( + RtpStreamReceiverControllerInterface* receiver_controller) { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + RTC_DCHECK(!rtp_stream_receiver_); + rtp_stream_receiver_ = receiver_controller->CreateReceiver( + remote_ssrc(), channel_receive_.get()); +} + +void AudioReceiveStreamImpl::UnregisterFromTransport() { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + rtp_stream_receiver_.reset(); +} + +void AudioReceiveStreamImpl::ReconfigureForTesting( + const webrtc::AudioReceiveStreamInterface::Config& config) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + + // SSRC can't be changed mid-stream. 
+ RTC_DCHECK_EQ(remote_ssrc(), config.rtp.remote_ssrc); + RTC_DCHECK_EQ(local_ssrc(), config.rtp.local_ssrc); + + // Configuration parameters which cannot be changed. + RTC_DCHECK_EQ(config_.rtcp_send_transport, config.rtcp_send_transport); + // Decoder factory cannot be changed because it is configured at + // voe::Channel construction time. + RTC_DCHECK_EQ(config_.decoder_factory, config.decoder_factory); + + // TODO(solenberg): Config NACK history window (which is a packet count), + // using the actual packet size for the configured codec. + RTC_DCHECK_EQ(config_.rtp.nack.rtp_history_ms, config.rtp.nack.rtp_history_ms) + << "Use SetUseTransportCcAndNackHistory"; + + RTC_DCHECK(config_.decoder_map == config.decoder_map) << "Use SetDecoderMap"; + RTC_DCHECK_EQ(config_.frame_transformer, config.frame_transformer) + << "Use SetDepacketizerToDecoderFrameTransformer"; + + config_ = config; +} + +void AudioReceiveStreamImpl::Start() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (playing_) { + return; + } + channel_receive_->StartPlayout(); + playing_ = true; + audio_state()->AddReceivingStream(this); +} + +void AudioReceiveStreamImpl::Stop() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!playing_) { + return; + } + channel_receive_->StopPlayout(); + playing_ = false; + audio_state()->RemoveReceivingStream(this); +} + +bool AudioReceiveStreamImpl::IsRunning() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return playing_; +} + +void AudioReceiveStreamImpl::SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_receive_->SetDepacketizerToDecoderFrameTransformer( + std::move(frame_transformer)); +} + +void AudioReceiveStreamImpl::SetDecoderMap( + std::map<int, SdpAudioFormat> decoder_map) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + config_.decoder_map = std::move(decoder_map); + 
channel_receive_->SetReceiveCodecs(config_.decoder_map); +} + +void AudioReceiveStreamImpl::SetNackHistory(int history_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK_GE(history_ms, 0); + + if (config_.rtp.nack.rtp_history_ms == history_ms) + return; + + config_.rtp.nack.rtp_history_ms = history_ms; + // TODO(solenberg): Config NACK history window (which is a packet count), + // using the actual packet size for the configured codec. + channel_receive_->SetNACKStatus(history_ms != 0, history_ms / 20); +} + +void AudioReceiveStreamImpl::SetNonSenderRttMeasurement(bool enabled) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + config_.enable_non_sender_rtt = enabled; + channel_receive_->SetNonSenderRttMeasurement(enabled); +} + +void AudioReceiveStreamImpl::SetFrameDecryptor( + rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor) { + // TODO(bugs.webrtc.org/11993): This is called via WebRtcAudioReceiveStream, + // expect to be called on the network thread. + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_receive_->SetFrameDecryptor(std::move(frame_decryptor)); +} + +webrtc::AudioReceiveStreamInterface::Stats AudioReceiveStreamImpl::GetStats( + bool get_and_clear_legacy_stats) const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + webrtc::AudioReceiveStreamInterface::Stats stats; + stats.remote_ssrc = remote_ssrc(); + + webrtc::CallReceiveStatistics call_stats = + channel_receive_->GetRTCPStatistics(); + // TODO(solenberg): Don't return here if we can't get the codec - return the + // stats we *can* get. 
+ auto receive_codec = channel_receive_->GetReceiveCodec(); + if (!receive_codec) { + return stats; + } + + stats.payload_bytes_received = call_stats.payload_bytes_received; + stats.header_and_padding_bytes_received = + call_stats.header_and_padding_bytes_received; + stats.packets_received = call_stats.packetsReceived; + stats.packets_lost = call_stats.cumulativeLost; + stats.nacks_sent = call_stats.nacks_sent; + stats.capture_start_ntp_time_ms = call_stats.capture_start_ntp_time_ms_; + stats.last_packet_received = call_stats.last_packet_received; + stats.codec_name = receive_codec->second.name; + stats.codec_payload_type = receive_codec->first; + int clockrate_khz = receive_codec->second.clockrate_hz / 1000; + if (clockrate_khz > 0) { + stats.jitter_ms = call_stats.jitterSamples / clockrate_khz; + } + stats.delay_estimate_ms = channel_receive_->GetDelayEstimate(); + stats.audio_level = channel_receive_->GetSpeechOutputLevelFullRange(); + stats.total_output_energy = channel_receive_->GetTotalOutputEnergy(); + stats.total_output_duration = channel_receive_->GetTotalOutputDuration(); + stats.estimated_playout_ntp_timestamp_ms = + channel_receive_->GetCurrentEstimatedPlayoutNtpTimestampMs( + rtc::TimeMillis()); + + // Get jitter buffer and total delay (alg + jitter + playout) stats. 
+ auto ns = channel_receive_->GetNetworkStatistics(get_and_clear_legacy_stats); + stats.packets_discarded = ns.packetsDiscarded; + stats.fec_packets_received = ns.fecPacketsReceived; + stats.fec_packets_discarded = ns.fecPacketsDiscarded; + stats.jitter_buffer_ms = ns.currentBufferSize; + stats.jitter_buffer_preferred_ms = ns.preferredBufferSize; + stats.total_samples_received = ns.totalSamplesReceived; + stats.concealed_samples = ns.concealedSamples; + stats.silent_concealed_samples = ns.silentConcealedSamples; + stats.concealment_events = ns.concealmentEvents; + stats.jitter_buffer_delay_seconds = + static_cast<double>(ns.jitterBufferDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec); + stats.jitter_buffer_emitted_count = ns.jitterBufferEmittedCount; + stats.jitter_buffer_target_delay_seconds = + static_cast<double>(ns.jitterBufferTargetDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec); + stats.jitter_buffer_minimum_delay_seconds = + static_cast<double>(ns.jitterBufferMinimumDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec); + stats.inserted_samples_for_deceleration = ns.insertedSamplesForDeceleration; + stats.removed_samples_for_acceleration = ns.removedSamplesForAcceleration; + stats.expand_rate = Q14ToFloat(ns.currentExpandRate); + stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate); + stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate); + stats.secondary_discarded_rate = Q14ToFloat(ns.currentSecondaryDiscardedRate); + stats.accelerate_rate = Q14ToFloat(ns.currentAccelerateRate); + stats.preemptive_expand_rate = Q14ToFloat(ns.currentPreemptiveRate); + stats.jitter_buffer_flushes = ns.packetBufferFlushes; + stats.delayed_packet_outage_samples = ns.delayedPacketOutageSamples; + stats.relative_packet_arrival_delay_seconds = + static_cast<double>(ns.relativePacketArrivalDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec); + stats.interruption_count = ns.interruptionCount; + 
stats.total_interruption_duration_ms = ns.totalInterruptionDurationMs; + + auto ds = channel_receive_->GetDecodingCallStatistics(); + stats.decoding_calls_to_silence_generator = ds.calls_to_silence_generator; + stats.decoding_calls_to_neteq = ds.calls_to_neteq; + stats.decoding_normal = ds.decoded_normal; + stats.decoding_plc = ds.decoded_neteq_plc; + stats.decoding_codec_plc = ds.decoded_codec_plc; + stats.decoding_cng = ds.decoded_cng; + stats.decoding_plc_cng = ds.decoded_plc_cng; + stats.decoding_muted_output = ds.decoded_muted_output; + + stats.last_sender_report_timestamp_ms = + call_stats.last_sender_report_timestamp_ms; + stats.last_sender_report_remote_timestamp_ms = + call_stats.last_sender_report_remote_timestamp_ms; + stats.sender_reports_packets_sent = call_stats.sender_reports_packets_sent; + stats.sender_reports_bytes_sent = call_stats.sender_reports_bytes_sent; + stats.sender_reports_reports_count = call_stats.sender_reports_reports_count; + stats.round_trip_time = call_stats.round_trip_time; + stats.round_trip_time_measurements = call_stats.round_trip_time_measurements; + stats.total_round_trip_time = call_stats.total_round_trip_time; + + return stats; +} + +void AudioReceiveStreamImpl::SetSink(AudioSinkInterface* sink) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_receive_->SetSink(sink); +} + +void AudioReceiveStreamImpl::SetGain(float gain) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_receive_->SetChannelOutputVolumeScaling(gain); +} + +bool AudioReceiveStreamImpl::SetBaseMinimumPlayoutDelayMs(int delay_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return channel_receive_->SetBaseMinimumPlayoutDelayMs(delay_ms); +} + +int AudioReceiveStreamImpl::GetBaseMinimumPlayoutDelayMs() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return channel_receive_->GetBaseMinimumPlayoutDelayMs(); +} + +std::vector<RtpSource> AudioReceiveStreamImpl::GetSources() const { + return source_tracker_.GetSources(); +} + 
+AudioMixer::Source::AudioFrameInfo +AudioReceiveStreamImpl::GetAudioFrameWithInfo(int sample_rate_hz, + AudioFrame* audio_frame) { + AudioMixer::Source::AudioFrameInfo audio_frame_info = + channel_receive_->GetAudioFrameWithInfo(sample_rate_hz, audio_frame); + if (audio_frame_info != AudioMixer::Source::AudioFrameInfo::kError && + !audio_frame->packet_infos_.empty()) { + source_tracker_.OnFrameDelivered(audio_frame->packet_infos_); + } + return audio_frame_info; +} + +int AudioReceiveStreamImpl::Ssrc() const { + return remote_ssrc(); +} + +int AudioReceiveStreamImpl::PreferredSampleRate() const { + return channel_receive_->PreferredSampleRate(); +} + +uint32_t AudioReceiveStreamImpl::id() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return remote_ssrc(); +} + +absl::optional<Syncable::Info> AudioReceiveStreamImpl::GetInfo() const { + // TODO(bugs.webrtc.org/11993): This is called via RtpStreamsSynchronizer, + // expect to be called on the network thread. + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return channel_receive_->GetSyncInfo(); +} + +bool AudioReceiveStreamImpl::GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp, + int64_t* time_ms) const { + // Called on video capture thread. + return channel_receive_->GetPlayoutRtpTimestamp(rtp_timestamp, time_ms); +} + +void AudioReceiveStreamImpl::SetEstimatedPlayoutNtpTimestampMs( + int64_t ntp_timestamp_ms, + int64_t time_ms) { + // Called on video capture thread. + channel_receive_->SetEstimatedPlayoutNtpTimestampMs(ntp_timestamp_ms, + time_ms); +} + +bool AudioReceiveStreamImpl::SetMinimumPlayoutDelay(int delay_ms) { + // TODO(bugs.webrtc.org/11993): This is called via RtpStreamsSynchronizer, + // expect to be called on the network thread. 
+ RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return channel_receive_->SetMinimumPlayoutDelay(delay_ms); +} + +void AudioReceiveStreamImpl::AssociateSendStream( + internal::AudioSendStream* send_stream) { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + channel_receive_->SetAssociatedSendChannel( + send_stream ? send_stream->GetChannel() : nullptr); + associated_send_stream_ = send_stream; +} + +void AudioReceiveStreamImpl::DeliverRtcp(const uint8_t* packet, size_t length) { + // TODO(solenberg): Tests call this function on a network thread, libjingle + // calls on the worker thread. We should move towards always using a network + // thread. Then this check can be enabled. + // RTC_DCHECK(!thread_checker_.IsCurrent()); + channel_receive_->ReceivedRTCPPacket(packet, length); +} + +void AudioReceiveStreamImpl::SetSyncGroup(absl::string_view sync_group) { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + config_.sync_group = std::string(sync_group); +} + +void AudioReceiveStreamImpl::SetLocalSsrc(uint32_t local_ssrc) { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + // TODO(tommi): Consider storing local_ssrc in one place. 
+ config_.rtp.local_ssrc = local_ssrc; + channel_receive_->OnLocalSsrcChange(local_ssrc); +} + +uint32_t AudioReceiveStreamImpl::local_ssrc() const { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + RTC_DCHECK_EQ(config_.rtp.local_ssrc, channel_receive_->GetLocalSsrc()); + return config_.rtp.local_ssrc; +} + +const std::string& AudioReceiveStreamImpl::sync_group() const { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + return config_.sync_group; +} + +const AudioSendStream* +AudioReceiveStreamImpl::GetAssociatedSendStreamForTesting() const { + RTC_DCHECK_RUN_ON(&packet_sequence_checker_); + return associated_send_stream_; +} + +internal::AudioState* AudioReceiveStreamImpl::audio_state() const { + auto* audio_state = static_cast<internal::AudioState*>(audio_state_.get()); + RTC_DCHECK(audio_state); + return audio_state; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_receive_stream.h b/third_party/libwebrtc/audio/audio_receive_stream.h new file mode 100644 index 0000000000..db49631638 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_receive_stream.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef AUDIO_AUDIO_RECEIVE_STREAM_H_
+#define AUDIO_AUDIO_RECEIVE_STREAM_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/audio_mixer.h"
+#include "api/neteq/neteq_factory.h"
+#include "api/rtp_headers.h"
+#include "api/sequence_checker.h"
+#include "audio/audio_state.h"
+#include "call/audio_receive_stream.h"
+#include "call/syncable.h"
+#include "modules/rtp_rtcp/source/source_tracker.h"
+#include "rtc_base/system/no_unique_address.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+class PacketRouter;
+class RtcEventLog;
+class RtpStreamReceiverControllerInterface;
+class RtpStreamReceiverInterface;
+
+namespace voe {
+class ChannelReceiveInterface;
+}  // namespace voe
+
+namespace internal {
+class AudioSendStream;
+}  // namespace internal
+
+// Receive-side audio stream. Implements the public
+// AudioReceiveStreamInterface, acts as a source for the audio mixer
+// (AudioMixer::Source) and exposes the Syncable interface. Most calls are
+// forwarded to the owned voe::ChannelReceiveInterface.
+class AudioReceiveStreamImpl final : public webrtc::AudioReceiveStreamInterface,
+                                     public AudioMixer::Source,
+                                     public Syncable {
+ public:
+  AudioReceiveStreamImpl(
+      Clock* clock,
+      PacketRouter* packet_router,
+      NetEqFactory* neteq_factory,
+      const webrtc::AudioReceiveStreamInterface::Config& config,
+      const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+      webrtc::RtcEventLog* event_log);
+  // For unit tests, which need to supply a mock channel receive.
+  AudioReceiveStreamImpl(
+      Clock* clock,
+      PacketRouter* packet_router,
+      const webrtc::AudioReceiveStreamInterface::Config& config,
+      const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+      webrtc::RtcEventLog* event_log,
+      std::unique_ptr<voe::ChannelReceiveInterface> channel_receive);
+
+  AudioReceiveStreamImpl() = delete;
+  AudioReceiveStreamImpl(const AudioReceiveStreamImpl&) = delete;
+  AudioReceiveStreamImpl& operator=(const AudioReceiveStreamImpl&) = delete;
+
+  // Destruction happens on the worker thread. Prior to destruction the caller
+  // must ensure that a registration with the transport has been cleared. See
+  // `RegisterWithTransport` for details.
+  // TODO(tommi): As a further improvement to this, performing the full
+  // destruction on the network thread could be made the default.
+  ~AudioReceiveStreamImpl() override;
+
+  // Called on the network thread to register/unregister with the network
+  // transport.
+  void RegisterWithTransport(
+      RtpStreamReceiverControllerInterface* receiver_controller);
+  // If registration has previously been done (via `RegisterWithTransport`) then
+  // `UnregisterFromTransport` must be called prior to destruction, on the
+  // network thread.
+  void UnregisterFromTransport();
+
+  // webrtc::AudioReceiveStreamInterface implementation.
+  // Start()/Stop() are idempotent: a repeated Start() or a Stop() on a stream
+  // that is not playing is a no-op.
+  void Start() override;
+  void Stop() override;
+  bool IsRunning() const override;
+  void SetDepacketizerToDecoderFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
+      override;
+  void SetDecoderMap(std::map<int, SdpAudioFormat> decoder_map) override;
+  // `history_ms` must be >= 0; 0 disables NACK. The value is forwarded to the
+  // channel as a packet count of `history_ms / 20`.
+  void SetNackHistory(int history_ms) override;
+  void SetNonSenderRttMeasurement(bool enabled) override;
+  void SetFrameDecryptor(rtc::scoped_refptr<webrtc::FrameDecryptorInterface>
+                             frame_decryptor) override;
+
+  // Returns a stats snapshot. If the channel has no receive codec, only
+  // `remote_ssrc` is populated.
+  webrtc::AudioReceiveStreamInterface::Stats GetStats(
+      bool get_and_clear_legacy_stats) const override;
+  void SetSink(AudioSinkInterface* sink) override;
+  void SetGain(float gain) override;
+  bool SetBaseMinimumPlayoutDelayMs(int delay_ms) override;
+  int GetBaseMinimumPlayoutDelayMs() const override;
+  std::vector<webrtc::RtpSource> GetSources() const override;
+
+  // AudioMixer::Source
+  AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
+                                       AudioFrame* audio_frame) override;
+  int Ssrc() const override;
+  int PreferredSampleRate() const override;
+
+  // Syncable
+  uint32_t id() const override;
+  absl::optional<Syncable::Info> GetInfo() const override;
+  bool GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp,
+                              int64_t* time_ms) const override;
+  void SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms,
+                                         int64_t time_ms) override;
+  bool SetMinimumPlayoutDelay(int delay_ms) override;
+
+  // Associates this stream with `send_stream`; passing nullptr clears the
+  // association.
+  void AssociateSendStream(internal::AudioSendStream* send_stream);
+  // Passes an incoming RTCP packet to the receive channel.
+  void DeliverRtcp(const uint8_t* packet, size_t length);
+
+  void SetSyncGroup(absl::string_view sync_group);
+
+  void SetLocalSsrc(uint32_t local_ssrc);
+
+  uint32_t local_ssrc() const;
+
+  uint32_t remote_ssrc() const override {
+    // The remote_ssrc member variable of config_ will never change and can be
+    // considered const.
+    return config_.rtp.remote_ssrc;
+  }
+
+  // Returns a reference to the currently set sync group of the stream.
+  // Must be called on the packet delivery thread.
+  const std::string& sync_group() const;
+
+  const AudioSendStream* GetAssociatedSendStreamForTesting() const;
+
+  // TODO(tommi): Remove this method.
+  void ReconfigureForTesting(
+      const webrtc::AudioReceiveStreamInterface::Config& config);
+
+ private:
+  // Returns `audio_state_` cast to the internal implementation type.
+  internal::AudioState* audio_state() const;
+
+  RTC_NO_UNIQUE_ADDRESS SequenceChecker worker_thread_checker_;
+  // TODO(bugs.webrtc.org/11993): This checker conceptually represents
+  // operations that belong to the network thread. The Call class is currently
+  // moving towards handling network packets on the network thread and while
+  // that work is ongoing, this checker may in practice represent the worker
+  // thread, but still serves as a mechanism of grouping together concepts
+  // that belong to the network thread. Once the packets are fully delivered
+  // on the network thread, this comment will be deleted.
+  RTC_NO_UNIQUE_ADDRESS SequenceChecker packet_sequence_checker_{
+      SequenceChecker::kDetached};
+  webrtc::AudioReceiveStreamInterface::Config config_;
+  rtc::scoped_refptr<webrtc::AudioState> audio_state_;
+  SourceTracker source_tracker_;
+  const std::unique_ptr<voe::ChannelReceiveInterface> channel_receive_;
+  // Set via AssociateSendStream(); nullptr when no send stream is associated.
+  AudioSendStream* associated_send_stream_
+      RTC_GUARDED_BY(packet_sequence_checker_) = nullptr;
+
+  // True between Start() and Stop().
+  bool playing_ RTC_GUARDED_BY(worker_thread_checker_) = false;
+
+  std::unique_ptr<RtpStreamReceiverInterface> rtp_stream_receiver_
+      RTC_GUARDED_BY(packet_sequence_checker_);
+};
+}  // namespace webrtc
+
+#endif  // AUDIO_AUDIO_RECEIVE_STREAM_H_
diff --git a/third_party/libwebrtc/audio/audio_receive_stream_unittest.cc b/third_party/libwebrtc/audio/audio_receive_stream_unittest.cc
new file mode 100644
index 0000000000..451d5f9b91
--- /dev/null
+++ b/third_party/libwebrtc/audio/audio_receive_stream_unittest.cc
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/audio_receive_stream.h"
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "api/test/mock_audio_mixer.h"
+#include "api/test/mock_frame_decryptor.h"
+#include "audio/conversion.h"
+#include "audio/mock_voe_channel_proxy.h"
+#include "call/rtp_stream_receiver_controller.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "modules/audio_device/include/mock_audio_device.h"
+#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "modules/pacing/packet_router.h"
+#include "modules/rtp_rtcp/source/byte_io.h"
+#include "rtc_base/time_utils.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder_factory.h"
+#include "test/mock_transport.h"
+#include "test/run_loop.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using ::testing::_;
+using ::testing::FloatEq;
+using ::testing::NiceMock;
+using ::testing::Return;
+
+// Builds decoding stats in which every counter has a distinct value, so a
+// field copied from the wrong source shows up in the GetStats test.
+AudioDecodingCallStats MakeAudioDecodeStatsForTest() {
+  AudioDecodingCallStats audio_decode_stats;
+  audio_decode_stats.calls_to_silence_generator = 234;
+  audio_decode_stats.calls_to_neteq = 567;
+  audio_decode_stats.decoded_normal = 890;
+  audio_decode_stats.decoded_neteq_plc = 123;
+  audio_decode_stats.decoded_codec_plc = 124;
+  audio_decode_stats.decoded_cng = 456;
+  audio_decode_stats.decoded_plc_cng = 789;
+  audio_decode_stats.decoded_muted_output = 987;
+  return audio_decode_stats;
+}
+
+// Canned values returned by the mock channel in SetupMockForGetStats().
+const uint32_t kRemoteSsrc = 1234;
+const uint32_t kLocalSsrc = 5678;
+const int kJitterBufferDelay = -7;
+const int kPlayoutBufferDelay = 302;
+const unsigned int kSpeechOutputLevel = 99;
+const double kTotalOutputEnergy = 0.25;
+const double kTotalOutputDuration = 0.5;
+const int64_t kPlayoutNtpTimestampMs = 5678;
+
+// NOTE(review): positional aggregate initialization; the values depend on
+// the member order of CallReceiveStatistics, which is not visible here.
+const CallReceiveStatistics kCallStats = {678, 234, -12, 567, 78, 890, 123};
+// Payload type 123 mapped to a codec with a 96 kHz clock rate.
+const std::pair<int, SdpAudioFormat> kReceiveCodec = {
+    123,
+    {"codec_name_recv", 96000, 0}};
+const NetworkStatistics kNetworkStats = {
+    /*currentBufferSize=*/123,
+    /*preferredBufferSize=*/456,
+    /*jitterPeaksFound=*/false,
+    /*totalSamplesReceived=*/789012,
+    /*concealedSamples=*/3456,
+    /*silentConcealedSamples=*/123,
+    /*concealmentEvents=*/456,
+    /*jitterBufferDelayMs=*/789,
+    /*jitterBufferEmittedCount=*/543,
+    /*jitterBufferTargetDelayMs=*/123,
+    /*jitterBufferMinimumDelayMs=*/222,
+    /*insertedSamplesForDeceleration=*/432,
+    /*removedSamplesForAcceleration=*/321,
+    /*fecPacketsReceived=*/123,
+    /*fecPacketsDiscarded=*/101,
+    /*packetsDiscarded=*/989,
+    /*currentExpandRate=*/789,
+    /*currentSpeechExpandRate=*/12,
+    /*currentPreemptiveRate=*/345,
+    /*currentAccelerateRate =*/678,
+    /*currentSecondaryDecodedRate=*/901,
+    /*currentSecondaryDiscardedRate=*/0,
+    /*meanWaitingTimeMs=*/-1,
+    /*maxWaitingTimeMs=*/-1,
+    /*packetBufferFlushes=*/0,
+    /*delayedPacketOutageSamples=*/0,
+    /*relativePacketArrivalDelayMs=*/135,
+    /*interruptionCount=*/-1,
+    /*totalInterruptionDurationMs=*/-1};
+const AudioDecodingCallStats kAudioDecodeStats = MakeAudioDecodeStatsForTest();
+
+// Test fixture that owns everything an AudioReceiveStreamImpl needs: an
+// AudioState built from mocks, a strict mock receive channel with the
+// expectations set in the constructor, and a default stream config.
+struct ConfigHelper {
+  explicit ConfigHelper(bool use_null_audio_processing)
+      : ConfigHelper(rtc::make_ref_counted<MockAudioMixer>(),
+                     use_null_audio_processing) {}
+
+  // `use_null_audio_processing` selects whether the AudioState is created
+  // with a null audio processing module or with a mock one.
+  ConfigHelper(rtc::scoped_refptr<MockAudioMixer> audio_mixer,
+               bool use_null_audio_processing)
+      : audio_mixer_(audio_mixer) {
+    using ::testing::Invoke;
+
+    AudioState::Config config;
+    config.audio_mixer = audio_mixer_;
+    config.audio_processing =
+        use_null_audio_processing
+            ? nullptr
+            : rtc::make_ref_counted<NiceMock<MockAudioProcessing>>();
+    config.audio_device_module =
+        rtc::make_ref_counted<testing::NiceMock<MockAudioDeviceModule>>();
+    audio_state_ = AudioState::Create(config);
+
+    // Raw pointer on purpose: ownership is handed to the stream in
+    // CreateAudioReceiveStream(), while the helper keeps a non-owning handle
+    // for setting further expectations.
+    channel_receive_ = new ::testing::StrictMock<MockChannelReceive>();
+    // 15 packets == rtp_history_ms (300, set below) / 20.
+    EXPECT_CALL(*channel_receive_, SetNACKStatus(true, 15)).Times(1);
+    EXPECT_CALL(*channel_receive_,
+                RegisterReceiverCongestionControlObjects(&packet_router_))
+        .Times(1);
+    EXPECT_CALL(*channel_receive_, ResetReceiverCongestionControlObjects())
+        .Times(1);
+    EXPECT_CALL(*channel_receive_, SetAssociatedSendChannel(nullptr)).Times(1);
+    EXPECT_CALL(*channel_receive_, SetReceiveCodecs(_))
+        .WillRepeatedly(Invoke([](const std::map<int, SdpAudioFormat>& codecs) {
+          EXPECT_THAT(codecs, ::testing::IsEmpty());
+        }));
+    EXPECT_CALL(*channel_receive_, SetSourceTracker(_));
+    EXPECT_CALL(*channel_receive_, GetLocalSsrc())
+        .WillRepeatedly(Return(kLocalSsrc));
+
+    stream_config_.rtp.local_ssrc = kLocalSsrc;
+    stream_config_.rtp.remote_ssrc = kRemoteSsrc;
+    stream_config_.rtp.nack.rtp_history_ms = 300;
+    stream_config_.rtcp_send_transport = &rtcp_send_transport_;
+    stream_config_.decoder_factory =
+        rtc::make_ref_counted<MockAudioDecoderFactory>();
+  }
+
+  // Creates the stream under test and registers it with the transport.
+  // Transfers ownership of `channel_receive_` to the stream, so this must be
+  // called at most once per helper.
+  std::unique_ptr<AudioReceiveStreamImpl> CreateAudioReceiveStream() {
+    auto ret = std::make_unique<AudioReceiveStreamImpl>(
+        Clock::GetRealTimeClock(), &packet_router_, stream_config_,
+        audio_state_, &event_log_,
+        std::unique_ptr<voe::ChannelReceiveInterface>(channel_receive_));
+    ret->RegisterWithTransport(&rtp_stream_receiver_controller_);
+    return ret;
+  }
+
+  AudioReceiveStreamInterface::Config& config() { return stream_config_; }
+  rtc::scoped_refptr<MockAudioMixer> audio_mixer() { return audio_mixer_; }
+  MockChannelReceive* channel_receive() { return channel_receive_; }
+
+  // Makes the mock channel answer exactly one GetStats() call with the
+  // canned constants defined above.
+  void SetupMockForGetStats() {
+    using ::testing::DoAll;
+    using ::testing::SetArgPointee;
+
+    ASSERT_TRUE(channel_receive_);
+    EXPECT_CALL(*channel_receive_, GetRTCPStatistics())
+        .WillOnce(Return(kCallStats));
+    EXPECT_CALL(*channel_receive_, GetDelayEstimate())
+        .WillOnce(Return(kJitterBufferDelay + kPlayoutBufferDelay));
+    EXPECT_CALL(*channel_receive_, GetSpeechOutputLevelFullRange())
+        .WillOnce(Return(kSpeechOutputLevel));
+    EXPECT_CALL(*channel_receive_, GetTotalOutputEnergy())
+        .WillOnce(Return(kTotalOutputEnergy));
+    EXPECT_CALL(*channel_receive_, GetTotalOutputDuration())
+        .WillOnce(Return(kTotalOutputDuration));
+    EXPECT_CALL(*channel_receive_, GetNetworkStatistics(_))
+        .WillOnce(Return(kNetworkStats));
+    EXPECT_CALL(*channel_receive_, GetDecodingCallStatistics())
+        .WillOnce(Return(kAudioDecodeStats));
+    EXPECT_CALL(*channel_receive_, GetReceiveCodec())
+        .WillOnce(Return(kReceiveCodec));
+    EXPECT_CALL(*channel_receive_, GetCurrentEstimatedPlayoutNtpTimestampMs(_))
+        .WillOnce(Return(kPlayoutNtpTimestampMs));
+  }
+
+ private:
+  PacketRouter packet_router_;
+  MockRtcEventLog event_log_;
+  rtc::scoped_refptr<AudioState> audio_state_;
+  rtc::scoped_refptr<MockAudioMixer> audio_mixer_;
+  AudioReceiveStreamInterface::Config stream_config_;
+  // Non-owning once CreateAudioReceiveStream() has been called.
+  ::testing::StrictMock<MockChannelReceive>* channel_receive_ = nullptr;
+  RtpStreamReceiverController rtp_stream_receiver_controller_;
+  MockTransport rtcp_send_transport_;
+};
+
+// Builds a 28-byte RTCP sender report whose sender SSRC field (bytes 4..7)
+// is kLocalSsrc; bytes not written explicitly stay zero.
+const std::vector<uint8_t> CreateRtcpSenderReport() {
+  std::vector<uint8_t> packet;
+  const size_t kRtcpSrLength = 28;  // In bytes.
+  packet.resize(kRtcpSrLength);
+  packet[0] = 0x80;  // Version 2.
+  packet[1] = 0xc8;  // PT = 200, SR.
+  // Length in number of 32-bit words - 1.
+ ByteWriter<uint16_t>::WriteBigEndian(&packet[2], 6); + ByteWriter<uint32_t>::WriteBigEndian(&packet[4], kLocalSsrc); + return packet; +} +} // namespace + +TEST(AudioReceiveStreamTest, ConfigToString) { + AudioReceiveStreamInterface::Config config; + config.rtp.remote_ssrc = kRemoteSsrc; + config.rtp.local_ssrc = kLocalSsrc; + EXPECT_EQ( + "{rtp: {remote_ssrc: 1234, local_ssrc: 5678, nack: " + "{rtp_history_ms: 0}}, " + "rtcp_send_transport: null}", + config.ToString()); +} + +TEST(AudioReceiveStreamTest, ConstructDestruct) { + test::RunLoop loop; + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(use_null_audio_processing); + auto recv_stream = helper.CreateAudioReceiveStream(); + recv_stream->UnregisterFromTransport(); + } +} + +TEST(AudioReceiveStreamTest, ReceiveRtcpPacket) { + test::RunLoop loop; + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(use_null_audio_processing); + auto recv_stream = helper.CreateAudioReceiveStream(); + std::vector<uint8_t> rtcp_packet = CreateRtcpSenderReport(); + EXPECT_CALL(*helper.channel_receive(), + ReceivedRTCPPacket(&rtcp_packet[0], rtcp_packet.size())) + .WillOnce(Return()); + recv_stream->DeliverRtcp(&rtcp_packet[0], rtcp_packet.size()); + recv_stream->UnregisterFromTransport(); + } +} + +TEST(AudioReceiveStreamTest, GetStats) { + test::RunLoop loop; + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(use_null_audio_processing); + auto recv_stream = helper.CreateAudioReceiveStream(); + helper.SetupMockForGetStats(); + AudioReceiveStreamInterface::Stats stats = + recv_stream->GetStats(/*get_and_clear_legacy_stats=*/true); + EXPECT_EQ(kRemoteSsrc, stats.remote_ssrc); + EXPECT_EQ(kCallStats.payload_bytes_received, stats.payload_bytes_received); + EXPECT_EQ(kCallStats.header_and_padding_bytes_received, + stats.header_and_padding_bytes_received); + EXPECT_EQ(static_cast<uint32_t>(kCallStats.packetsReceived), + stats.packets_received); + 
EXPECT_EQ(kCallStats.cumulativeLost, stats.packets_lost); + EXPECT_EQ(kReceiveCodec.second.name, stats.codec_name); + EXPECT_EQ( + kCallStats.jitterSamples / (kReceiveCodec.second.clockrate_hz / 1000), + stats.jitter_ms); + EXPECT_EQ(kNetworkStats.currentBufferSize, stats.jitter_buffer_ms); + EXPECT_EQ(kNetworkStats.preferredBufferSize, + stats.jitter_buffer_preferred_ms); + EXPECT_EQ(static_cast<uint32_t>(kJitterBufferDelay + kPlayoutBufferDelay), + stats.delay_estimate_ms); + EXPECT_EQ(static_cast<int32_t>(kSpeechOutputLevel), stats.audio_level); + EXPECT_EQ(kTotalOutputEnergy, stats.total_output_energy); + EXPECT_EQ(kNetworkStats.totalSamplesReceived, stats.total_samples_received); + EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration); + EXPECT_EQ(kNetworkStats.concealedSamples, stats.concealed_samples); + EXPECT_EQ(kNetworkStats.concealmentEvents, stats.concealment_events); + EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec), + stats.jitter_buffer_delay_seconds); + EXPECT_EQ(kNetworkStats.jitterBufferEmittedCount, + stats.jitter_buffer_emitted_count); + EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferTargetDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec), + stats.jitter_buffer_target_delay_seconds); + EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferMinimumDelayMs) / + static_cast<double>(rtc::kNumMillisecsPerSec), + stats.jitter_buffer_minimum_delay_seconds); + EXPECT_EQ(kNetworkStats.insertedSamplesForDeceleration, + stats.inserted_samples_for_deceleration); + EXPECT_EQ(kNetworkStats.removedSamplesForAcceleration, + stats.removed_samples_for_acceleration); + EXPECT_EQ(kNetworkStats.fecPacketsReceived, stats.fec_packets_received); + EXPECT_EQ(kNetworkStats.fecPacketsDiscarded, stats.fec_packets_discarded); + EXPECT_EQ(kNetworkStats.packetsDiscarded, stats.packets_discarded); + EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate); + 
// Verifies that SetGain() forwards the requested gain to the receive channel
// as an output volume scaling, both with and without audio processing.
TEST(AudioReceiveStreamTest, SetGain) {
  test::RunLoop loop;
  constexpr float kGain = 0.765f;
  for (bool null_apm : {false, true}) {
    ConfigHelper helper(null_apm);
    auto stream = helper.CreateAudioReceiveStream();

    // The expectation must be in place before the gain is applied.
    EXPECT_CALL(*helper.channel_receive(),
                SetChannelOutputVolumeScaling(FloatEq(kGain)));
    stream->SetGain(kGain);

    stream->UnregisterFromTransport();
  }
}
+ recv_stream2->Stop(); + + recv_stream1->UnregisterFromTransport(); + recv_stream2->UnregisterFromTransport(); + } +} + +TEST(AudioReceiveStreamTest, ReconfigureWithUpdatedConfig) { + test::RunLoop loop; + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(use_null_audio_processing); + auto recv_stream = helper.CreateAudioReceiveStream(); + + auto new_config = helper.config(); + + MockChannelReceive& channel_receive = *helper.channel_receive(); + + // TODO(tommi, nisse): This applies new extensions to the internal config, + // but there's nothing that actually verifies that the changes take effect. + // In fact Call manages the extensions separately in Call::ReceiveRtpConfig + // and changing this config value (there seem to be a few copies), doesn't + // affect that logic. + recv_stream->ReconfigureForTesting(new_config); + + new_config.decoder_map.emplace(1, SdpAudioFormat("foo", 8000, 1)); + EXPECT_CALL(channel_receive, SetReceiveCodecs(new_config.decoder_map)); + recv_stream->SetDecoderMap(new_config.decoder_map); + + EXPECT_CALL(channel_receive, SetNACKStatus(true, 15 + 1)).Times(1); + recv_stream->SetNackHistory(300 + 20); + + recv_stream->UnregisterFromTransport(); + } +} + +TEST(AudioReceiveStreamTest, ReconfigureWithFrameDecryptor) { + test::RunLoop loop; + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(use_null_audio_processing); + auto recv_stream = helper.CreateAudioReceiveStream(); + + auto new_config_0 = helper.config(); + rtc::scoped_refptr<FrameDecryptorInterface> mock_frame_decryptor_0( + rtc::make_ref_counted<MockFrameDecryptor>()); + new_config_0.frame_decryptor = mock_frame_decryptor_0; + + // TODO(tommi): While this changes the internal config value, it doesn't + // actually change what frame_decryptor is used. WebRtcAudioReceiveStream + // recreates the whole instance in order to change this value. + // So, it's not clear if changing this post initialization needs to be + // supported. 
+ recv_stream->ReconfigureForTesting(new_config_0); + + auto new_config_1 = helper.config(); + rtc::scoped_refptr<FrameDecryptorInterface> mock_frame_decryptor_1( + rtc::make_ref_counted<MockFrameDecryptor>()); + new_config_1.frame_decryptor = mock_frame_decryptor_1; + new_config_1.crypto_options.sframe.require_frame_encryption = true; + recv_stream->ReconfigureForTesting(new_config_1); + recv_stream->UnregisterFromTransport(); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_send_stream.cc b/third_party/libwebrtc/audio/audio_send_stream.cc new file mode 100644 index 0000000000..bffb910832 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_send_stream.cc @@ -0,0 +1,921 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/audio_send_stream.h" + +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/audio_encoder_factory.h" +#include "api/audio_codecs/audio_format.h" +#include "api/call/transport.h" +#include "api/crypto/frame_encryptor_interface.h" +#include "api/function_view.h" +#include "api/rtc_event_log/rtc_event_log.h" +#include "api/task_queue/task_queue_base.h" +#include "audio/audio_state.h" +#include "audio/channel_send.h" +#include "audio/conversion.h" +#include "call/rtp_config.h" +#include "call/rtp_transport_controller_send_interface.h" +#include "common_audio/vad/include/vad.h" +#include "logging/rtc_event_log/events/rtc_event_audio_send_stream_config.h" +#include "logging/rtc_event_log/rtc_stream_config.h" +#include "media/base/media_channel.h" +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" +#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/rtp_rtcp/source/rtp_header_extensions.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/audio_format_to_string.h" +#include "rtc_base/trace_event.h" + +namespace webrtc { +namespace { + +void UpdateEventLogStreamConfig(RtcEventLog* event_log, + const AudioSendStream::Config& config, + const AudioSendStream::Config* old_config) { + using SendCodecSpec = AudioSendStream::Config::SendCodecSpec; + // Only update if any of the things we log have changed. 
+ auto payload_types_equal = [](const absl::optional<SendCodecSpec>& a, + const absl::optional<SendCodecSpec>& b) { + if (a.has_value() && b.has_value()) { + return a->format.name == b->format.name && + a->payload_type == b->payload_type; + } + return !a.has_value() && !b.has_value(); + }; + + if (old_config && config.rtp.ssrc == old_config->rtp.ssrc && + config.rtp.extensions == old_config->rtp.extensions && + payload_types_equal(config.send_codec_spec, + old_config->send_codec_spec)) { + return; + } + + auto rtclog_config = std::make_unique<rtclog::StreamConfig>(); + rtclog_config->local_ssrc = config.rtp.ssrc; + rtclog_config->rtp_extensions = config.rtp.extensions; + if (config.send_codec_spec) { + rtclog_config->codecs.emplace_back(config.send_codec_spec->format.name, + config.send_codec_spec->payload_type, 0); + } + event_log->Log(std::make_unique<RtcEventAudioSendStreamConfig>( + std::move(rtclog_config))); +} + +} // namespace + +constexpr char AudioAllocationConfig::kKey[]; + +std::unique_ptr<StructParametersParser> AudioAllocationConfig::Parser() { + return StructParametersParser::Create( // + "min", &min_bitrate, // + "max", &max_bitrate, // + "prio_rate", &priority_bitrate, // + "prio_rate_raw", &priority_bitrate_raw, // + "rate_prio", &bitrate_priority); +} + +AudioAllocationConfig::AudioAllocationConfig( + const FieldTrialsView& field_trials) { + Parser()->Parse(field_trials.Lookup(kKey)); + if (priority_bitrate_raw && !priority_bitrate.IsZero()) { + RTC_LOG(LS_WARNING) << "'priority_bitrate' and '_raw' are mutually " + "exclusive but both were configured."; + } +} + +namespace internal { +AudioSendStream::AudioSendStream( + Clock* clock, + const webrtc::AudioSendStream::Config& config, + const rtc::scoped_refptr<webrtc::AudioState>& audio_state, + TaskQueueFactory* task_queue_factory, + RtpTransportControllerSendInterface* rtp_transport, + BitrateAllocatorInterface* bitrate_allocator, + RtcEventLog* event_log, + RtcpRttStats* rtcp_rtt_stats, + const 
absl::optional<RtpState>& suspended_rtp_state, + const FieldTrialsView& field_trials) + : AudioSendStream(clock, + config, + audio_state, + task_queue_factory, + rtp_transport, + bitrate_allocator, + event_log, + suspended_rtp_state, + voe::CreateChannelSend(clock, + task_queue_factory, + config.send_transport, + rtcp_rtt_stats, + event_log, + config.frame_encryptor.get(), + config.crypto_options, + config.rtp.extmap_allow_mixed, + config.rtcp_report_interval_ms, + config.rtp.ssrc, + config.frame_transformer, + rtp_transport, + field_trials), + field_trials) {} + +AudioSendStream::AudioSendStream( + Clock* clock, + const webrtc::AudioSendStream::Config& config, + const rtc::scoped_refptr<webrtc::AudioState>& audio_state, + TaskQueueFactory* task_queue_factory, + RtpTransportControllerSendInterface* rtp_transport, + BitrateAllocatorInterface* bitrate_allocator, + RtcEventLog* event_log, + const absl::optional<RtpState>& suspended_rtp_state, + std::unique_ptr<voe::ChannelSendInterface> channel_send, + const FieldTrialsView& field_trials) + : clock_(clock), + field_trials_(field_trials), + allocate_audio_without_feedback_( + field_trials_.IsEnabled("WebRTC-Audio-ABWENoTWCC")), + enable_audio_alr_probing_( + !field_trials_.IsDisabled("WebRTC-Audio-AlrProbing")), + allocation_settings_(field_trials_), + config_(Config(/*send_transport=*/nullptr)), + audio_state_(audio_state), + channel_send_(std::move(channel_send)), + event_log_(event_log), + use_legacy_overhead_calculation_( + field_trials_.IsEnabled("WebRTC-Audio-LegacyOverhead")), + bitrate_allocator_(bitrate_allocator), + rtp_transport_(rtp_transport), + rtp_rtcp_module_(channel_send_->GetRtpRtcp()), + suspended_rtp_state_(suspended_rtp_state) { + RTC_LOG(LS_INFO) << "AudioSendStream: " << config.rtp.ssrc; + RTC_DCHECK(audio_state_); + RTC_DCHECK(channel_send_); + RTC_DCHECK(bitrate_allocator_); + RTC_DCHECK(rtp_transport); + + RTC_DCHECK(rtp_rtcp_module_); + + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + 
ConfigureStream(config, true, nullptr); + UpdateCachedTargetAudioBitrateConstraints(); +} + +AudioSendStream::~AudioSendStream() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_LOG(LS_INFO) << "~AudioSendStream: " << config_.rtp.ssrc; + RTC_DCHECK(!sending_); + channel_send_->ResetSenderCongestionControlObjects(); +} + +const webrtc::AudioSendStream::Config& AudioSendStream::GetConfig() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return config_; +} + +void AudioSendStream::Reconfigure( + const webrtc::AudioSendStream::Config& new_config, + SetParametersCallback callback) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + ConfigureStream(new_config, false, std::move(callback)); +} + +AudioSendStream::ExtensionIds AudioSendStream::FindExtensionIds( + const std::vector<RtpExtension>& extensions) { + ExtensionIds ids; + for (const auto& extension : extensions) { + if (extension.uri == RtpExtension::kAudioLevelUri) { + ids.audio_level = extension.id; + } else if (extension.uri == RtpExtension::kAbsSendTimeUri) { + ids.abs_send_time = extension.id; + } else if (extension.uri == RtpExtension::kTransportSequenceNumberUri) { + ids.transport_sequence_number = extension.id; + } else if (extension.uri == RtpExtension::kMidUri) { + ids.mid = extension.id; + } else if (extension.uri == RtpExtension::kRidUri) { + ids.rid = extension.id; + } else if (extension.uri == RtpExtension::kRepairedRidUri) { + ids.repaired_rid = extension.id; + } else if (extension.uri == RtpExtension::kAbsoluteCaptureTimeUri) { + ids.abs_capture_time = extension.id; + } + } + return ids; +} + +int AudioSendStream::TransportSeqNumId(const AudioSendStream::Config& config) { + return FindExtensionIds(config.rtp.extensions).transport_sequence_number; +} + +void AudioSendStream::ConfigureStream( + const webrtc::AudioSendStream::Config& new_config, + bool first_time, + SetParametersCallback callback) { + RTC_LOG(LS_INFO) << "AudioSendStream::ConfigureStream: " + << new_config.ToString(); + 
UpdateEventLogStreamConfig(event_log_, new_config, + first_time ? nullptr : &config_); + + const auto& old_config = config_; + + // Configuration parameters which cannot be changed. + RTC_DCHECK(first_time || + old_config.send_transport == new_config.send_transport); + RTC_DCHECK(first_time || old_config.rtp.ssrc == new_config.rtp.ssrc); + if (suspended_rtp_state_ && first_time) { + rtp_rtcp_module_->SetRtpState(*suspended_rtp_state_); + } + if (first_time || old_config.rtp.c_name != new_config.rtp.c_name) { + channel_send_->SetRTCP_CNAME(new_config.rtp.c_name); + } + + // Enable the frame encryptor if a new frame encryptor has been provided. + if (first_time || new_config.frame_encryptor != old_config.frame_encryptor) { + channel_send_->SetFrameEncryptor(new_config.frame_encryptor); + } + + if (first_time || + new_config.frame_transformer != old_config.frame_transformer) { + channel_send_->SetEncoderToPacketizerFrameTransformer( + new_config.frame_transformer); + } + + if (first_time || + new_config.rtp.extmap_allow_mixed != old_config.rtp.extmap_allow_mixed) { + rtp_rtcp_module_->SetExtmapAllowMixed(new_config.rtp.extmap_allow_mixed); + } + + const ExtensionIds old_ids = FindExtensionIds(old_config.rtp.extensions); + const ExtensionIds new_ids = FindExtensionIds(new_config.rtp.extensions); + + // Audio level indication + if (first_time || new_ids.audio_level != old_ids.audio_level) { + channel_send_->SetSendAudioLevelIndicationStatus(new_ids.audio_level != 0, + new_ids.audio_level); + } + + if (first_time || new_ids.abs_send_time != old_ids.abs_send_time) { + absl::string_view uri = AbsoluteSendTime::Uri(); + rtp_rtcp_module_->DeregisterSendRtpHeaderExtension(uri); + if (new_ids.abs_send_time) { + rtp_rtcp_module_->RegisterRtpHeaderExtension(uri, new_ids.abs_send_time); + } + } + + bool transport_seq_num_id_changed = + new_ids.transport_sequence_number != old_ids.transport_sequence_number; + if (first_time || + (transport_seq_num_id_changed && 
!allocate_audio_without_feedback_)) { + if (!first_time) { + channel_send_->ResetSenderCongestionControlObjects(); + } + + if (!allocate_audio_without_feedback_ && + new_ids.transport_sequence_number != 0) { + rtp_rtcp_module_->RegisterRtpHeaderExtension( + TransportSequenceNumber::Uri(), new_ids.transport_sequence_number); + // Probing in application limited region is only used in combination with + // send side congestion control, wich depends on feedback packets which + // requires transport sequence numbers to be enabled. + // Optionally request ALR probing but do not override any existing + // request from other streams. + if (enable_audio_alr_probing_) { + rtp_transport_->EnablePeriodicAlrProbing(true); + } + } + channel_send_->RegisterSenderCongestionControlObjects(rtp_transport_); + } + // MID RTP header extension. + if ((first_time || new_ids.mid != old_ids.mid || + new_config.rtp.mid != old_config.rtp.mid) && + new_ids.mid != 0 && !new_config.rtp.mid.empty()) { + rtp_rtcp_module_->RegisterRtpHeaderExtension(RtpMid::Uri(), new_ids.mid); + rtp_rtcp_module_->SetMid(new_config.rtp.mid); + } + + if (first_time || new_ids.abs_capture_time != old_ids.abs_capture_time) { + absl::string_view uri = AbsoluteCaptureTimeExtension::Uri(); + rtp_rtcp_module_->DeregisterSendRtpHeaderExtension(uri); + if (new_ids.abs_capture_time) { + rtp_rtcp_module_->RegisterRtpHeaderExtension(uri, + new_ids.abs_capture_time); + } + } + + if (!ReconfigureSendCodec(new_config)) { + RTC_LOG(LS_ERROR) << "Failed to set up send codec state."; + + webrtc::InvokeSetParametersCallback( + callback, webrtc::RTCError(webrtc::RTCErrorType::INTERNAL_ERROR, + "Failed to set up send codec state.")); + } + + // Set currently known overhead (used in ANA, opus only). 
+ { + MutexLock lock(&overhead_per_packet_lock_); + UpdateOverheadForEncoder(); + } + + channel_send_->CallEncoder([this](AudioEncoder* encoder) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!encoder) { + return; + } + frame_length_range_ = encoder->GetFrameLengthRange(); + UpdateCachedTargetAudioBitrateConstraints(); + }); + + if (sending_) { + ReconfigureBitrateObserver(new_config); + } + + config_ = new_config; + if (!first_time) { + UpdateCachedTargetAudioBitrateConstraints(); + } + + webrtc::InvokeSetParametersCallback(callback, webrtc::RTCError::OK()); +} + +void AudioSendStream::Start() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (sending_) { + return; + } + if (!config_.has_dscp && config_.min_bitrate_bps != -1 && + config_.max_bitrate_bps != -1 && + (allocate_audio_without_feedback_ || TransportSeqNumId(config_) != 0)) { + rtp_transport_->AccountForAudioPacketsInPacedSender(true); + rtp_transport_->IncludeOverheadInPacedSender(); + rtp_rtcp_module_->SetAsPartOfAllocation(true); + ConfigureBitrateObserver(); + } else { + rtp_rtcp_module_->SetAsPartOfAllocation(false); + } + channel_send_->StartSend(); + sending_ = true; + audio_state()->AddSendingStream(this, encoder_sample_rate_hz_, + encoder_num_channels_); +} + +void AudioSendStream::Stop() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!sending_) { + return; + } + + RemoveBitrateObserver(); + channel_send_->StopSend(); + sending_ = false; + audio_state()->RemoveSendingStream(this); +} + +void AudioSendStream::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) { + RTC_CHECK_RUNS_SERIALIZED(&audio_capture_race_checker_); + RTC_DCHECK_GT(audio_frame->sample_rate_hz_, 0); + TRACE_EVENT0("webrtc", "AudioSendStream::SendAudioData"); + double duration = static_cast<double>(audio_frame->samples_per_channel_) / + audio_frame->sample_rate_hz_; + { + // Note: SendAudioData() passes the frame further down the pipeline and it + // may eventually get sent. 
But this method is invoked even if we are not + // connected, as long as we have an AudioSendStream (created as a result of + // an O/A exchange). This means that we are calculating audio levels whether + // or not we are sending samples. + // TODO(https://crbug.com/webrtc/10771): All "media-source" related stats + // should move from send-streams to the local audio sources or tracks; a + // send-stream should not be required to read the microphone audio levels. + MutexLock lock(&audio_level_lock_); + audio_level_.ComputeLevel(*audio_frame, duration); + } + channel_send_->ProcessAndEncodeAudio(std::move(audio_frame)); +} + +bool AudioSendStream::SendTelephoneEvent(int payload_type, + int payload_frequency, + int event, + int duration_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_send_->SetSendTelephoneEventPayloadType(payload_type, + payload_frequency); + return channel_send_->SendTelephoneEventOutband(event, duration_ms); +} + +void AudioSendStream::SetMuted(bool muted) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_send_->SetInputMute(muted); +} + +webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const { + return GetStats(true); +} + +webrtc::AudioSendStream::Stats AudioSendStream::GetStats( + bool has_remote_tracks) const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + webrtc::AudioSendStream::Stats stats; + stats.local_ssrc = config_.rtp.ssrc; + stats.target_bitrate_bps = channel_send_->GetTargetBitrate(); + + webrtc::CallSendStatistics call_stats = channel_send_->GetRTCPStatistics(); + stats.rtcp_packet_type_counts = call_stats.rtcp_packet_type_counts; + stats.payload_bytes_sent = call_stats.payload_bytes_sent; + stats.header_and_padding_bytes_sent = + call_stats.header_and_padding_bytes_sent; + stats.retransmitted_bytes_sent = call_stats.retransmitted_bytes_sent; + stats.packets_sent = call_stats.packetsSent; + stats.total_packet_send_delay = call_stats.total_packet_send_delay; + stats.retransmitted_packets_sent = 
call_stats.retransmitted_packets_sent; + // RTT isn't known until a RTCP report is received. Until then, VoiceEngine + // returns 0 to indicate an error value. + if (call_stats.rttMs > 0) { + stats.rtt_ms = call_stats.rttMs; + } + if (config_.send_codec_spec) { + const auto& spec = *config_.send_codec_spec; + stats.codec_name = spec.format.name; + stats.codec_payload_type = spec.payload_type; + + // Get data from the last remote RTCP report. + for (const ReportBlockData& block : + channel_send_->GetRemoteRTCPReportBlocks()) { + // Lookup report for send ssrc only. + if (block.source_ssrc() == stats.local_ssrc) { + stats.packets_lost = block.cumulative_lost(); + stats.fraction_lost = block.fraction_lost(); + if (spec.format.clockrate_hz > 0) { + stats.jitter_ms = block.jitter(spec.format.clockrate_hz).ms(); + } + break; + } + } + } + + { + MutexLock lock(&audio_level_lock_); + stats.audio_level = audio_level_.LevelFullRange(); + stats.total_input_energy = audio_level_.TotalEnergy(); + stats.total_input_duration = audio_level_.TotalDuration(); + } + + stats.ana_statistics = channel_send_->GetANAStatistics(); + + AudioProcessing* ap = audio_state_->audio_processing(); + if (ap) { + stats.apm_statistics = ap->GetStatistics(has_remote_tracks); + } + + stats.report_block_datas = std::move(call_stats.report_block_datas); + + stats.nacks_received = call_stats.nacks_received; + + return stats; +} + +void AudioSendStream::DeliverRtcp(const uint8_t* packet, size_t length) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + channel_send_->ReceivedRTCPPacket(packet, length); + + { + // Poll if overhead has changed, which it can do if ack triggers us to stop + // sending mid/rid. 
+ MutexLock lock(&overhead_per_packet_lock_); + UpdateOverheadForEncoder(); + } + UpdateCachedTargetAudioBitrateConstraints(); +} + +uint32_t AudioSendStream::OnBitrateUpdated(BitrateAllocationUpdate update) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + + // Pick a target bitrate between the constraints. Overrules the allocator if + // it 1) allocated a bitrate of zero to disable the stream or 2) allocated a + // higher than max to allow for e.g. extra FEC. + RTC_DCHECK(cached_constraints_.has_value()); + update.target_bitrate.Clamp(cached_constraints_->min, + cached_constraints_->max); + update.stable_target_bitrate.Clamp(cached_constraints_->min, + cached_constraints_->max); + + channel_send_->OnBitrateAllocation(update); + + // The amount of audio protection is not exposed by the encoder, hence + // always returning 0. + return 0; +} + +void AudioSendStream::SetTransportOverhead( + int transport_overhead_per_packet_bytes) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + { + MutexLock lock(&overhead_per_packet_lock_); + transport_overhead_per_packet_bytes_ = transport_overhead_per_packet_bytes; + UpdateOverheadForEncoder(); + } + UpdateCachedTargetAudioBitrateConstraints(); +} + +void AudioSendStream::UpdateOverheadForEncoder() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + size_t overhead_per_packet_bytes = GetPerPacketOverheadBytes(); + if (overhead_per_packet_ == overhead_per_packet_bytes) { + return; + } + overhead_per_packet_ = overhead_per_packet_bytes; + + channel_send_->CallEncoder([&](AudioEncoder* encoder) { + encoder->OnReceivedOverhead(overhead_per_packet_bytes); + }); + if (total_packet_overhead_bytes_ != overhead_per_packet_bytes) { + total_packet_overhead_bytes_ = overhead_per_packet_bytes; + if (registered_with_allocator_) { + ConfigureBitrateObserver(); + } + } +} + +size_t AudioSendStream::TestOnlyGetPerPacketOverheadBytes() const { + MutexLock lock(&overhead_per_packet_lock_); + return GetPerPacketOverheadBytes(); +} + +size_t 
AudioSendStream::GetPerPacketOverheadBytes() const { + return transport_overhead_per_packet_bytes_ + + rtp_rtcp_module_->ExpectedPerPacketOverhead(); +} + +RtpState AudioSendStream::GetRtpState() const { + return rtp_rtcp_module_->GetRtpState(); +} + +const voe::ChannelSendInterface* AudioSendStream::GetChannel() const { + return channel_send_.get(); +} + +internal::AudioState* AudioSendStream::audio_state() { + internal::AudioState* audio_state = + static_cast<internal::AudioState*>(audio_state_.get()); + RTC_DCHECK(audio_state); + return audio_state; +} + +const internal::AudioState* AudioSendStream::audio_state() const { + internal::AudioState* audio_state = + static_cast<internal::AudioState*>(audio_state_.get()); + RTC_DCHECK(audio_state); + return audio_state; +} + +void AudioSendStream::StoreEncoderProperties(int sample_rate_hz, + size_t num_channels) { + encoder_sample_rate_hz_ = sample_rate_hz; + encoder_num_channels_ = num_channels; + if (sending_) { + // Update AudioState's information about the stream. + audio_state()->AddSendingStream(this, sample_rate_hz, num_channels); + } +} + +// Apply current codec settings to a single voe::Channel used for sending. +bool AudioSendStream::SetupSendCodec(const Config& new_config) { + RTC_DCHECK(new_config.send_codec_spec); + const auto& spec = *new_config.send_codec_spec; + + RTC_DCHECK(new_config.encoder_factory); + std::unique_ptr<AudioEncoder> encoder = + new_config.encoder_factory->MakeAudioEncoder( + spec.payload_type, spec.format, new_config.codec_pair_id); + + if (!encoder) { + RTC_DLOG(LS_ERROR) << "Unable to create encoder for " + << rtc::ToString(spec.format); + return false; + } + + // If a bitrate has been specified for the codec, use it over the + // codec's default. + if (spec.target_bitrate_bps) { + encoder->OnReceivedTargetAudioBitrate(*spec.target_bitrate_bps); + } + + // Enable ANA if configured (currently only used by Opus). 
+ if (new_config.audio_network_adaptor_config) { + if (encoder->EnableAudioNetworkAdaptor( + *new_config.audio_network_adaptor_config, event_log_)) { + RTC_LOG(LS_INFO) << "Audio network adaptor enabled on SSRC " + << new_config.rtp.ssrc; + } else { + RTC_LOG(LS_INFO) << "Failed to enable Audio network adaptor on SSRC " + << new_config.rtp.ssrc; + } + } + + // Wrap the encoder in an AudioEncoderCNG, if VAD is enabled. + if (spec.cng_payload_type) { + AudioEncoderCngConfig cng_config; + cng_config.num_channels = encoder->NumChannels(); + cng_config.payload_type = *spec.cng_payload_type; + cng_config.speech_encoder = std::move(encoder); + cng_config.vad_mode = Vad::kVadNormal; + encoder = CreateComfortNoiseEncoder(std::move(cng_config)); + + RegisterCngPayloadType(*spec.cng_payload_type, + new_config.send_codec_spec->format.clockrate_hz); + } + + // Wrap the encoder in a RED encoder, if RED is enabled. + if (spec.red_payload_type) { + AudioEncoderCopyRed::Config red_config; + red_config.payload_type = *spec.red_payload_type; + red_config.speech_encoder = std::move(encoder); + encoder = std::make_unique<AudioEncoderCopyRed>(std::move(red_config), + field_trials_); + } + + // Set currently known overhead (used in ANA, opus only). + // If overhead changes later, it will be updated in UpdateOverheadForEncoder. + { + MutexLock lock(&overhead_per_packet_lock_); + size_t overhead = GetPerPacketOverheadBytes(); + if (overhead > 0) { + encoder->OnReceivedOverhead(overhead); + } + } + + StoreEncoderProperties(encoder->SampleRateHz(), encoder->NumChannels()); + channel_send_->SetEncoder(new_config.send_codec_spec->payload_type, + std::move(encoder)); + + return true; +} + +bool AudioSendStream::ReconfigureSendCodec(const Config& new_config) { + const auto& old_config = config_; + + if (!new_config.send_codec_spec) { + // We cannot de-configure a send codec. So we will do nothing. + // By design, the send codec should have not been configured. 
+ RTC_DCHECK(!old_config.send_codec_spec); + return true; + } + + if (new_config.send_codec_spec == old_config.send_codec_spec && + new_config.audio_network_adaptor_config == + old_config.audio_network_adaptor_config) { + return true; + } + + // If we have no encoder, or the format or payload type's changed, create a + // new encoder. + if (!old_config.send_codec_spec || + new_config.send_codec_spec->format != + old_config.send_codec_spec->format || + new_config.send_codec_spec->payload_type != + old_config.send_codec_spec->payload_type || + new_config.send_codec_spec->red_payload_type != + old_config.send_codec_spec->red_payload_type) { + return SetupSendCodec(new_config); + } + + const absl::optional<int>& new_target_bitrate_bps = + new_config.send_codec_spec->target_bitrate_bps; + // If a bitrate has been specified for the codec, use it over the + // codec's default. + if (new_target_bitrate_bps && + new_target_bitrate_bps != + old_config.send_codec_spec->target_bitrate_bps) { + channel_send_->CallEncoder([&](AudioEncoder* encoder) { + encoder->OnReceivedTargetAudioBitrate(*new_target_bitrate_bps); + }); + } + + ReconfigureANA(new_config); + ReconfigureCNG(new_config); + + return true; +} + +void AudioSendStream::ReconfigureANA(const Config& new_config) { + if (new_config.audio_network_adaptor_config == + config_.audio_network_adaptor_config) { + return; + } + if (new_config.audio_network_adaptor_config) { + // This lock needs to be acquired before CallEncoder, since it aquires + // another lock and we need to maintain the same order at all call sites to + // avoid deadlock. 
+    MutexLock lock(&overhead_per_packet_lock_);
+    size_t overhead = GetPerPacketOverheadBytes();
+    channel_send_->CallEncoder([&](AudioEncoder* encoder) {
+      if (encoder->EnableAudioNetworkAdaptor(
+              *new_config.audio_network_adaptor_config, event_log_)) {
+        RTC_LOG(LS_INFO) << "Audio network adaptor enabled on SSRC "
+                         << new_config.rtp.ssrc;
+        if (overhead > 0) {
+          encoder->OnReceivedOverhead(overhead);
+        }
+      } else {
+        RTC_LOG(LS_INFO) << "Failed to enable Audio network adaptor on SSRC "
+                         << new_config.rtp.ssrc;
+      }
+    });
+  } else {
+    channel_send_->CallEncoder(
+        [&](AudioEncoder* encoder) { encoder->DisableAudioNetworkAdaptor(); });
+    RTC_LOG(LS_INFO) << "Audio network adaptor disabled on SSRC "
+                     << new_config.rtp.ssrc;
+  }
+}
+
+// Applies a change of the comfort noise (CNG) payload type.
+//
+// Does nothing when the CNG payload type in `new_config` equals the one in
+// `config_`. Otherwise the new payload type (if any) is registered and the
+// current encoder is re-wrapped: wrapped in a comfort noise encoder when a CNG
+// payload type is set, left unwrapped when it is not.
+void AudioSendStream::ReconfigureCNG(const Config& new_config) {
+  if (new_config.send_codec_spec->cng_payload_type ==
+      config_.send_codec_spec->cng_payload_type) {
+    return;
+  }
+
+  // Register the CNG payload type if it's been added, don't do anything if CNG
+  // is removed. Payload types must not be redefined.
+  if (new_config.send_codec_spec->cng_payload_type) {
+    RegisterCngPayloadType(*new_config.send_codec_spec->cng_payload_type,
+                           new_config.send_codec_spec->format.clockrate_hz);
+  }
+
+  // Wrap or unwrap the encoder in an AudioEncoderCNG.
+  channel_send_->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* encoder_ptr) {
+    std::unique_ptr<AudioEncoder> old_encoder(std::move(*encoder_ptr));
+    auto sub_encoders = old_encoder->ReclaimContainedEncoders();
+    if (!sub_encoders.empty()) {
+      // Replace `old_encoder` with its first sub encoder. We need to put the
+      // sub encoder in a temporary first, since otherwise the old value of
+      // `old_encoder` would be destroyed before the new value got assigned,
+      // which would be bad since the new value is a part of the old
+      // value.
+      auto tmp = std::move(sub_encoders[0]);
+      old_encoder = std::move(tmp);
+    }
+    if (new_config.send_codec_spec->cng_payload_type) {
+      AudioEncoderCngConfig config;
+      config.speech_encoder = std::move(old_encoder);
+      // Read the channel count from the encoder that was just moved into
+      // `config`; `old_encoder` is empty at this point.
+      config.num_channels = config.speech_encoder->NumChannels();
+      config.payload_type = *new_config.send_codec_spec->cng_payload_type;
+      config.vad_mode = Vad::kVadNormal;
+      *encoder_ptr = CreateComfortNoiseEncoder(std::move(config));
+    } else {
+      *encoder_ptr = std::move(old_encoder);
+    }
+  });
+}
+
+// Registers this stream with the bitrate allocator, or removes it, according
+// to `new_config`.
+//
+// Returns without doing anything when the bitrate limits, the bitrate
+// priority, the transport sequence number extension id and the audio network
+// adaptor config of `new_config` all match `config_`.
+void AudioSendStream::ReconfigureBitrateObserver(
+    const webrtc::AudioSendStream::Config& new_config) {
+  // Since the Config's default is for both of these to be -1, this test will
+  // allow us to configure the bitrate observer if the new config has bitrate
+  // limits set, but would only have us call RemoveBitrateObserver if we were
+  // previously configured with bitrate limits.
+  if (config_.min_bitrate_bps == new_config.min_bitrate_bps &&
+      config_.max_bitrate_bps == new_config.max_bitrate_bps &&
+      config_.bitrate_priority == new_config.bitrate_priority &&
+      TransportSeqNumId(config_) == TransportSeqNumId(new_config) &&
+      config_.audio_network_adaptor_config ==
+          new_config.audio_network_adaptor_config) {
+    return;
+  }
+
+  // The stream takes part in allocation only when `has_dscp` is false, both
+  // bitrate limits are set (not -1) and a transport sequence number extension
+  // id is configured (not 0).
+  if (!new_config.has_dscp && new_config.min_bitrate_bps != -1 &&
+      new_config.max_bitrate_bps != -1 && TransportSeqNumId(new_config) != 0) {
+    rtp_transport_->AccountForAudioPacketsInPacedSender(true);
+    rtp_transport_->IncludeOverheadInPacedSender();
+    // We may get a callback immediately as the observer is registered, so
+    // make sure the bitrate limits in config_ are up-to-date.
+    config_.min_bitrate_bps = new_config.min_bitrate_bps;
+    config_.max_bitrate_bps = new_config.max_bitrate_bps;
+
+    config_.bitrate_priority = new_config.bitrate_priority;
+    ConfigureBitrateObserver();
+    rtp_rtcp_module_->SetAsPartOfAllocation(true);
+  } else {
+    rtp_transport_->AccountForAudioPacketsInPacedSender(false);
+    RemoveBitrateObserver();
+    rtp_rtcp_module_->SetAsPartOfAllocation(false);
+  }
+}
+
+// Adds this stream as an observer of `bitrate_allocator_` (or updates the
+// existing registration) using the current min/max constraints and a priority
+// bitrate that is compensated for packet overhead unless a raw priority
+// bitrate was configured in `allocation_settings_`.
+void AudioSendStream::ConfigureBitrateObserver() {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  // This either updates the current observer or adds a new observer.
+  // TODO(srte): Add overhead compensation here.
+  auto constraints = GetMinMaxBitrateConstraints();
+  // NOTE(review): `constraints` is dereferenced below; this check is the only
+  // guard against an empty optional and is presumably compiled out in
+  // non-DCHECK builds - confirm callers guarantee valid constraints.
+  RTC_DCHECK(constraints.has_value());
+
+  DataRate priority_bitrate = allocation_settings_.priority_bitrate;
+  if (use_legacy_overhead_calculation_) {
+    // OverheadPerPacket = Ipv4(20B) + UDP(8B) + SRTP(10B) + RTP(12)
+    constexpr int kOverheadPerPacket = 20 + 8 + 10 + 12;
+    const TimeDelta kMinPacketDuration = TimeDelta::Millis(20);
+    // A fixed per-packet overhead spread over the shortest packet duration
+    // gives the largest overhead rate.
+    DataRate max_overhead =
+        DataSize::Bytes(kOverheadPerPacket) / kMinPacketDuration;
+    priority_bitrate += max_overhead;
+  } else {
+    RTC_DCHECK(frame_length_range_);
+    const DataSize overhead_per_packet =
+        DataSize::Bytes(total_packet_overhead_bytes_);
+    // `second` is presumably the longest frame length of the range, which
+    // yields the smallest overhead rate - TODO confirm the pair ordering.
+    DataRate min_overhead = overhead_per_packet / frame_length_range_->second;
+    priority_bitrate += min_overhead;
+  }
+
+  // A raw priority bitrate, when configured, replaces the overhead-compensated
+  // value computed above.
+  if (allocation_settings_.priority_bitrate_raw) {
+    priority_bitrate = *allocation_settings_.priority_bitrate_raw;
+  }
+
+  bitrate_allocator_->AddObserver(
+      this,
+      MediaStreamAllocationConfig{
+          constraints->min.bps<uint32_t>(), constraints->max.bps<uint32_t>(), 0,
+          priority_bitrate.bps(), true,
+          allocation_settings_.bitrate_priority.value_or(
+              config_.bitrate_priority)});
+
+  registered_with_allocator_ = true;
+}
+
+// Clears the registration flag and removes this stream from
+// `bitrate_allocator_`.
+void AudioSendStream::RemoveBitrateObserver() {
+  registered_with_allocator_ = false;
+  bitrate_allocator_->RemoveObserver(this);
+}
+
+// Computes the min/max target bitrate constraints for this stream, including
+// packet overhead.
+//
+// Starts from the limits in `config_`, lets `allocation_settings_` override
+// either bound, then adds an overhead rate to both bounds. Returns
+// absl::nullopt (after logging a warning) when a configured limit is negative,
+// when max ends up below min, or when the non-legacy overhead calculation is
+// used and `frame_length_range_` is not set.
+absl::optional<AudioSendStream::TargetAudioBitrateConstraints>
+AudioSendStream::GetMinMaxBitrateConstraints() const {
+  if (config_.min_bitrate_bps < 0 || config_.max_bitrate_bps < 0) {
+    RTC_LOG(LS_WARNING) << "Config is invalid: min_bitrate_bps="
+                        << config_.min_bitrate_bps
+                        << "; max_bitrate_bps=" << config_.max_bitrate_bps
+                        << "; both expected greater or equal to 0";
+    return absl::nullopt;
+  }
+  TargetAudioBitrateConstraints constraints{
+      DataRate::BitsPerSec(config_.min_bitrate_bps),
+      DataRate::BitsPerSec(config_.max_bitrate_bps)};
+
+  // If bitrates were explicitly overridden via field trial, use those values.
+  if (allocation_settings_.min_bitrate)
+    constraints.min = *allocation_settings_.min_bitrate;
+  if (allocation_settings_.max_bitrate)
+    constraints.max = *allocation_settings_.max_bitrate;
+
+  RTC_DCHECK_GE(constraints.min, DataRate::Zero());
+  RTC_DCHECK_GE(constraints.max, DataRate::Zero());
+  if (constraints.max < constraints.min) {
+    RTC_LOG(LS_WARNING) << "TargetAudioBitrateConstraints::max is less than "
+                        << "TargetAudioBitrateConstraints::min";
+    return absl::nullopt;
+  }
+  if (use_legacy_overhead_calculation_) {
+    // OverheadPerPacket = Ipv4(20B) + UDP(8B) + SRTP(10B) + RTP(12)
+    const DataSize kOverheadPerPacket = DataSize::Bytes(20 + 8 + 10 + 12);
+    const TimeDelta kMaxFrameLength =
+        TimeDelta::Millis(60);  // Based on Opus spec
+    // The same fixed overhead rate is added to both bounds.
+    const DataRate kMinOverhead = kOverheadPerPacket / kMaxFrameLength;
+    constraints.min += kMinOverhead;
+    constraints.max += kMinOverhead;
+  } else {
+    if (!frame_length_range_.has_value()) {
+      RTC_LOG(LS_WARNING) << "frame_length_range_ is not set";
+      return absl::nullopt;
+    }
+    const DataSize kOverheadPerPacket =
+        DataSize::Bytes(total_packet_overhead_bytes_);
+    // min uses `second` and max uses `first` of the frame length range;
+    // presumably `first` is the shortest and `second` the longest frame
+    // length - TODO confirm the pair ordering.
+    constraints.min += kOverheadPerPacket / frame_length_range_->second;
+    constraints.max += kOverheadPerPacket / frame_length_range_->first;
+  }
+  return constraints;
+}
+
+// Forwards the CNG payload type and clock rate to the send channel.
+void AudioSendStream::RegisterCngPayloadType(int payload_type,
+                                             int clockrate_hz) {
+  channel_send_->RegisterCngPayloadType(payload_type, clockrate_hz);
+}
+
+// Refreshes `cached_constraints_` from GetMinMaxBitrateConstraints(). The
+// previously cached value is kept when no valid constraints can be computed.
+void AudioSendStream::UpdateCachedTargetAudioBitrateConstraints() {
+  absl::optional<AudioSendStream::TargetAudioBitrateConstraints>
+      new_constraints = GetMinMaxBitrateConstraints();
+  if (!new_constraints.has_value()) {
+    return;
+  }
+  cached_constraints_ = new_constraints;
+}
+
+}  // namespace internal
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/audio_send_stream.h b/third_party/libwebrtc/audio/audio_send_stream.h
new file mode 100644
index 0000000000..62ccd524cb
--- /dev/null
+++ b/third_party/libwebrtc/audio/audio_send_stream.h
@@ -0,0 +1,238 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_AUDIO_SEND_STREAM_H_
+#define AUDIO_AUDIO_SEND_STREAM_H_
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/functional/any_invocable.h"
+#include "api/field_trials_view.h"
+#include "api/sequence_checker.h"
+#include "api/task_queue/task_queue_base.h"
+#include "audio/audio_level.h"
+#include "audio/channel_send.h"
+#include "call/audio_send_stream.h"
+#include "call/audio_state.h"
+#include "call/bitrate_allocator.h"
+#include "modules/rtp_rtcp/source/rtp_rtcp_interface.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/task_queue.h"
+
+namespace webrtc {
+class RtcEventLog;
+class RtcpRttStats;
+class RtpTransportControllerSendInterface;
+
+// Overrides for audio bitrate allocation, constructed from a FieldTrialsView.
+// `kKey` is presumably the name of the field trial the values are parsed
+// from - verify against the constructor definition.
+struct AudioAllocationConfig {
+  static constexpr char kKey[] = "WebRTC-Audio-Allocation";
+  // Field Trial configured bitrates to use as overrides over default/user
+  // configured bitrate range when audio bitrate allocation is enabled.
+  absl::optional<DataRate> min_bitrate;
+  absl::optional<DataRate> max_bitrate;
+  DataRate priority_bitrate = DataRate::Zero();
+  // By default the priority_bitrate is compensated for packet overhead.
+  // Use this flag to configure a raw value instead.
+  absl::optional<DataRate> priority_bitrate_raw;
+  absl::optional<double> bitrate_priority;
+
+  std::unique_ptr<StructParametersParser> Parser();
+  explicit AudioAllocationConfig(const FieldTrialsView& field_trials);
+};
+namespace internal {
+class AudioState;
+
+// Implementation of webrtc::AudioSendStream that is also registered as a
+// BitrateAllocatorObserver. The class is neither default-constructible nor
+// copyable.
+class AudioSendStream final : public webrtc::AudioSendStream,
+                              public webrtc::BitrateAllocatorObserver {
+ public:
+  AudioSendStream(Clock* clock,
+                  const webrtc::AudioSendStream::Config& config,
+                  const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+                  TaskQueueFactory* task_queue_factory,
+                  RtpTransportControllerSendInterface* rtp_transport,
+                  BitrateAllocatorInterface* bitrate_allocator,
+                  RtcEventLog* event_log,
+                  RtcpRttStats* rtcp_rtt_stats,
+                  const absl::optional<RtpState>& suspended_rtp_state,
+                  const FieldTrialsView& field_trials);
+  // For unit tests, which need to supply a mock ChannelSend.
+  AudioSendStream(Clock* clock,
+                  const webrtc::AudioSendStream::Config& config,
+                  const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+                  TaskQueueFactory* task_queue_factory,
+                  RtpTransportControllerSendInterface* rtp_transport,
+                  BitrateAllocatorInterface* bitrate_allocator,
+                  RtcEventLog* event_log,
+                  const absl::optional<RtpState>& suspended_rtp_state,
+                  std::unique_ptr<voe::ChannelSendInterface> channel_send,
+                  const FieldTrialsView& field_trials);
+
+  AudioSendStream() = delete;
+  AudioSendStream(const AudioSendStream&) = delete;
+  AudioSendStream& operator=(const AudioSendStream&) = delete;
+
+  ~AudioSendStream() override;
+
+  // webrtc::AudioSendStream implementation.
+  const webrtc::AudioSendStream::Config& GetConfig() const override;
+  void Reconfigure(const webrtc::AudioSendStream::Config& config,
+                   SetParametersCallback callback) override;
+  void Start() override;
+  void Stop() override;
+  void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override;
+  bool SendTelephoneEvent(int payload_type,
+                          int payload_frequency,
+                          int event,
+                          int duration_ms) override;
+  void SetMuted(bool muted) override;
+  webrtc::AudioSendStream::Stats GetStats() const override;
+  webrtc::AudioSendStream::Stats GetStats(
+      bool has_remote_tracks) const override;
+
+  void DeliverRtcp(const uint8_t* packet, size_t length);
+
+  // Implements BitrateAllocatorObserver.
+  uint32_t OnBitrateUpdated(BitrateAllocationUpdate update) override;
+
+  void SetTransportOverhead(int transport_overhead_per_packet_bytes);
+
+  RtpState GetRtpState() const;
+  const voe::ChannelSendInterface* GetChannel() const;
+
+  // Returns combined per-packet overhead.
+  // Must be called without `overhead_per_packet_lock_` held.
+  size_t TestOnlyGetPerPacketOverheadBytes() const
+      RTC_LOCKS_EXCLUDED(overhead_per_packet_lock_);
+
+ private:
+  class TimedTransport;
+  // Constraints including overhead.
+  struct TargetAudioBitrateConstraints {
+    DataRate min;
+    DataRate max;
+  };
+
+  internal::AudioState* audio_state();
+  const internal::AudioState* audio_state() const;
+
+  void StoreEncoderProperties(int sample_rate_hz, size_t num_channels)
+      RTC_RUN_ON(worker_thread_checker_);
+
+  // Configuration helpers. All of them are annotated to run on the sequence
+  // guarded by `worker_thread_checker_`.
+  void ConfigureStream(const Config& new_config,
+                       bool first_time,
+                       SetParametersCallback callback)
+      RTC_RUN_ON(worker_thread_checker_);
+  bool SetupSendCodec(const Config& new_config)
+      RTC_RUN_ON(worker_thread_checker_);
+  bool ReconfigureSendCodec(const Config& new_config)
+      RTC_RUN_ON(worker_thread_checker_);
+  void ReconfigureANA(const Config& new_config)
+      RTC_RUN_ON(worker_thread_checker_);
+  void ReconfigureCNG(const Config& new_config)
+      RTC_RUN_ON(worker_thread_checker_);
+  void ReconfigureBitrateObserver(const Config& new_config)
+      RTC_RUN_ON(worker_thread_checker_);
+
+  void ConfigureBitrateObserver() RTC_RUN_ON(worker_thread_checker_);
+  void RemoveBitrateObserver() RTC_RUN_ON(worker_thread_checker_);
+
+  // Returns bitrate constraints, maybe including overhead when enabled by
+  // field trial.
+  absl::optional<TargetAudioBitrateConstraints> GetMinMaxBitrateConstraints()
+      const RTC_RUN_ON(worker_thread_checker_);
+
+  // Sets per-packet overhead on the encoder (for ANA) based on current known
+  // values of transport and packetization overheads.
+  void UpdateOverheadForEncoder()
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(overhead_per_packet_lock_);
+
+  // Returns combined per-packet overhead.
+  // Requires `overhead_per_packet_lock_` to be held by the caller.
+  size_t GetPerPacketOverheadBytes() const
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(overhead_per_packet_lock_);
+
+  void RegisterCngPayloadType(int payload_type, int clockrate_hz)
+      RTC_RUN_ON(worker_thread_checker_);
+
+  void UpdateCachedTargetAudioBitrateConstraints()
+      RTC_RUN_ON(worker_thread_checker_);
+
+  Clock* clock_;
+  const FieldTrialsView& field_trials_;
+
+  SequenceChecker worker_thread_checker_;
+  rtc::RaceChecker audio_capture_race_checker_;
+
+  const bool allocate_audio_without_feedback_;
+  // Initialized from `allocate_audio_without_feedback_`, which is declared
+  // just above and is therefore initialized first.
+  const bool force_no_audio_feedback_ = allocate_audio_without_feedback_;
+  const bool enable_audio_alr_probing_;
+  const AudioAllocationConfig allocation_settings_;
+
+  webrtc::AudioSendStream::Config config_
+      RTC_GUARDED_BY(worker_thread_checker_);
+  rtc::scoped_refptr<webrtc::AudioState> audio_state_;
+  const std::unique_ptr<voe::ChannelSendInterface> channel_send_;
+  RtcEventLog* const event_log_;
+  const bool use_legacy_overhead_calculation_;
+
+  int encoder_sample_rate_hz_ RTC_GUARDED_BY(worker_thread_checker_) = 0;
+  size_t encoder_num_channels_ RTC_GUARDED_BY(worker_thread_checker_) = 0;
+  bool sending_ RTC_GUARDED_BY(worker_thread_checker_) = false;
+  mutable Mutex audio_level_lock_;
+  // Keeps track of audio level, total audio energy and total samples duration.
+  // https://w3c.github.io/webrtc-stats/#dom-rtcaudiohandlerstats-totalaudioenergy
+  webrtc::voe::AudioLevel audio_level_ RTC_GUARDED_BY(audio_level_lock_);
+
+  BitrateAllocatorInterface* const bitrate_allocator_
+      RTC_GUARDED_BY(worker_thread_checker_);
+  // Last valid constraints stored by
+  // UpdateCachedTargetAudioBitrateConstraints(); empty until then.
+  absl::optional<AudioSendStream::TargetAudioBitrateConstraints>
+      cached_constraints_ RTC_GUARDED_BY(worker_thread_checker_) =
+          absl::nullopt;
+  RtpTransportControllerSendInterface* const rtp_transport_;
+
+  RtpRtcpInterface* const rtp_rtcp_module_;
+  absl::optional<RtpState> const suspended_rtp_state_;
+
+  // RFC 5285: Each distinct extension MUST have a unique ID. The value 0 is
+  // reserved for padding and MUST NOT be used as a local identifier.
+  // So it should be safe to use 0 here to indicate "not configured".
+  struct ExtensionIds {
+    int audio_level = 0;
+    int abs_send_time = 0;
+    int abs_capture_time = 0;
+    int transport_sequence_number = 0;
+    int mid = 0;
+    int rid = 0;
+    int repaired_rid = 0;
+  };
+  static ExtensionIds FindExtensionIds(
+      const std::vector<RtpExtension>& extensions);
+  static int TransportSeqNumId(const Config& config);
+
+  mutable Mutex overhead_per_packet_lock_;
+  size_t overhead_per_packet_ RTC_GUARDED_BY(overhead_per_packet_lock_) = 0;
+
+  // Current transport overhead (ICE, TURN, etc.)
+  size_t transport_overhead_per_packet_bytes_
+      RTC_GUARDED_BY(overhead_per_packet_lock_) = 0;
+
+  // True while this stream is added as an observer of `bitrate_allocator_`.
+  bool registered_with_allocator_ RTC_GUARDED_BY(worker_thread_checker_) =
+      false;
+  size_t total_packet_overhead_bytes_ RTC_GUARDED_BY(worker_thread_checker_) =
+      0;
+  absl::optional<std::pair<TimeDelta, TimeDelta>> frame_length_range_
+      RTC_GUARDED_BY(worker_thread_checker_);
+};
+}  // namespace internal
+}  // namespace webrtc
+
+#endif  // AUDIO_AUDIO_SEND_STREAM_H_
diff --git a/third_party/libwebrtc/audio/audio_send_stream_tests.cc b/third_party/libwebrtc/audio/audio_send_stream_tests.cc
new file mode 100644
index 0000000000..ff95ed70e1
--- /dev/null
+++ b/third_party/libwebrtc/audio/audio_send_stream_tests.cc
@@ -0,0 +1,249 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
+#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
+#include "modules/rtp_rtcp/source/rtp_packet.h"
+#include "test/call_test.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+#include "test/rtcp_packet_parser.h"
+#include "test/video_test_constants.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+// RTP header extension ids used by the tests in this file.
+enum : int {  // The first valid value is 1.
+  kAudioLevelExtensionId = 1,
+  kTransportSequenceNumberExtensionId,
+};
+
+// Base class for the observers below: a SendTest that reports exactly one
+// audio stream and no video or FlexFEC streams.
+class AudioSendTest : public SendTest {
+ public:
+  AudioSendTest() : SendTest(VideoTestConstants::kDefaultTimeout) {}
+
+  size_t GetNumVideoStreams() const override { return 0; }
+  size_t GetNumAudioStreams() const override { return 1; }
+  size_t GetNumFlexfecStreams() const override { return 0; }
+};
+}  // namespace
+
+using AudioSendStreamCallTest = CallTest;
+
+// Sets a CNAME on the send config and waits for an outgoing RTCP packet whose
+// SDES carries exactly one chunk with that CNAME.
+TEST_F(AudioSendStreamCallTest, SupportsCName) {
+  // NOTE(review): never modified in this test; could presumably be const.
+  static std::string kCName = "PjqatC14dGfbVwGPUOA9IH7RlsFDbWl4AhXEiDsBizo=";
+  class CNameObserver : public AudioSendTest {
+   public:
+    CNameObserver() = default;
+
+   private:
+    Action OnSendRtcp(rtc::ArrayView<const uint8_t> packet) override {
+      RtcpPacketParser parser;
+      EXPECT_TRUE(parser.Parse(packet));
+      // Only RTCP packets that contain an SDES are checked; others pass
+      // through without completing the test.
+      if (parser.sdes()->num_packets() > 0) {
+        EXPECT_EQ(1u, parser.sdes()->chunks().size());
+        EXPECT_EQ(kCName, parser.sdes()->chunks()[0].cname);
+
+        observation_complete_.Set();
+      }
+
+      return SEND_PACKET;
+    }
+
+    void ModifyAudioConfigs(AudioSendStream::Config* send_config,
+                            std::vector<AudioReceiveStreamInterface::Config>*
+                                receive_configs) override {
+      send_config->rtp.c_name = kCName;
+    }
+
+    void PerformTest() override {
+      EXPECT_TRUE(Wait()) << "Timed out while waiting for RTCP with CNAME.";
+    }
+  } test;
+
+  RunBaseTest(&test);
+}
+
+// Clears all configured RTP header extensions and checks that the first sent
+// RTP packet parses and has the extension bit of its first byte cleared.
+TEST_F(AudioSendStreamCallTest, NoExtensionsByDefault) {
+  class NoExtensionsObserver : public AudioSendTest {
+   public:
+    NoExtensionsObserver() = default;
+
+   private:
+    Action OnSendRtp(rtc::ArrayView<const uint8_t> packet) override {
+      RtpPacket rtp_packet;
+      EXPECT_TRUE(rtp_packet.Parse(packet));  // rtp packet is valid.
+      EXPECT_EQ(packet[0] & 0b0001'0000, 0);  // extension bit not set.
+
+      observation_complete_.Set();
+      return SEND_PACKET;
+    }
+
+    void ModifyAudioConfigs(AudioSendStream::Config* send_config,
+                            std::vector<AudioReceiveStreamInterface::Config>*
+                                receive_configs) override {
+      send_config->rtp.extensions.clear();
+    }
+
+    void PerformTest() override {
+      EXPECT_TRUE(Wait()) << "Timed out while waiting for a single RTP packet.";
+    }
+  } test;
+
+  RunBaseTest(&test);
+}
+
+// Configures only the audio level extension and waits until a sent RTP packet
+// carries that extension with a non-zero level.
+TEST_F(AudioSendStreamCallTest, SupportsAudioLevel) {
+  class AudioLevelObserver : public AudioSendTest {
+   public:
+    AudioLevelObserver() : AudioSendTest() {
+      extensions_.Register<AudioLevel>(kAudioLevelExtensionId);
+    }
+
+    Action OnSendRtp(rtc::ArrayView<const uint8_t> packet) override {
+      RtpPacket rtp_packet(&extensions_);
+      EXPECT_TRUE(rtp_packet.Parse(packet));
+
+      uint8_t audio_level = 0;
+      bool voice = false;
+      EXPECT_TRUE(rtp_packet.GetExtension<AudioLevel>(&voice, &audio_level));
+      if (audio_level != 0) {
+        // Wait for at least one packet with a non-zero level.
+        observation_complete_.Set();
+      } else {
+        RTC_LOG(LS_WARNING) << "Got a packet with zero audioLevel - waiting"
+                               " for another packet...";
+      }
+
+      return SEND_PACKET;
+    }
+
+    void ModifyAudioConfigs(AudioSendStream::Config* send_config,
+                            std::vector<AudioReceiveStreamInterface::Config>*
+                                receive_configs) override {
+      send_config->rtp.extensions.clear();
+      send_config->rtp.extensions.push_back(
+          RtpExtension(RtpExtension::kAudioLevelUri, kAudioLevelExtensionId));
+    }
+
+    void PerformTest() override {
+      EXPECT_TRUE(Wait()) << "Timed out while waiting for single RTP packet.";
+    }
+
+   private:
+    // Extension map used to parse the outgoing packets.
+    RtpHeaderExtensionMap extensions_;
+  } test;
+
+  RunBaseTest(&test);
+}
+
+// Observer that configures only the transport-wide sequence number extension
+// and checks the first sent RTP packet: the extension must be present exactly
+// when `expect_sequence_number` is true, and neither TransmissionOffset nor
+// AbsoluteSendTime may be present.
+class TransportWideSequenceNumberObserver : public AudioSendTest {
+ public:
+  explicit TransportWideSequenceNumberObserver(bool expect_sequence_number)
+      : AudioSendTest(), expect_sequence_number_(expect_sequence_number) {
+    extensions_.Register<TransportSequenceNumber>(
+        kTransportSequenceNumberExtensionId);
+  }
+
+ private:
+  Action OnSendRtp(rtc::ArrayView<const uint8_t> packet) override {
+    RtpPacket rtp_packet(&extensions_);
+    EXPECT_TRUE(rtp_packet.Parse(packet));
+
+    EXPECT_EQ(rtp_packet.HasExtension<TransportSequenceNumber>(),
+              expect_sequence_number_);
+    EXPECT_FALSE(rtp_packet.HasExtension<TransmissionOffset>());
+    EXPECT_FALSE(rtp_packet.HasExtension<AbsoluteSendTime>());
+
+    observation_complete_.Set();
+
+    return SEND_PACKET;
+  }
+
+  void ModifyAudioConfigs(AudioSendStream::Config* send_config,
+                          std::vector<AudioReceiveStreamInterface::Config>*
+                              receive_configs) override {
+    send_config->rtp.extensions.clear();
+    send_config->rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kTransportSequenceNumberUri,
+                     kTransportSequenceNumberExtensionId));
+  }
+
+  void PerformTest() override {
+    EXPECT_TRUE(Wait()) << "Timed out while waiting for a single RTP packet.";
+  }
+  const bool expect_sequence_number_;
+  RtpHeaderExtensionMap extensions_;
+};
+
+// NOTE(review): despite the name, no field trial is set up in this test body;
+// the name may predate a behavior change - confirm.
+TEST_F(AudioSendStreamCallTest,
+       SendsTransportWideSequenceNumbersInFieldTrial) {
+  TransportWideSequenceNumberObserver test(/*expect_sequence_number=*/true);
+  RunBaseTest(&test);
+}
+
+// Queues telephone events kDtmfEventFirst..kDtmfEventLast on the send stream
+// and checks that the DTMF packets show the events in increasing order, one
+// step at a time, until the last event has been seen.
+TEST_F(AudioSendStreamCallTest, SendDtmf) {
+  static const uint8_t kDtmfPayloadType = 120;
+  static const int kDtmfPayloadFrequency = 8000;
+  static const int kDtmfEventFirst = 12;
+  static const int kDtmfEventLast = 31;
+  static const int kDtmfDuration = 50;
+  class DtmfObserver : public AudioSendTest {
+   public:
+    DtmfObserver() = default;
+
+   private:
+    Action OnSendRtp(rtc::ArrayView<const uint8_t> packet) override {
+      RtpPacket rtp_packet;
+      EXPECT_TRUE(rtp_packet.Parse(packet));
+
+      if (rtp_packet.PayloadType() == kDtmfPayloadType) {
+        EXPECT_EQ(rtp_packet.headers_size(), 12u);
+        EXPECT_EQ(rtp_packet.size(), 16u);
+        // The event code is read from the first payload byte.
+        const int event = rtp_packet.payload()[0];
+        // Packets repeating the current event are ignored; a different event
+        // must be exactly the next one in the sequence.
+        if (event != expected_dtmf_event_) {
+          ++expected_dtmf_event_;
+          EXPECT_EQ(event, expected_dtmf_event_);
+          if (expected_dtmf_event_ == kDtmfEventLast) {
+            observation_complete_.Set();
+          }
+        }
+      }
+
+      return SEND_PACKET;
+    }
+
+    void OnAudioStreamsCreated(AudioSendStream* send_stream,
+                               const std::vector<AudioReceiveStreamInterface*>&
+                                   receive_streams) override {
+      // Need to start stream here, else DTMF events are dropped.
+      send_stream->Start();
+      for (int event = kDtmfEventFirst; event <= kDtmfEventLast; ++event) {
+        send_stream->SendTelephoneEvent(kDtmfPayloadType, kDtmfPayloadFrequency,
+                                        event, kDtmfDuration);
+      }
+    }
+
+    void PerformTest() override {
+      EXPECT_TRUE(Wait()) << "Timed out while waiting for DTMF stream.";
+    }
+
+    // Event code of the most recently observed DTMF event; starts at the
+    // first event that is sent.
+    int expected_dtmf_event_ = kDtmfEventFirst;
+  } test;
+
+  RunBaseTest(&test);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/audio_send_stream_unittest.cc b/third_party/libwebrtc/audio/audio_send_stream_unittest.cc
new file mode 100644
index 0000000000..d842afdfe5
--- /dev/null
+++ b/third_party/libwebrtc/audio/audio_send_stream_unittest.cc
@@ -0,0 +1,927 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/audio_send_stream.h"
+
+#include <memory>
+#include <string>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#include "api/task_queue/default_task_queue_factory.h"
+#include "api/test/mock_frame_encryptor.h"
+#include "audio/audio_state.h"
+#include "audio/conversion.h"
+#include "audio/mock_voe_channel_proxy.h"
+#include "call/test/mock_rtp_transport_controller_send.h"
+#include "logging/rtc_event_log/mock/mock_rtc_event_log.h"
+#include "modules/audio_device/include/mock_audio_device.h"
+#include "modules/audio_mixer/audio_mixer_impl.h"
+#include "modules/audio_mixer/sine_wave_generator.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "modules/rtp_rtcp/mocks/mock_network_link_rtcp_observer.h"
+#include "modules/rtp_rtcp/mocks/mock_rtp_rtcp.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+#include "test/mock_audio_encoder.h"
+#include "test/mock_audio_encoder_factory.h"
+#include "test/scoped_key_value_config.h"
+#include "test/time_controller/real_time_controller.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using ::testing::_;
+using ::testing::AnyNumber;
+using ::testing::Eq;
+using ::testing::Field;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using ::testing::Ne;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::StrEq;
+
+static const float kTolerance = 0.0001f;
+
+// Fixed values used to configure the stream under test and to fill the mocked
+// statistics.
+const uint32_t kSsrc = 1234;
+const char* kCName = "foo_name";
+const int kAudioLevelId = 2;
+const int kTransportSequenceNumberId = 4;
+const int32_t kEchoDelayMedian = 254;
+const int32_t kEchoDelayStdDev = -3;
+const double kDivergentFilterFraction = 0.2f;
+const double kEchoReturnLoss = -65;
+const double kEchoReturnLossEnhancement = 101;
+const double kResidualEchoLikelihood = -1.0f;
+const double kResidualEchoLikelihoodMax = 23.0f;
+const CallSendStatistics kCallStats = {112, 12, 13456, 17890};
+constexpr int kFractionLost = 123;
+constexpr int kCumulativeLost = 567;
+constexpr uint32_t kInterarrivalJitter = 132;
+const int kTelephoneEventPayloadType = 123;
+const int kTelephoneEventPayloadFrequency = 65432;
+const int kTelephoneEventCode = 45;
+const int kTelephoneEventDuration = 6789;
+constexpr int kIsacPayloadType = 103;
+const SdpAudioFormat kIsacFormat = {"isac", 16000, 1};
+const SdpAudioFormat kOpusFormat = {"opus", 48000, 2};
+const SdpAudioFormat kG722Format = {"g722", 8000, 1};
+// Codec specs served by the mock encoder factory and used to configure the
+// mock encoders.
+const AudioCodecSpec kCodecSpecs[] = {
+    {kIsacFormat, {16000, 1, 32000, 10000, 32000}},
+    {kOpusFormat, {48000, 1, 32000, 6000, 510000}},
+    {kG722Format, {16000, 1, 64000}}};
+
+// TODO(dklee): This mirrors calculation in audio_send_stream.cc, which
+// should be made more precise in the future. This can be changed when that
+// logic is more accurate.
+const DataSize kOverheadPerPacket = DataSize::Bytes(20 + 8 + 10 + 12);
+const TimeDelta kMinFrameLength = TimeDelta::Millis(20);
+const TimeDelta kMaxFrameLength = TimeDelta::Millis(120);
+const DataRate kMinOverheadRate = kOverheadPerPacket / kMaxFrameLength;
+const DataRate kMaxOverheadRate = kOverheadPerPacket / kMinFrameLength;
+
+// Mock of the limit observer that is handed to the BitrateAllocator.
+class MockLimitObserver : public BitrateAllocator::LimitObserver {
+ public:
+  MOCK_METHOD(void,
+              OnAllocationLimitsChanged,
+              (BitrateAllocationLimits),
+              (override));
+};
+
+// Returns a NiceMock encoder whose sample rate, channel count and RTP clock
+// rate default to the values of the kCodecSpecs entry matching `format`, and
+// whose frame length range defaults to 20-120 ms. Returns nullptr when no
+// entry matches. `payload_type` is not used.
+std::unique_ptr<MockAudioEncoder> SetupAudioEncoderMock(
+    int payload_type,
+    const SdpAudioFormat& format) {
+  for (const auto& spec : kCodecSpecs) {
+    if (format == spec.format) {
+      std::unique_ptr<MockAudioEncoder> encoder(
+          new ::testing::NiceMock<MockAudioEncoder>());
+      ON_CALL(*encoder.get(), SampleRateHz())
+          .WillByDefault(Return(spec.info.sample_rate_hz));
+      ON_CALL(*encoder.get(), NumChannels())
+          .WillByDefault(Return(spec.info.num_channels));
+      ON_CALL(*encoder.get(), RtpTimestampRateHz())
+          .WillByDefault(Return(spec.format.clockrate_hz));
+      ON_CALL(*encoder.get(), GetFrameLengthRange())
+          .WillByDefault(Return(absl::optional<std::pair<TimeDelta, TimeDelta>>{
+              {TimeDelta::Millis(20), TimeDelta::Millis(120)}}));
+      return encoder;
+    }
+  }
+  return nullptr;
+}
+
+// Returns a mock encoder factory whose default actions report kCodecSpecs as
+// the supported encoders, answer queries from kCodecSpecs, and create encoders
+// through SetupAudioEncoderMock().
+rtc::scoped_refptr<MockAudioEncoderFactory> SetupEncoderFactoryMock() {
+  rtc::scoped_refptr<MockAudioEncoderFactory> factory =
+      rtc::make_ref_counted<MockAudioEncoderFactory>();
+  ON_CALL(*factory.get(), GetSupportedEncoders())
+      .WillByDefault(Return(std::vector<AudioCodecSpec>(
+          std::begin(kCodecSpecs), std::end(kCodecSpecs))));
+  ON_CALL(*factory.get(), QueryAudioEncoder(_))
+      .WillByDefault(Invoke(
+          [](const SdpAudioFormat& format) -> absl::optional<AudioCodecInfo> {
+            for (const auto& spec : kCodecSpecs) {
+              if (format == spec.format) {
+                return spec.info;
+              }
+            }
+            return absl::nullopt;
+          }));
+  ON_CALL(*factory.get(), MakeAudioEncoderMock(_, _, _, _))
+      .WillByDefault(Invoke([](int payload_type, const SdpAudioFormat& format,
+                               absl::optional<AudioCodecPairId> codec_pair_id,
+                               std::unique_ptr<AudioEncoder>* return_value) {
+        *return_value = SetupAudioEncoderMock(payload_type, format);
+      }));
+  return factory;
+}
+
+// Test helper that builds the mocks and the AudioSendStream::Config needed to
+// create an internal::AudioSendStream, and installs the mock expectations for
+// the calls made while the stream is constructed.
+struct ConfigHelper {
+  ConfigHelper(bool audio_bwe_enabled,
+               bool expect_set_encoder_call,
+               bool use_null_audio_processing)
+      : stream_config_(/*send_transport=*/nullptr),
+        audio_processing_(
+            use_null_audio_processing
+                ? nullptr
+                : rtc::make_ref_counted<NiceMock<MockAudioProcessing>>()),
+        bitrate_allocator_(&limit_observer_),
+        audio_encoder_(nullptr) {
+    using ::testing::Invoke;
+
+    AudioState::Config config;
+    config.audio_mixer = AudioMixerImpl::Create();
+    config.audio_processing = audio_processing_;
+    config.audio_device_module = rtc::make_ref_counted<MockAudioDeviceModule>();
+    audio_state_ = AudioState::Create(config);
+
+    SetupDefaultChannelSend(audio_bwe_enabled);
+    SetupMockForSetupSendCodec(expect_set_encoder_call);
+    SetupMockForCallEncoder();
+
+    // Use ISAC as default codec so as to prevent unnecessary `channel_proxy_`
+    // calls from the default ctor behavior.
+    stream_config_.send_codec_spec =
+        AudioSendStream::Config::SendCodecSpec(kIsacPayloadType, kIsacFormat);
+    stream_config_.rtp.ssrc = kSsrc;
+    stream_config_.rtp.c_name = kCName;
+    stream_config_.rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kAudioLevelUri, kAudioLevelId));
+    if (audio_bwe_enabled) {
+      AddBweToConfig(&stream_config_);
+    }
+    stream_config_.encoder_factory = SetupEncoderFactoryMock();
+    stream_config_.min_bitrate_bps = 10000;
+    stream_config_.max_bitrate_bps = 65000;
+  }
+
+  // Creates the stream under test from the helper's config and mocks.
+  // `channel_send_` is wrapped in a unique_ptr here, so the created stream
+  // receives ownership of the mock channel.
+  std::unique_ptr<internal::AudioSendStream> CreateAudioSendStream() {
+    return std::unique_ptr<internal::AudioSendStream>(
+        new internal::AudioSendStream(
+            time_controller_.GetClock(), stream_config_, audio_state_,
+            time_controller_.GetTaskQueueFactory(), &rtp_transport_,
+            &bitrate_allocator_, &event_log_, absl::nullopt,
+            std::unique_ptr<voe::ChannelSendInterface>(channel_send_),
+            field_trials));
+  }
+
+  AudioSendStream::Config& config() { return stream_config_; }
+  MockAudioEncoderFactory& mock_encoder_factory() {
+    return *static_cast<MockAudioEncoderFactory*>(
+        stream_config_.encoder_factory.get());
+  }
+  MockRtpRtcpInterface* rtp_rtcp() { return &rtp_rtcp_; }
+  MockChannelSend* channel_send() { return channel_send_; }
+  RtpTransportControllerSendInterface* transport() { return &rtp_transport_; }
+
+  // Adds the transport sequence number extension to `config` and enables
+  // transport-cc on its send codec spec.
+  static void AddBweToConfig(AudioSendStream::Config* config) {
+    config->rtp.extensions.push_back(RtpExtension(
+        RtpExtension::kTransportSequenceNumberUri, kTransportSequenceNumberId));
+    config->send_codec_spec->transport_cc_enabled = true;
+  }
+
+  // Allocates the strict mock channel and installs the expectations for the
+  // calls made on it, on `rtp_rtcp_` and on `rtp_transport_` during stream
+  // setup and teardown.
+  void SetupDefaultChannelSend(bool audio_bwe_enabled) {
+    EXPECT_TRUE(channel_send_ == nullptr);
+    channel_send_ = new ::testing::StrictMock<MockChannelSend>();
+    EXPECT_CALL(*channel_send_, GetRtpRtcp()).WillRepeatedly(Invoke([this]() {
+      return &this->rtp_rtcp_;
+    }));
+    EXPECT_CALL(rtp_rtcp_, SSRC).WillRepeatedly(Return(kSsrc));
+    EXPECT_CALL(*channel_send_, SetRTCP_CNAME(StrEq(kCName))).Times(1);
+    EXPECT_CALL(*channel_send_, SetFrameEncryptor(_)).Times(1);
+    EXPECT_CALL(*channel_send_, SetEncoderToPacketizerFrameTransformer(_))
+        .Times(1);
+    EXPECT_CALL(rtp_rtcp_, SetExtmapAllowMixed(false)).Times(1);
+    EXPECT_CALL(*channel_send_,
+                SetSendAudioLevelIndicationStatus(true, kAudioLevelId))
+        .Times(1);
+    EXPECT_CALL(rtp_transport_, GetRtcpObserver)
+        .WillRepeatedly(Return(&rtcp_observer_));
+    if (audio_bwe_enabled) {
+      EXPECT_CALL(rtp_rtcp_,
+                  RegisterRtpHeaderExtension(TransportSequenceNumber::Uri(),
+                                             kTransportSequenceNumberId))
+          .Times(1);
+    }
+    EXPECT_CALL(*channel_send_,
+                RegisterSenderCongestionControlObjects(&rtp_transport_))
+        .Times(1);
+    EXPECT_CALL(*channel_send_, ResetSenderCongestionControlObjects()).Times(1);
+  }
+
+  // When `expect_set_encoder_call` is true, expects one SetEncoder call and
+  // keeps the passed encoder in `audio_encoder_`.
+  void SetupMockForSetupSendCodec(bool expect_set_encoder_call) {
+    if (expect_set_encoder_call) {
+      EXPECT_CALL(*channel_send_, SetEncoder)
+          .WillOnce(
+              [this](int payload_type, std::unique_ptr<AudioEncoder> encoder) {
+                this->audio_encoder_ = std::move(encoder);
+                return true;
+              });
+    }
+  }
+
+  void SetupMockForCallEncoder() {
+    // Let CallEncoder invoke the modifier on the stored audio encoder, if one
+    // has been set.
+    EXPECT_CALL(*channel_send_, CallEncoder(_))
+        .WillRepeatedly(
+            [this](rtc::FunctionView<void(AudioEncoder*)> modifier) {
+              if (this->audio_encoder_)
+                modifier(this->audio_encoder_.get());
+            });
+  }
+
+  // Expects the telephone event payload type to be set and one out-of-band
+  // telephone event to be sent with the kTelephoneEvent* constants.
+  void SetupMockForSendTelephoneEvent() {
+    EXPECT_TRUE(channel_send_);
+    EXPECT_CALL(*channel_send_, SetSendTelephoneEventPayloadType(
+                                    kTelephoneEventPayloadType,
+                                    kTelephoneEventPayloadFrequency));
+    EXPECT_CALL(
+        *channel_send_,
+        SendTelephoneEventOutband(kTelephoneEventCode, kTelephoneEventDuration))
+        .WillOnce(Return(true));
+  }
+
+  void SetupMockForGetStats(bool use_null_audio_processing) {
+    using ::testing::DoAll;
+    using ::testing::SetArgPointee;
+    using ::testing::SetArgReferee;
+
+    std::vector<ReportBlockData> report_blocks;
+    ReportBlockData block;
+    block.set_source_ssrc(780);
+    block.set_fraction_lost_raw(kFractionLost);
+    block.set_cumulative_lost(kCumulativeLost);
+    block.set_jitter(kInterarrivalJitter);
+    report_blocks.push_back(block);  // Has wrong SSRC.
+    block.set_source_ssrc(kSsrc);
+    report_blocks.push_back(block);  // Correct block.
+    block.set_fraction_lost_raw(0);
+    report_blocks.push_back(block);  // Duplicate SSRC, bad fraction_lost.
+ + EXPECT_TRUE(channel_send_); + EXPECT_CALL(*channel_send_, GetRTCPStatistics()) + .WillRepeatedly(Return(kCallStats)); + EXPECT_CALL(*channel_send_, GetRemoteRTCPReportBlocks()) + .WillRepeatedly(Return(report_blocks)); + EXPECT_CALL(*channel_send_, GetANAStatistics()) + .WillRepeatedly(Return(ANAStats())); + EXPECT_CALL(*channel_send_, GetTargetBitrate()).WillRepeatedly(Return(0)); + + audio_processing_stats_.echo_return_loss = kEchoReturnLoss; + audio_processing_stats_.echo_return_loss_enhancement = + kEchoReturnLossEnhancement; + audio_processing_stats_.delay_median_ms = kEchoDelayMedian; + audio_processing_stats_.delay_standard_deviation_ms = kEchoDelayStdDev; + audio_processing_stats_.divergent_filter_fraction = + kDivergentFilterFraction; + audio_processing_stats_.residual_echo_likelihood = kResidualEchoLikelihood; + audio_processing_stats_.residual_echo_likelihood_recent_max = + kResidualEchoLikelihoodMax; + if (!use_null_audio_processing) { + ASSERT_TRUE(audio_processing_); + EXPECT_CALL(*audio_processing_, GetStatistics(true)) + .WillRepeatedly(Return(audio_processing_stats_)); + } + } + + test::ScopedKeyValueConfig field_trials; + + private: + RealTimeController time_controller_; + rtc::scoped_refptr<AudioState> audio_state_; + AudioSendStream::Config stream_config_; + ::testing::StrictMock<MockChannelSend>* channel_send_ = nullptr; + rtc::scoped_refptr<MockAudioProcessing> audio_processing_; + AudioProcessingStats audio_processing_stats_; + ::testing::StrictMock<MockNetworkLinkRtcpObserver> rtcp_observer_; + ::testing::NiceMock<MockRtcEventLog> event_log_; + ::testing::NiceMock<MockRtpTransportControllerSend> rtp_transport_; + ::testing::NiceMock<MockRtpRtcpInterface> rtp_rtcp_; + ::testing::NiceMock<MockLimitObserver> limit_observer_; + BitrateAllocator bitrate_allocator_; + std::unique_ptr<AudioEncoder> audio_encoder_; +}; + +// The audio level ranges linearly [0,32767]. 
+// Returns a frame of `duration_ms` at `sample_rate_hz` with `num_channels`
+// channels, filled by SineWaveGenerator(1000.0, audio_level).
+// NOTE(review): `sample_rate_hz / (1000 / duration_ms)` uses integer division,
+// so the sample count is exact only when `duration_ms` divides 1000, and a
+// `duration_ms` of 0 or above 1000 would divide by zero. Every caller in view
+// passes 10.
+std::unique_ptr<AudioFrame> CreateAudioFrame1kHzSineWave(int16_t audio_level,
+                                                         int duration_ms,
+                                                         int sample_rate_hz,
+                                                         size_t num_channels) {
+  size_t samples_per_channel = sample_rate_hz / (1000 / duration_ms);
+  std::vector<int16_t> audio_data(samples_per_channel * num_channels, 0);
+  std::unique_ptr<AudioFrame> audio_frame = std::make_unique<AudioFrame>();
+  audio_frame->UpdateFrame(0 /* RTP timestamp */, &audio_data[0],
+                           samples_per_channel, sample_rate_hz,
+                           AudioFrame::SpeechType::kNormalSpeech,
+                           AudioFrame::VADActivity::kVadUnknown, num_channels);
+  SineWaveGenerator wave_generator(1000.0, audio_level);
+  wave_generator.GenerateNextFrame(audio_frame.get());
+  return audio_frame;
+}
+
+}  // namespace
+
+// Pins the exact text produced by AudioSendStream::Config::ToString() for a
+// fully populated config. Any change to the format must update the literal.
+TEST(AudioSendStreamTest, ConfigToString) {
+  AudioSendStream::Config config(/*send_transport=*/nullptr);
+  config.rtp.ssrc = kSsrc;
+  config.rtp.c_name = kCName;
+  config.min_bitrate_bps = 12000;
+  config.max_bitrate_bps = 34000;
+  config.has_dscp = true;
+  config.send_codec_spec =
+      AudioSendStream::Config::SendCodecSpec(kIsacPayloadType, kIsacFormat);
+  config.send_codec_spec->nack_enabled = true;
+  config.send_codec_spec->transport_cc_enabled = false;
+  config.send_codec_spec->cng_payload_type = 42;
+  config.send_codec_spec->red_payload_type = 43;
+  config.encoder_factory = MockAudioEncoderFactory::CreateUnusedFactory();
+  config.rtp.extmap_allow_mixed = true;
+  config.rtp.extensions.push_back(
+      RtpExtension(RtpExtension::kAudioLevelUri, kAudioLevelId));
+  config.rtcp_report_interval_ms = 2500;
+  EXPECT_EQ(
+      "{rtp: {ssrc: 1234, extmap-allow-mixed: true, extensions: [{uri: "
+      "urn:ietf:params:rtp-hdrext:ssrc-audio-level, id: 2}], "
+      "c_name: foo_name}, rtcp_report_interval_ms: 2500, "
+      "send_transport: null, "
+      "min_bitrate_bps: 12000, max_bitrate_bps: 34000, has "
+      "audio_network_adaptor_config: false, has_dscp: true, "
+      "send_codec_spec: {nack_enabled: true, transport_cc_enabled: false, "
+      "enable_non_sender_rtt: false, cng_payload_type: 42, "
+      "red_payload_type: 43, payload_type: 103, "
+      "format: {name: isac, clockrate_hz: 16000, num_channels: 1, "
+      "parameters: {}}}}",
+      config.ToString());
+}
+
+// Creating and destroying a stream must satisfy exactly the expectations that
+// ConfigHelper installs. Like most tests below, this runs once with a mock
+// audio processing module and once with none.
+TEST(AudioSendStreamTest, ConstructDestruct) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+  }
+}
+
+// SendTelephoneEvent() must configure the payload type on the channel, send
+// the event out-of-band, and report the channel's success.
+TEST(AudioSendStreamTest, SendTelephoneEvent) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    helper.SetupMockForSendTelephoneEvent();
+    EXPECT_TRUE(send_stream->SendTelephoneEvent(
+        kTelephoneEventPayloadType, kTelephoneEventPayloadFrequency,
+        kTelephoneEventCode, kTelephoneEventDuration));
+  }
+}
+
+// SetMuted(true) must be forwarded to the channel as SetInputMute(true).
+TEST(AudioSendStreamTest, SetMuted) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    EXPECT_CALL(*helper.channel_send(), SetInputMute(true));
+    send_stream->SetMuted(true);
+  }
+}
+
+// With audio BWE enabled, the helper additionally expects the transport
+// sequence number header extension to be registered.
+TEST(AudioSendStreamTest, AudioBweCorrectObjectsOnChannelProxy) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+  }
+}
+
+// Counterpart of the test above with audio BWE disabled. The helper does not
+// add the header extension expectation in this case.
+TEST(AudioSendStreamTest, NoAudioBweCorrectObjectsOnChannelProxy) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+  }
+}
+
+// GetStats(true) must reflect the canned values from SetupMockForGetStats().
+// The loss and jitter expectations correspond to the first report block that
+// carries kSsrc, not the foreign-SSRC block or the zeroed duplicate.
+TEST(AudioSendStreamTest, GetStats) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    helper.SetupMockForGetStats(use_null_audio_processing);
+    AudioSendStream::Stats stats = send_stream->GetStats(true);
+    EXPECT_EQ(kSsrc, stats.local_ssrc);
+    EXPECT_EQ(kCallStats.payload_bytes_sent, stats.payload_bytes_sent);
+    EXPECT_EQ(kCallStats.header_and_padding_bytes_sent,
+              stats.header_and_padding_bytes_sent);
+    EXPECT_EQ(kCallStats.packetsSent, stats.packets_sent);
+    EXPECT_EQ(stats.packets_lost, kCumulativeLost);
+    EXPECT_FLOAT_EQ(stats.fraction_lost, Q8ToFloat(kFractionLost));
+    EXPECT_EQ(kIsacFormat.name, stats.codec_name);
+    // Jitter is expected in milliseconds: the raw value divided by the
+    // codec's clock ticks per millisecond.
+    EXPECT_EQ(stats.jitter_ms,
+              static_cast<int32_t>(kInterarrivalJitter /
+                                   (kIsacFormat.clockrate_hz / 1000)));
+    EXPECT_EQ(kCallStats.rttMs, stats.rtt_ms);
+    // No audio has been sent, so the level, energy and duration are zero.
+    EXPECT_EQ(0, stats.audio_level);
+    EXPECT_EQ(0, stats.total_input_energy);
+    EXPECT_EQ(0, stats.total_input_duration);
+
+    // APM statistics are only checked when an audio processing mock exists.
+    if (!use_null_audio_processing) {
+      EXPECT_EQ(kEchoDelayMedian, stats.apm_statistics.delay_median_ms);
+      EXPECT_EQ(kEchoDelayStdDev,
+                stats.apm_statistics.delay_standard_deviation_ms);
+      EXPECT_EQ(kEchoReturnLoss, stats.apm_statistics.echo_return_loss);
+      EXPECT_EQ(kEchoReturnLossEnhancement,
+                stats.apm_statistics.echo_return_loss_enhancement);
+      EXPECT_EQ(kDivergentFilterFraction,
+                stats.apm_statistics.divergent_filter_fraction);
+      EXPECT_EQ(kResidualEchoLikelihood,
+                stats.apm_statistics.residual_echo_likelihood);
+      EXPECT_EQ(kResidualEchoLikelihoodMax,
+                stats.apm_statistics.residual_echo_likelihood_recent_max);
+    }
+  }
+}
+
+// Feeds 100 ms of silence followed by 100 ms of full-scale sine and checks the
+// reported audio level, accumulated input energy and accumulated duration
+// after each phase.
+TEST(AudioSendStreamTest, GetStatsAudioLevel) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    helper.SetupMockForGetStats(use_null_audio_processing);
+    // The channel is a StrictMock, so the encode calls triggered by
+    // SendAudioData() must be allowed explicitly.
+    EXPECT_CALL(*helper.channel_send(), ProcessAndEncodeAudio)
+        .Times(AnyNumber());
+
+    constexpr int kSampleRateHz = 48000;
+    constexpr size_t kNumChannels = 1;
+
+    constexpr int16_t kSilentAudioLevel = 0;
+    constexpr int16_t kMaxAudioLevel = 32767;  // Audio level is [0,32767].
+    constexpr int kAudioFrameDurationMs = 10;
+
+    // Process 10 audio frames (100 ms) of silence. After this, on the next
+    // (11-th) frame, the audio level will be updated with the maximum audio
+    // level of the first 11 frames. See AudioLevel.
+    for (size_t i = 0; i < 10; ++i) {
+      send_stream->SendAudioData(
+          CreateAudioFrame1kHzSineWave(kSilentAudioLevel, kAudioFrameDurationMs,
+                                       kSampleRateHz, kNumChannels));
+    }
+    AudioSendStream::Stats stats = send_stream->GetStats();
+    EXPECT_EQ(kSilentAudioLevel, stats.audio_level);
+    EXPECT_NEAR(0.0f, stats.total_input_energy, kTolerance);
+    EXPECT_NEAR(0.1f, stats.total_input_duration,
+                kTolerance);  // 100 ms = 0.1 s
+
+    // Process 10 audio frames (100 ms) of maximum audio level.
+    // Note that AudioLevel updates the audio level every 11th frame, processing
+    // 10 frames above was needed to see a non-zero audio level here.
+    for (size_t i = 0; i < 10; ++i) {
+      send_stream->SendAudioData(CreateAudioFrame1kHzSineWave(
+          kMaxAudioLevel, kAudioFrameDurationMs, kSampleRateHz, kNumChannels));
+    }
+    stats = send_stream->GetStats();
+    EXPECT_EQ(kMaxAudioLevel, stats.audio_level);
+    // Energy increases by energy*duration, where energy is audio level in
+    // [0,1].
+    EXPECT_NEAR(0.1f, stats.total_input_energy, kTolerance);  // 0.1 s of max
+    EXPECT_NEAR(0.2f, stats.total_input_duration,
+                kTolerance);  // 200 ms = 0.2 s
+  }
+}
+
+// The audio network adaptor (ANA) config string must reach the encoder both
+// at construction and again when Reconfigure() supplies a different string.
+TEST(AudioSendStreamTest, SendCodecAppliesAudioNetworkAdaptor) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    helper.config().send_codec_spec =
+        AudioSendStream::Config::SendCodecSpec(0, kOpusFormat);
+    const std::string kAnaConfigString = "abcde";
+    const std::string kAnaReconfigString = "12345";
+
+    helper.config().audio_network_adaptor_config = kAnaConfigString;
+
+    // WillOnce: the factory is expected to create a single encoder, and that
+    // encoder must see both ANA strings.
+    EXPECT_CALL(helper.mock_encoder_factory(), MakeAudioEncoderMock(_, _, _, _))
+        .WillOnce(Invoke([&kAnaConfigString, &kAnaReconfigString](
+                             int payload_type, const SdpAudioFormat& format,
+                             absl::optional<AudioCodecPairId> codec_pair_id,
+                             std::unique_ptr<AudioEncoder>* return_value) {
+          auto mock_encoder = SetupAudioEncoderMock(payload_type, format);
+          EXPECT_CALL(*mock_encoder,
+                      EnableAudioNetworkAdaptor(StrEq(kAnaConfigString), _))
+              .WillOnce(Return(true));
+          EXPECT_CALL(*mock_encoder,
+                      EnableAudioNetworkAdaptor(StrEq(kAnaReconfigString), _))
+              .WillOnce(Return(true));
+          *return_value = std::move(mock_encoder);
+        }));
+
+    auto send_stream = helper.CreateAudioSendStream();
+
+    auto stream_config = helper.config();
+    stream_config.audio_network_adaptor_config = kAnaReconfigString;
+
+    send_stream->Reconfigure(stream_config, nullptr);
+  }
+}
+
+// When ANA is enabled through Reconfigure(), the encoder must be told the
+// per-packet overhead again after ANA has been switched on. InSequence fixes
+// the order: two overhead notifications, the ANA enable, then one more
+// overhead notification.
+TEST(AudioSendStreamTest, AudioNetworkAdaptorReceivesOverhead) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    helper.config().send_codec_spec =
+        AudioSendStream::Config::SendCodecSpec(0, kOpusFormat);
+    const std::string kAnaConfigString = "abcde";
+
+    EXPECT_CALL(helper.mock_encoder_factory(), MakeAudioEncoderMock(_, _, _, _))
+        .WillOnce(Invoke(
+            [&kAnaConfigString](int payload_type, const SdpAudioFormat& format,
+                                absl::optional<AudioCodecPairId> codec_pair_id,
+                                std::unique_ptr<AudioEncoder>* return_value) {
+              auto mock_encoder = SetupAudioEncoderMock(payload_type, format);
+              InSequence s;
+              EXPECT_CALL(
+                  *mock_encoder,
+                  OnReceivedOverhead(Eq(kOverheadPerPacket.bytes<size_t>())))
+                  .Times(2);
+              EXPECT_CALL(*mock_encoder,
+                          EnableAudioNetworkAdaptor(StrEq(kAnaConfigString), _))
+                  .WillOnce(Return(true));
+              // Note: Overhead is received AFTER ANA has been enabled.
+              EXPECT_CALL(
+                  *mock_encoder,
+                  OnReceivedOverhead(Eq(kOverheadPerPacket.bytes<size_t>())))
+                  .WillOnce(Return());
+              *return_value = std::move(mock_encoder);
+            }));
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(kOverheadPerPacket.bytes<size_t>()));
+
+    auto send_stream = helper.CreateAudioSendStream();
+
+    auto stream_config = helper.config();
+    stream_config.audio_network_adaptor_config = kAnaConfigString;
+
+    send_stream->Reconfigure(stream_config, nullptr);
+  }
+}
+
+// VAD is applied when codec is mono and the CNG frequency matches the codec
+// clock rate.
+TEST(AudioSendStreamTest, SendCodecCanApplyVad) {
+  for (bool use_null_audio_processing : {false, true}) {
+    // The helper's own SetEncoder expectation is disabled (second argument is
+    // false) so this test can capture the encoder itself.
+    ConfigHelper helper(false, false, use_null_audio_processing);
+    helper.config().send_codec_spec =
+        AudioSendStream::Config::SendCodecSpec(9, kG722Format);
+    helper.config().send_codec_spec->cng_payload_type = 105;
+    std::unique_ptr<AudioEncoder> stolen_encoder;
+    EXPECT_CALL(*helper.channel_send(), SetEncoder)
+        .WillOnce([&stolen_encoder](int payload_type,
+                                    std::unique_ptr<AudioEncoder> encoder) {
+          stolen_encoder = std::move(encoder);
+          return true;
+        });
+    EXPECT_CALL(*helper.channel_send(), RegisterCngPayloadType(105, 8000));
+
+    auto send_stream = helper.CreateAudioSendStream();
+
+    // We cannot truly determine if the encoder created is an AudioEncoderCng.
+    // It is the only reasonable implementation that will return something from
+    // ReclaimContainedEncoders, though.
+    ASSERT_TRUE(stolen_encoder);
+    EXPECT_FALSE(stolen_encoder->ReclaimContainedEncoders().empty());
+  }
+}
+
+// A target bitrate above the configured maximum must be capped at
+// `max_bitrate_bps` before it reaches the channel.
+TEST(AudioSendStreamTest, DoesNotPassHigherBitrateThanMaxBitrate) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    EXPECT_CALL(
+        *helper.channel_send(),
+        OnBitrateAllocation(
+            Field(&BitrateAllocationUpdate::target_bitrate,
+                  Eq(DataRate::BitsPerSec(helper.config().max_bitrate_bps)))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps + 5000);
+    update.packet_loss_ratio = 0;
+    update.round_trip_time = TimeDelta::Millis(50);
+    update.bwe_period = TimeDelta::Millis(6000);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// With audio BWE enabled, a target inside the configured range must be
+// forwarded to the channel unchanged.
+TEST(AudioSendStreamTest, SSBweTargetInRangeRespected) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    EXPECT_CALL(
+        *helper.channel_send(),
+        OnBitrateAllocation(Field(
+            &BitrateAllocationUpdate::target_bitrate,
+            Eq(DataRate::BitsPerSec(helper.config().max_bitrate_bps - 5000)))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps - 5000);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// With the WebRTC-Audio-Allocation field trial set to min:6kbps, a 1 kbps
+// target must be raised to 6 kbps.
+// NOTE(review): the local `field_trials` is built with `helper.field_trials`
+// as its first argument and is otherwise unused; presumably it overrides the
+// helper's trials for its lifetime — confirm against ScopedKeyValueConfig.
+TEST(AudioSendStreamTest, SSBweFieldTrialMinRespected) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    ScopedKeyValueConfig field_trials(
+        helper.field_trials, "WebRTC-Audio-Allocation/min:6kbps,max:64kbps/");
+    auto send_stream = helper.CreateAudioSendStream();
+    EXPECT_CALL(
+        *helper.channel_send(),
+        OnBitrateAllocation(Field(&BitrateAllocationUpdate::target_bitrate,
+                                  Eq(DataRate::KilobitsPerSec(6)))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate = DataRate::KilobitsPerSec(1);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// With the same field trial set to max:64kbps, a 128 kbps target must be
+// capped at 64 kbps.
+TEST(AudioSendStreamTest, SSBweFieldTrialMaxRespected) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    ScopedKeyValueConfig field_trials(
+        helper.field_trials, "WebRTC-Audio-Allocation/min:6kbps,max:64kbps/");
+    auto send_stream = helper.CreateAudioSendStream();
+    EXPECT_CALL(
+        *helper.channel_send(),
+        OnBitrateAllocation(Field(&BitrateAllocationUpdate::target_bitrate,
+                                  Eq(DataRate::KilobitsPerSec(64)))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate = DataRate::KilobitsPerSec(128);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// With WebRTC-Audio-LegacyOverhead disabled, a target equal to the maximum
+// bitrate plus kMaxOverheadRate must be forwarded unchanged.
+TEST(AudioSendStreamTest, SSBweWithOverhead) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    ScopedKeyValueConfig field_trials(helper.field_trials,
+                                      "WebRTC-Audio-LegacyOverhead/Disabled/");
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(kOverheadPerPacket.bytes<size_t>()));
+    auto send_stream = helper.CreateAudioSendStream();
+    const DataRate bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps) +
+        kMaxOverheadRate;
+    EXPECT_CALL(*helper.channel_send(),
+                OnBitrateAllocation(Field(
+                    &BitrateAllocationUpdate::target_bitrate, Eq(bitrate))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate = bitrate;
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// Overhead accounting combined with the allocation field trial: a 1 kbps
+// target must be raised to 6 kbps plus kMinOverheadRate.
+TEST(AudioSendStreamTest, SSBweWithOverheadMinRespected) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    ScopedKeyValueConfig field_trials(
+        helper.field_trials,
+        "WebRTC-Audio-LegacyOverhead/Disabled/"
+        "WebRTC-Audio-Allocation/min:6kbps,max:64kbps/");
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(kOverheadPerPacket.bytes<size_t>()));
+    auto send_stream = helper.CreateAudioSendStream();
+    const DataRate bitrate = DataRate::KilobitsPerSec(6) + kMinOverheadRate;
+    EXPECT_CALL(*helper.channel_send(),
+                OnBitrateAllocation(Field(
+                    &BitrateAllocationUpdate::target_bitrate, Eq(bitrate))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate = DataRate::KilobitsPerSec(1);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// Upper-bound counterpart: a 128 kbps target must be capped at 64 kbps plus
+// kMaxOverheadRate.
+TEST(AudioSendStreamTest, SSBweWithOverheadMaxRespected) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(true, true, use_null_audio_processing);
+    ScopedKeyValueConfig field_trials(
+        helper.field_trials,
+        "WebRTC-Audio-LegacyOverhead/Disabled/"
+        "WebRTC-Audio-Allocation/min:6kbps,max:64kbps/");
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(kOverheadPerPacket.bytes<size_t>()));
+    auto send_stream = helper.CreateAudioSendStream();
+    const DataRate bitrate = DataRate::KilobitsPerSec(64) + kMaxOverheadRate;
+    EXPECT_CALL(*helper.channel_send(),
+                OnBitrateAllocation(Field(
+                    &BitrateAllocationUpdate::target_bitrate, Eq(bitrate))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate = DataRate::KilobitsPerSec(128);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// The `bwe_period` of a bitrate update must reach the channel unchanged.
+TEST(AudioSendStreamTest, ProbingIntervalOnBitrateUpdated) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+
+    EXPECT_CALL(*helper.channel_send(),
+                OnBitrateAllocation(Field(&BitrateAllocationUpdate::bwe_period,
+                                          Eq(TimeDelta::Millis(5000)))));
+    BitrateAllocationUpdate update;
+    update.target_bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps + 5000);
+    update.packet_loss_ratio = 0;
+    update.round_trip_time = TimeDelta::Millis(50);
+    update.bwe_period = TimeDelta::Millis(5000);
+    send_stream->OnBitrateUpdated(update);
+  }
+}
+
+// Test that AudioSendStream doesn't recreate the encoder unnecessarily.
+TEST(AudioSendStreamTest, DontRecreateEncoder) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, false, use_null_audio_processing);
+    // WillOnce is (currently) the default used by ConfigHelper if asked to set
+    // an expectation for SetEncoder. Since this behavior is essential for this
+    // test to be correct, it's instead set-up manually here. Otherwise a simple
+    // change to ConfigHelper (say to WillRepeatedly) would silently make this
+    // test useless.
+    EXPECT_CALL(*helper.channel_send(), SetEncoder).WillOnce(Return());
+
+    EXPECT_CALL(*helper.channel_send(), RegisterCngPayloadType(105, 8000));
+
+    helper.config().send_codec_spec =
+        AudioSendStream::Config::SendCodecSpec(9, kG722Format);
+    helper.config().send_codec_spec->cng_payload_type = 105;
+    auto send_stream = helper.CreateAudioSendStream();
+    // Reconfiguring with the unchanged config must not produce a second
+    // SetEncoder call; the single WillOnce above would be oversaturated.
+    send_stream->Reconfigure(helper.config(), nullptr);
+  }
+}
+
+// Enabling transport-cc through Reconfigure() must register the header
+// extension and must reset the congestion control objects on the channel
+// before registering them again. The inner InSequence enforces that order.
+TEST(AudioSendStreamTest, ReconfigureTransportCcResetsFirst) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    auto new_config = helper.config();
+    ConfigHelper::AddBweToConfig(&new_config);
+
+    EXPECT_CALL(*helper.rtp_rtcp(),
+                RegisterRtpHeaderExtension(TransportSequenceNumber::Uri(),
+                                           kTransportSequenceNumberId))
+        .Times(1);
+    {
+      ::testing::InSequence seq;
+      EXPECT_CALL(*helper.channel_send(), ResetSenderCongestionControlObjects())
+          .Times(1);
+      EXPECT_CALL(*helper.channel_send(),
+                  RegisterSenderCongestionControlObjects(helper.transport()))
+          .Times(1);
+    }
+
+    send_stream->Reconfigure(new_config, nullptr);
+  }
+}
+
+// Setting the transport overhead must invoke CallEncoder and must be visible
+// through TestOnlyGetPerPacketOverheadBytes().
+// NOTE(review): `new_config` is copied but never used in this test, nor in
+// the three overhead tests that follow.
+TEST(AudioSendStreamTest, OnTransportOverheadChanged) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    auto new_config = helper.config();
+
+    // CallEncoder will be called on overhead change.
+    EXPECT_CALL(*helper.channel_send(), CallEncoder);
+
+    const size_t transport_overhead_per_packet_bytes = 333;
+    send_stream->SetTransportOverhead(transport_overhead_per_packet_bytes);
+
+    EXPECT_EQ(transport_overhead_per_packet_bytes,
+              send_stream->TestOnlyGetPerPacketOverheadBytes());
+  }
+}
+
+// CallEncoder must run only when the transport overhead actually changes.
+// Each EXPECT_CALL is placed immediately before the call it covers.
+TEST(AudioSendStreamTest, DoesntCallEncoderWhenOverheadUnchanged) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    auto send_stream = helper.CreateAudioSendStream();
+    auto new_config = helper.config();
+
+    // CallEncoder will be called on overhead change.
+    EXPECT_CALL(*helper.channel_send(), CallEncoder);
+    const size_t transport_overhead_per_packet_bytes = 333;
+    send_stream->SetTransportOverhead(transport_overhead_per_packet_bytes);
+
+    // Set the same overhead again, CallEncoder should not be called again.
+    EXPECT_CALL(*helper.channel_send(), CallEncoder).Times(0);
+    send_stream->SetTransportOverhead(transport_overhead_per_packet_bytes);
+
+    // New overhead, call CallEncoder again
+    EXPECT_CALL(*helper.channel_send(), CallEncoder);
+    send_stream->SetTransportOverhead(transport_overhead_per_packet_bytes + 1);
+  }
+}
+
+// The per-packet overhead reported by the stream must follow the value
+// returned by ExpectedPerPacketOverhead: 555 bytes first, then 575 bytes
+// after the mock is changed and another bitrate update arrives.
+TEST(AudioSendStreamTest, AudioOverheadChanged) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    const size_t audio_overhead_per_packet_bytes = 555;
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(audio_overhead_per_packet_bytes));
+    auto send_stream = helper.CreateAudioSendStream();
+    auto new_config = helper.config();
+
+    BitrateAllocationUpdate update;
+    update.target_bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps) +
+        kMaxOverheadRate;
+    EXPECT_CALL(*helper.channel_send(), OnBitrateAllocation);
+    send_stream->OnBitrateUpdated(update);
+
+    EXPECT_EQ(audio_overhead_per_packet_bytes,
+              send_stream->TestOnlyGetPerPacketOverheadBytes());
+
+    // Later EXPECT_CALLs take precedence in gmock, so this replaces the 555
+    // byte answer installed above.
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(audio_overhead_per_packet_bytes + 20));
+    EXPECT_CALL(*helper.channel_send(), OnBitrateAllocation);
+    send_stream->OnBitrateUpdated(update);
+
+    EXPECT_EQ(audio_overhead_per_packet_bytes + 20,
+              send_stream->TestOnlyGetPerPacketOverheadBytes());
+  }
+}
+
+// The reported per-packet overhead must be the sum of the transport overhead
+// (333 bytes) and the audio overhead (555 bytes).
+TEST(AudioSendStreamTest, OnAudioAndTransportOverheadChanged) {
+  for (bool use_null_audio_processing : {false, true}) {
+    ConfigHelper helper(false, true, use_null_audio_processing);
+    const size_t audio_overhead_per_packet_bytes = 555;
+    EXPECT_CALL(*helper.rtp_rtcp(), ExpectedPerPacketOverhead)
+        .WillRepeatedly(Return(audio_overhead_per_packet_bytes));
+    auto send_stream = helper.CreateAudioSendStream();
+    auto new_config = helper.config();
+
+    const size_t transport_overhead_per_packet_bytes = 333;
+    send_stream->SetTransportOverhead(transport_overhead_per_packet_bytes);
+
+    BitrateAllocationUpdate update;
+    update.target_bitrate =
+        DataRate::BitsPerSec(helper.config().max_bitrate_bps) +
+        kMaxOverheadRate;
+    EXPECT_CALL(*helper.channel_send(), OnBitrateAllocation);
+    send_stream->OnBitrateUpdated(update);
+
+    EXPECT_EQ(
+        transport_overhead_per_packet_bytes + audio_overhead_per_packet_bytes,
+        send_stream->TestOnlyGetPerPacketOverheadBytes());
+  }
+}
+
+// Validates that reconfiguring the AudioSendStream with a Frame encryptor
+// correctly reconfigures on the object without crashing.
+TEST(AudioSendStreamTest, ReconfigureWithFrameEncryptor) { + for (bool use_null_audio_processing : {false, true}) { + ConfigHelper helper(false, true, use_null_audio_processing); + auto send_stream = helper.CreateAudioSendStream(); + auto new_config = helper.config(); + + rtc::scoped_refptr<FrameEncryptorInterface> mock_frame_encryptor_0( + rtc::make_ref_counted<MockFrameEncryptor>()); + new_config.frame_encryptor = mock_frame_encryptor_0; + EXPECT_CALL(*helper.channel_send(), SetFrameEncryptor(Ne(nullptr))) + .Times(1); + send_stream->Reconfigure(new_config, nullptr); + + // Not updating the frame encryptor shouldn't force it to reconfigure. + EXPECT_CALL(*helper.channel_send(), SetFrameEncryptor(_)).Times(0); + send_stream->Reconfigure(new_config, nullptr); + + // Updating frame encryptor to a new object should force a call to the + // proxy. + rtc::scoped_refptr<FrameEncryptorInterface> mock_frame_encryptor_1( + rtc::make_ref_counted<MockFrameEncryptor>()); + new_config.frame_encryptor = mock_frame_encryptor_1; + new_config.crypto_options.sframe.require_frame_encryption = true; + EXPECT_CALL(*helper.channel_send(), SetFrameEncryptor(Ne(nullptr))) + .Times(1); + send_stream->Reconfigure(new_config, nullptr); + } +} +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_state.cc b/third_party/libwebrtc/audio/audio_state.cc new file mode 100644 index 0000000000..6f20e7b128 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_state.cc @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/audio_state.h" + +#include <algorithm> +#include <memory> +#include <utility> +#include <vector> + +#include "api/sequence_checker.h" +#include "api/task_queue/task_queue_base.h" +#include "api/units/time_delta.h" +#include "audio/audio_receive_stream.h" +#include "audio/audio_send_stream.h" +#include "modules/audio_device/include/audio_device.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace internal { + +AudioState::AudioState(const AudioState::Config& config) + : config_(config), + audio_transport_(config_.audio_mixer.get(), + config_.audio_processing.get(), + config_.async_audio_processing_factory.get()) { + RTC_DCHECK(config_.audio_mixer); + RTC_DCHECK(config_.audio_device_module); +} + +AudioState::~AudioState() { + RTC_DCHECK_RUN_ON(&thread_checker_); + RTC_DCHECK(receiving_streams_.empty()); + RTC_DCHECK(sending_streams_.empty()); + RTC_DCHECK(!null_audio_poller_.Running()); +} + +AudioProcessing* AudioState::audio_processing() { + return config_.audio_processing.get(); +} + +AudioTransport* AudioState::audio_transport() { + return &audio_transport_; +} + +void AudioState::AddReceivingStream( + webrtc::AudioReceiveStreamInterface* stream) { + RTC_DCHECK_RUN_ON(&thread_checker_); + RTC_DCHECK_EQ(0, receiving_streams_.count(stream)); + receiving_streams_.insert(stream); + if (!config_.audio_mixer->AddSource( + static_cast<AudioReceiveStreamImpl*>(stream))) { + RTC_DLOG(LS_ERROR) << "Failed to add source to mixer."; + } + + // Make sure playback is initialized; start playing if enabled. 
+ UpdateNullAudioPollerState(); + auto* adm = config_.audio_device_module.get(); + if (!adm->Playing()) { + if (adm->InitPlayout() == 0) { + if (playout_enabled_) { + adm->StartPlayout(); + } + } else { + RTC_DLOG_F(LS_ERROR) << "Failed to initialize playout."; + } + } +} + +void AudioState::RemoveReceivingStream( + webrtc::AudioReceiveStreamInterface* stream) { + RTC_DCHECK_RUN_ON(&thread_checker_); + auto count = receiving_streams_.erase(stream); + RTC_DCHECK_EQ(1, count); + config_.audio_mixer->RemoveSource( + static_cast<AudioReceiveStreamImpl*>(stream)); + UpdateNullAudioPollerState(); + if (receiving_streams_.empty()) { + config_.audio_device_module->StopPlayout(); + } +} + +void AudioState::AddSendingStream(webrtc::AudioSendStream* stream, + int sample_rate_hz, + size_t num_channels) { + RTC_DCHECK_RUN_ON(&thread_checker_); + auto& properties = sending_streams_[stream]; + properties.sample_rate_hz = sample_rate_hz; + properties.num_channels = num_channels; + UpdateAudioTransportWithSendingStreams(); + + // Make sure recording is initialized; start recording if enabled. 
+ auto* adm = config_.audio_device_module.get(); + if (!adm->Recording()) { + if (adm->InitRecording() == 0) { + if (recording_enabled_) { + adm->StartRecording(); + } + } else { + RTC_DLOG_F(LS_ERROR) << "Failed to initialize recording."; + } + } +} + +void AudioState::RemoveSendingStream(webrtc::AudioSendStream* stream) { + RTC_DCHECK_RUN_ON(&thread_checker_); + auto count = sending_streams_.erase(stream); + RTC_DCHECK_EQ(1, count); + UpdateAudioTransportWithSendingStreams(); + if (sending_streams_.empty()) { + config_.audio_device_module->StopRecording(); + } +} + +void AudioState::SetPlayout(bool enabled) { + RTC_LOG(LS_INFO) << "SetPlayout(" << enabled << ")"; + RTC_DCHECK_RUN_ON(&thread_checker_); + if (playout_enabled_ != enabled) { + playout_enabled_ = enabled; + if (enabled) { + UpdateNullAudioPollerState(); + if (!receiving_streams_.empty()) { + config_.audio_device_module->StartPlayout(); + } + } else { + config_.audio_device_module->StopPlayout(); + UpdateNullAudioPollerState(); + } + } +} + +void AudioState::SetRecording(bool enabled) { + RTC_LOG(LS_INFO) << "SetRecording(" << enabled << ")"; + RTC_DCHECK_RUN_ON(&thread_checker_); + if (recording_enabled_ != enabled) { + recording_enabled_ = enabled; + if (enabled) { + if (!sending_streams_.empty()) { + config_.audio_device_module->StartRecording(); + } + } else { + config_.audio_device_module->StopRecording(); + } + } +} + +void AudioState::SetStereoChannelSwapping(bool enable) { + RTC_DCHECK(thread_checker_.IsCurrent()); + audio_transport_.SetStereoChannelSwapping(enable); +} + +void AudioState::UpdateAudioTransportWithSendingStreams() { + RTC_DCHECK(thread_checker_.IsCurrent()); + std::vector<AudioSender*> audio_senders; + int max_sample_rate_hz = 8000; + size_t max_num_channels = 1; + for (const auto& kv : sending_streams_) { + audio_senders.push_back(kv.first); + max_sample_rate_hz = std::max(max_sample_rate_hz, kv.second.sample_rate_hz); + max_num_channels = std::max(max_num_channels, 
kv.second.num_channels); + } + audio_transport_.UpdateAudioSenders(std::move(audio_senders), + max_sample_rate_hz, max_num_channels); +} + +void AudioState::UpdateNullAudioPollerState() { + // Run NullAudioPoller when there are receiving streams and playout is + // disabled. + if (!receiving_streams_.empty() && !playout_enabled_) { + if (!null_audio_poller_.Running()) { + AudioTransport* audio_transport = &audio_transport_; + null_audio_poller_ = RepeatingTaskHandle::Start( + TaskQueueBase::Current(), [audio_transport] { + static constexpr size_t kNumChannels = 1; + static constexpr uint32_t kSamplesPerSecond = 48'000; + // 10ms of samples + static constexpr size_t kNumSamples = kSamplesPerSecond / 100; + + // Buffer to hold the audio samples. + int16_t buffer[kNumSamples * kNumChannels]; + + // Output variables from `NeedMorePlayData`. + size_t n_samples; + int64_t elapsed_time_ms; + int64_t ntp_time_ms; + audio_transport->NeedMorePlayData( + kNumSamples, sizeof(int16_t), kNumChannels, kSamplesPerSecond, + buffer, n_samples, &elapsed_time_ms, &ntp_time_ms); + + // Reschedule the next poll iteration. + return TimeDelta::Millis(10); + }); + } + } else { + null_audio_poller_.Stop(); + } +} +} // namespace internal + +rtc::scoped_refptr<AudioState> AudioState::Create( + const AudioState::Config& config) { + return rtc::make_ref_counted<internal::AudioState>(config); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_state.h b/third_party/libwebrtc/audio/audio_state.h new file mode 100644 index 0000000000..88aaaa3697 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_state.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_AUDIO_STATE_H_ +#define AUDIO_AUDIO_STATE_H_ + +#include <map> +#include <memory> + +#include "api/sequence_checker.h" +#include "audio/audio_transport_impl.h" +#include "call/audio_state.h" +#include "rtc_base/containers/flat_set.h" +#include "rtc_base/ref_count.h" +#include "rtc_base/task_utils/repeating_task.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class AudioSendStream; +class AudioReceiveStreamInterface; + +namespace internal { + +class AudioState : public webrtc::AudioState { + public: + explicit AudioState(const AudioState::Config& config); + + AudioState() = delete; + AudioState(const AudioState&) = delete; + AudioState& operator=(const AudioState&) = delete; + + ~AudioState() override; + + AudioProcessing* audio_processing() override; + AudioTransport* audio_transport() override; + + void SetPlayout(bool enabled) override; + void SetRecording(bool enabled) override; + + void SetStereoChannelSwapping(bool enable) override; + + AudioDeviceModule* audio_device_module() { + RTC_DCHECK(config_.audio_device_module); + return config_.audio_device_module.get(); + } + + void AddReceivingStream(webrtc::AudioReceiveStreamInterface* stream); + void RemoveReceivingStream(webrtc::AudioReceiveStreamInterface* stream); + + void AddSendingStream(webrtc::AudioSendStream* stream, + int sample_rate_hz, + size_t num_channels); + void RemoveSendingStream(webrtc::AudioSendStream* stream); + + private: + void UpdateAudioTransportWithSendingStreams(); + void UpdateNullAudioPollerState() RTC_RUN_ON(&thread_checker_); + + SequenceChecker thread_checker_; + SequenceChecker process_thread_checker_{SequenceChecker::kDetached}; + const webrtc::AudioState::Config config_; + bool recording_enabled_ = true; + bool playout_enabled_ = true; + + // Transports mixed audio from the mixer to the audio device and + // recorded audio to the 
sending streams. + AudioTransportImpl audio_transport_; + + // Null audio poller is used to continue polling the audio streams if audio + // playout is disabled so that audio processing still happens and the audio + // stats are still updated. + RepeatingTaskHandle null_audio_poller_ RTC_GUARDED_BY(&thread_checker_); + + webrtc::flat_set<webrtc::AudioReceiveStreamInterface*> receiving_streams_; + struct StreamProperties { + int sample_rate_hz = 0; + size_t num_channels = 0; + }; + std::map<webrtc::AudioSendStream*, StreamProperties> sending_streams_; +}; +} // namespace internal +} // namespace webrtc + +#endif // AUDIO_AUDIO_STATE_H_ diff --git a/third_party/libwebrtc/audio/audio_state_unittest.cc b/third_party/libwebrtc/audio/audio_state_unittest.cc new file mode 100644 index 0000000000..070e220979 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_state_unittest.cc @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/audio_state.h" + +#include <memory> +#include <utility> +#include <vector> + +#include "api/task_queue/test/mock_task_queue_base.h" +#include "call/test/mock_audio_send_stream.h" +#include "modules/audio_device/include/mock_audio_device.h" +#include "modules/audio_mixer/audio_mixer_impl.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { +namespace { + +using ::testing::_; +using ::testing::Matcher; +using ::testing::NiceMock; +using ::testing::StrictMock; +using ::testing::Values; + +constexpr int kSampleRate = 16000; +constexpr int kNumberOfChannels = 1; + +struct FakeAsyncAudioProcessingHelper { + class FakeTaskQueue : public StrictMock<MockTaskQueueBase> { + public: + FakeTaskQueue() = default; + + void Delete() override { delete this; } + void PostTaskImpl(absl::AnyInvocable<void() &&> task, + const PostTaskTraits& /*traits*/, + const Location& /*location*/) override { + std::move(task)(); + } + }; + + class FakeTaskQueueFactory : public TaskQueueFactory { + public: + FakeTaskQueueFactory() = default; + ~FakeTaskQueueFactory() override = default; + std::unique_ptr<TaskQueueBase, TaskQueueDeleter> CreateTaskQueue( + absl::string_view name, + Priority priority) const override { + return std::unique_ptr<webrtc::TaskQueueBase, webrtc::TaskQueueDeleter>( + new FakeTaskQueue()); + } + }; + + class MockAudioFrameProcessor : public AudioFrameProcessor { + public: + ~MockAudioFrameProcessor() override = default; + + MOCK_METHOD(void, ProcessCalled, ()); + MOCK_METHOD(void, SinkSet, ()); + MOCK_METHOD(void, SinkCleared, ()); + + void Process(std::unique_ptr<AudioFrame> frame) override { + ProcessCalled(); + sink_callback_(std::move(frame)); + } + + void SetSink(OnAudioFrameCallback sink_callback) override { + sink_callback_ = std::move(sink_callback); + if (sink_callback_ == nullptr) + SinkCleared(); + else + SinkSet(); + } + + private: + OnAudioFrameCallback 
sink_callback_; + }; + + NiceMock<MockAudioFrameProcessor> audio_frame_processor_; + FakeTaskQueueFactory task_queue_factory_; + + rtc::scoped_refptr<AsyncAudioProcessing::Factory> CreateFactory() { + return rtc::make_ref_counted<AsyncAudioProcessing::Factory>( + audio_frame_processor_, task_queue_factory_); + } +}; + +struct ConfigHelper { + struct Params { + bool use_null_audio_processing; + bool use_async_audio_processing; + }; + + explicit ConfigHelper(const Params& params) + : audio_mixer(AudioMixerImpl::Create()) { + audio_state_config.audio_mixer = audio_mixer; + audio_state_config.audio_processing = + params.use_null_audio_processing + ? nullptr + : rtc::make_ref_counted<testing::NiceMock<MockAudioProcessing>>(); + audio_state_config.audio_device_module = + rtc::make_ref_counted<NiceMock<MockAudioDeviceModule>>(); + if (params.use_async_audio_processing) { + audio_state_config.async_audio_processing_factory = + async_audio_processing_helper_.CreateFactory(); + } + } + AudioState::Config& config() { return audio_state_config; } + rtc::scoped_refptr<AudioMixer> mixer() { return audio_mixer; } + NiceMock<FakeAsyncAudioProcessingHelper::MockAudioFrameProcessor>& + mock_audio_frame_processor() { + return async_audio_processing_helper_.audio_frame_processor_; + } + + private: + AudioState::Config audio_state_config; + rtc::scoped_refptr<AudioMixer> audio_mixer; + FakeAsyncAudioProcessingHelper async_audio_processing_helper_; +}; + +class FakeAudioSource : public AudioMixer::Source { + public: + // TODO(aleloi): Valid overrides commented out, because the gmock + // methods don't use any override declarations, and we want to avoid + // warnings from -Winconsistent-missing-override. See + // http://crbug.com/428099. 
+ int Ssrc() const /*override*/ { return 0; } + + int PreferredSampleRate() const /*override*/ { return kSampleRate; } + + MOCK_METHOD(AudioFrameInfo, + GetAudioFrameWithInfo, + (int sample_rate_hz, AudioFrame*), + (override)); +}; + +std::vector<int16_t> Create10msTestData(int sample_rate_hz, + size_t num_channels) { + const int samples_per_channel = sample_rate_hz / 100; + std::vector<int16_t> audio_data(samples_per_channel * num_channels, 0); + // Fill the first channel with a 1kHz sine wave. + const float inc = (2 * 3.14159265f * 1000) / sample_rate_hz; + float w = 0.f; + for (int i = 0; i < samples_per_channel; ++i) { + audio_data[i * num_channels] = static_cast<int16_t>(32767.f * std::sin(w)); + w += inc; + } + return audio_data; +} + +std::vector<uint32_t> ComputeChannelLevels(AudioFrame* audio_frame) { + const size_t num_channels = audio_frame->num_channels_; + const size_t samples_per_channel = audio_frame->samples_per_channel_; + std::vector<uint32_t> levels(num_channels, 0); + for (size_t i = 0; i < samples_per_channel; ++i) { + for (size_t j = 0; j < num_channels; ++j) { + levels[j] += std::abs(audio_frame->data()[i * num_channels + j]); + } + } + return levels; +} +} // namespace + +class AudioStateTest : public ::testing::TestWithParam<ConfigHelper::Params> {}; + +TEST_P(AudioStateTest, Create) { + ConfigHelper helper(GetParam()); + auto audio_state = AudioState::Create(helper.config()); + EXPECT_TRUE(audio_state.get()); +} + +TEST_P(AudioStateTest, ConstructDestruct) { + ConfigHelper helper(GetParam()); + rtc::scoped_refptr<internal::AudioState> audio_state( + rtc::make_ref_counted<internal::AudioState>(helper.config())); +} + +TEST_P(AudioStateTest, RecordedAudioArrivesAtSingleStream) { + ConfigHelper helper(GetParam()); + + if (GetParam().use_async_audio_processing) { + EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet); + EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled); + EXPECT_CALL(helper.mock_audio_frame_processor(), 
SinkCleared); + } + + rtc::scoped_refptr<internal::AudioState> audio_state( + rtc::make_ref_counted<internal::AudioState>(helper.config())); + + MockAudioSendStream stream; + audio_state->AddSendingStream(&stream, 8000, 2); + + EXPECT_CALL( + stream, + SendAudioDataForMock(::testing::AllOf( + ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(8000)), + ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(2u))))) + .WillOnce( + // Verify that channels are not swapped by default. + ::testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + EXPECT_EQ(0u, levels[1]); + })); + MockAudioProcessing* ap = + GetParam().use_null_audio_processing + ? nullptr + : static_cast<MockAudioProcessing*>(audio_state->audio_processing()); + if (ap) { + EXPECT_CALL(*ap, set_stream_delay_ms(0)); + EXPECT_CALL(*ap, set_stream_key_pressed(false)); + EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_))); + } + + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 2; + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels, + kSampleRate, 0, 0, 0, false, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream); +} + +TEST_P(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) { + ConfigHelper helper(GetParam()); + + if (GetParam().use_async_audio_processing) { + EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet); + EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled); + EXPECT_CALL(helper.mock_audio_frame_processor(), SinkCleared); + } + + rtc::scoped_refptr<internal::AudioState> audio_state( + rtc::make_ref_counted<internal::AudioState>(helper.config())); + + MockAudioSendStream stream_1; + MockAudioSendStream stream_2; + 
audio_state->AddSendingStream(&stream_1, 8001, 2); + audio_state->AddSendingStream(&stream_2, 32000, 1); + + EXPECT_CALL( + stream_1, + SendAudioDataForMock(::testing::AllOf( + ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(16000)), + ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u))))) + .WillOnce( + // Verify that there is output signal. + ::testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + })); + EXPECT_CALL( + stream_2, + SendAudioDataForMock(::testing::AllOf( + ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(16000)), + ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u))))) + .WillOnce( + // Verify that there is output signal. + ::testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + })); + MockAudioProcessing* ap = + static_cast<MockAudioProcessing*>(audio_state->audio_processing()); + if (ap) { + EXPECT_CALL(*ap, set_stream_delay_ms(5)); + EXPECT_CALL(*ap, set_stream_key_pressed(true)); + EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_))); + } + + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 1; + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels, + kSampleRate, 5, 0, 0, true, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream_1); + audio_state->RemoveSendingStream(&stream_2); +} + +TEST_P(AudioStateTest, EnableChannelSwap) { + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 2; + + ConfigHelper helper(GetParam()); + + if (GetParam().use_async_audio_processing) { + EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet); + EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled); + 
EXPECT_CALL(helper.mock_audio_frame_processor(), SinkCleared); + } + + rtc::scoped_refptr<internal::AudioState> audio_state( + rtc::make_ref_counted<internal::AudioState>(helper.config())); + + audio_state->SetStereoChannelSwapping(true); + + MockAudioSendStream stream; + audio_state->AddSendingStream(&stream, kSampleRate, kNumChannels); + + EXPECT_CALL(stream, SendAudioDataForMock(_)) + .WillOnce( + // Verify that channels are swapped. + ::testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_EQ(0u, levels[0]); + EXPECT_LT(0u, levels[1]); + })); + + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels, + kSampleRate, 0, 0, 0, false, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream); +} + +TEST_P(AudioStateTest, + QueryingTransportForAudioShouldResultInGetAudioCallOnMixerSource) { + ConfigHelper helper(GetParam()); + auto audio_state = AudioState::Create(helper.config()); + + FakeAudioSource fake_source; + helper.mixer()->AddSource(&fake_source); + + EXPECT_CALL(fake_source, GetAudioFrameWithInfo(_, _)) + .WillOnce( + ::testing::Invoke([](int sample_rate_hz, AudioFrame* audio_frame) { + audio_frame->sample_rate_hz_ = sample_rate_hz; + audio_frame->samples_per_channel_ = sample_rate_hz / 100; + audio_frame->num_channels_ = kNumberOfChannels; + return AudioMixer::Source::AudioFrameInfo::kNormal; + })); + + int16_t audio_buffer[kSampleRate / 100 * kNumberOfChannels]; + size_t n_samples_out; + int64_t elapsed_time_ms; + int64_t ntp_time_ms; + audio_state->audio_transport()->NeedMorePlayData( + kSampleRate / 100, kNumberOfChannels * 2, kNumberOfChannels, kSampleRate, + audio_buffer, n_samples_out, &elapsed_time_ms, &ntp_time_ms); +} + +INSTANTIATE_TEST_SUITE_P(AudioStateTest, + AudioStateTest, + 
Values(ConfigHelper::Params({false, false}), + ConfigHelper::Params({true, false}), + ConfigHelper::Params({false, true}), + ConfigHelper::Params({true, true}))); + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_transport_impl.cc b/third_party/libwebrtc/audio/audio_transport_impl.cc new file mode 100644 index 0000000000..42a81d5b4a --- /dev/null +++ b/third_party/libwebrtc/audio/audio_transport_impl.cc @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/audio_transport_impl.h" + +#include <algorithm> +#include <memory> +#include <utility> + +#include "audio/remix_resample.h" +#include "audio/utility/audio_frame_operations.h" +#include "call/audio_sender.h" +#include "modules/async_audio_processing/async_audio_processing.h" +#include "modules/audio_processing/include/audio_frame_proxies.h" +#include "rtc_base/checks.h" +#include "rtc_base/trace_event.h" + +namespace webrtc { + +namespace { + +// We want to process at the lowest sample rate and channel count possible +// without losing information. Choose the lowest native rate at least equal to +// the minimum of input and codec rates, choose lowest channel count, and +// configure the audio frame. 
+void InitializeCaptureFrame(int input_sample_rate, + int send_sample_rate_hz, + size_t input_num_channels, + size_t send_num_channels, + AudioFrame* audio_frame) { + RTC_DCHECK(audio_frame); + int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz); + for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) { + audio_frame->sample_rate_hz_ = native_rate_hz; + if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) { + break; + } + } + audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels); +} + +void ProcessCaptureFrame(uint32_t delay_ms, + bool key_pressed, + bool swap_stereo_channels, + AudioProcessing* audio_processing, + AudioFrame* audio_frame) { + RTC_DCHECK(audio_frame); + if (audio_processing) { + audio_processing->set_stream_delay_ms(delay_ms); + audio_processing->set_stream_key_pressed(key_pressed); + int error = ProcessAudioFrame(audio_processing, audio_frame); + + RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error; + } + + if (swap_stereo_channels) { + AudioFrameOperations::SwapStereoChannels(audio_frame); + } +} + +// Resample audio in `frame` to given sample rate preserving the +// channel count and place the result in `destination`. +int Resample(const AudioFrame& frame, + const int destination_sample_rate, + PushResampler<int16_t>* resampler, + int16_t* destination) { + TRACE_EVENT2("webrtc", "Resample", "frame sample rate", frame.sample_rate_hz_, + "destination_sample_rate", destination_sample_rate); + const int number_of_channels = static_cast<int>(frame.num_channels_); + const int target_number_of_samples_per_channel = + destination_sample_rate / 100; + resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, + number_of_channels); + + // TODO(yujo): make resampler take an AudioFrame, and add special case + // handling of muted frames. 
+ return resampler->Resample( + frame.data(), frame.samples_per_channel_ * number_of_channels, + destination, number_of_channels * target_number_of_samples_per_channel); +} +} // namespace + +AudioTransportImpl::AudioTransportImpl( + AudioMixer* mixer, + AudioProcessing* audio_processing, + AsyncAudioProcessing::Factory* async_audio_processing_factory) + : audio_processing_(audio_processing), + async_audio_processing_( + async_audio_processing_factory + ? async_audio_processing_factory->CreateAsyncAudioProcessing( + [this](std::unique_ptr<AudioFrame> frame) { + this->SendProcessedData(std::move(frame)); + }) + : nullptr), + mixer_(mixer) { + RTC_DCHECK(mixer); +} + +AudioTransportImpl::~AudioTransportImpl() {} + +int32_t AudioTransportImpl::RecordedDataIsAvailable( + const void* audio_data, + size_t number_of_frames, + size_t bytes_per_sample, + size_t number_of_channels, + uint32_t sample_rate, + uint32_t audio_delay_milliseconds, + int32_t clock_drift, + uint32_t volume, + bool key_pressed, + uint32_t& new_mic_volume) { // NOLINT: to avoid changing APIs + return RecordedDataIsAvailable( + audio_data, number_of_frames, bytes_per_sample, number_of_channels, + sample_rate, audio_delay_milliseconds, clock_drift, volume, key_pressed, + new_mic_volume, /*estimated_capture_time_ns=*/absl::nullopt); +} + +// Not used in Chromium. Process captured audio and distribute to all sending +// streams, and try to do this at the lowest possible sample rate. 
+int32_t AudioTransportImpl::RecordedDataIsAvailable( + const void* audio_data, + size_t number_of_frames, + size_t bytes_per_sample, + size_t number_of_channels, + uint32_t sample_rate, + uint32_t audio_delay_milliseconds, + int32_t /*clock_drift*/, + uint32_t /*volume*/, + bool key_pressed, + uint32_t& /*new_mic_volume*/, + absl::optional<int64_t> + estimated_capture_time_ns) { // NOLINT: to avoid changing APIs + RTC_DCHECK(audio_data); + RTC_DCHECK_GE(number_of_channels, 1); + RTC_DCHECK_LE(number_of_channels, 2); + RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); + RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); + // 100 = 1 second / data duration (10 ms). + RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); + RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels, + AudioFrame::kMaxDataSizeBytes); + + int send_sample_rate_hz = 0; + size_t send_num_channels = 0; + bool swap_stereo_channels = false; + { + MutexLock lock(&capture_lock_); + send_sample_rate_hz = send_sample_rate_hz_; + send_num_channels = send_num_channels_; + swap_stereo_channels = swap_stereo_channels_; + } + + std::unique_ptr<AudioFrame> audio_frame(new AudioFrame()); + InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels, + send_num_channels, audio_frame.get()); + voe::RemixAndResample(static_cast<const int16_t*>(audio_data), + number_of_frames, number_of_channels, sample_rate, + &capture_resampler_, audio_frame.get()); + ProcessCaptureFrame(audio_delay_milliseconds, key_pressed, + swap_stereo_channels, audio_processing_, + audio_frame.get()); + + if (estimated_capture_time_ns) { + audio_frame->set_absolute_capture_timestamp_ms(*estimated_capture_time_ns / + 1000000); + } + + RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); + if (async_audio_processing_) + async_audio_processing_->Process(std::move(audio_frame)); + else + SendProcessedData(std::move(audio_frame)); + + return 0; +} + +void 
AudioTransportImpl::SendProcessedData( + std::unique_ptr<AudioFrame> audio_frame) { + TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData"); + RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); + MutexLock lock(&capture_lock_); + if (audio_senders_.empty()) + return; + + auto it = audio_senders_.begin(); + while (++it != audio_senders_.end()) { + auto audio_frame_copy = std::make_unique<AudioFrame>(); + audio_frame_copy->CopyFrom(*audio_frame); + (*it)->SendAudioData(std::move(audio_frame_copy)); + } + // Send the original frame to the first stream w/o copying. + (*audio_senders_.begin())->SendAudioData(std::move(audio_frame)); +} + +// Mix all received streams, feed the result to the AudioProcessing module, then +// resample the result to the requested output rate. +int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples, + const size_t nBytesPerSample, + const size_t nChannels, + const uint32_t samplesPerSec, + void* audioSamples, + size_t& nSamplesOut, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) { + TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData"); + RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); + RTC_DCHECK_GE(nChannels, 1); + RTC_DCHECK_LE(nChannels, 2); + RTC_DCHECK_GE( + samplesPerSec, + static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz)); + + // 100 = 1 second / data duration (10 ms). 
+ RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); + RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, + AudioFrame::kMaxDataSizeBytes); + + mixer_->Mix(nChannels, &mixed_frame_); + *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; + *ntp_time_ms = mixed_frame_.ntp_time_ms_; + + if (audio_processing_) { + const auto error = + ProcessReverseAudioFrame(audio_processing_, &mixed_frame_); + RTC_DCHECK_EQ(error, AudioProcessing::kNoError); + } + + nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_, + static_cast<int16_t*>(audioSamples)); + RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples); + return 0; +} + +// Used by Chromium - same as NeedMorePlayData() but because Chrome has its +// own APM instance, does not call audio_processing_->ProcessReverseStream(). +void AudioTransportImpl::PullRenderData(int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t number_of_frames, + void* audio_data, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) { + TRACE_EVENT2("webrtc", "AudioTransportImpl::PullRenderData", "sample_rate", + sample_rate, "number_of_frames", number_of_frames); + RTC_DCHECK_EQ(bits_per_sample, 16); + RTC_DCHECK_GE(number_of_channels, 1); + RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); + + // 100 = 1 second / data duration (10 ms). + RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); + + // 8 = bits per byte. 
+ RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, + AudioFrame::kMaxDataSizeBytes); + mixer_->Mix(number_of_channels, &mixed_frame_); + *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; + *ntp_time_ms = mixed_frame_.ntp_time_ms_; + + auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_, + static_cast<int16_t*>(audio_data)); + RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames); +} + +void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders, + int send_sample_rate_hz, + size_t send_num_channels) { + MutexLock lock(&capture_lock_); + audio_senders_ = std::move(senders); + send_sample_rate_hz_ = send_sample_rate_hz; + send_num_channels_ = send_num_channels; +} + +void AudioTransportImpl::SetStereoChannelSwapping(bool enable) { + MutexLock lock(&capture_lock_); + swap_stereo_channels_ = enable; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/audio_transport_impl.h b/third_party/libwebrtc/audio/audio_transport_impl.h new file mode 100644 index 0000000000..24b09d2140 --- /dev/null +++ b/third_party/libwebrtc/audio/audio_transport_impl.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef AUDIO_AUDIO_TRANSPORT_IMPL_H_ +#define AUDIO_AUDIO_TRANSPORT_IMPL_H_ + +#include <memory> +#include <vector> + +#include "api/audio/audio_mixer.h" +#include "api/scoped_refptr.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "modules/async_audio_processing/async_audio_processing.h" +#include "modules/audio_device/include/audio_device.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class AudioSender; + +class AudioTransportImpl : public AudioTransport { + public: + AudioTransportImpl( + AudioMixer* mixer, + AudioProcessing* audio_processing, + AsyncAudioProcessing::Factory* async_audio_processing_factory); + + AudioTransportImpl() = delete; + AudioTransportImpl(const AudioTransportImpl&) = delete; + AudioTransportImpl& operator=(const AudioTransportImpl&) = delete; + + ~AudioTransportImpl() override; + + // TODO(bugs.webrtc.org/13620) Deprecate this function + int32_t RecordedDataIsAvailable(const void* audioSamples, + size_t nSamples, + size_t nBytesPerSample, + size_t nChannels, + uint32_t samplesPerSec, + uint32_t totalDelayMS, + int32_t clockDrift, + uint32_t currentMicLevel, + bool keyPressed, + uint32_t& newMicLevel) override; + + int32_t RecordedDataIsAvailable( + const void* audioSamples, + size_t nSamples, + size_t nBytesPerSample, + size_t nChannels, + uint32_t samplesPerSec, + uint32_t totalDelayMS, + int32_t clockDrift, + uint32_t currentMicLevel, + bool keyPressed, + uint32_t& newMicLevel, + absl::optional<int64_t> estimated_capture_time_ns) override; + + int32_t NeedMorePlayData(size_t nSamples, + size_t nBytesPerSample, + size_t nChannels, + uint32_t samplesPerSec, + void* audioSamples, + size_t& nSamplesOut, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) override; + + void PullRenderData(int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t 
number_of_frames, + void* audio_data, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) override; + + void UpdateAudioSenders(std::vector<AudioSender*> senders, + int send_sample_rate_hz, + size_t send_num_channels); + void SetStereoChannelSwapping(bool enable); + + private: + void SendProcessedData(std::unique_ptr<AudioFrame> audio_frame); + + // Shared. + AudioProcessing* audio_processing_ = nullptr; + + // Capture side. + + // Thread-safe. + const std::unique_ptr<AsyncAudioProcessing> async_audio_processing_; + + mutable Mutex capture_lock_; + std::vector<AudioSender*> audio_senders_ RTC_GUARDED_BY(capture_lock_); + int send_sample_rate_hz_ RTC_GUARDED_BY(capture_lock_) = 8000; + size_t send_num_channels_ RTC_GUARDED_BY(capture_lock_) = 1; + bool swap_stereo_channels_ RTC_GUARDED_BY(capture_lock_) = false; + PushResampler<int16_t> capture_resampler_; + + // Render side. + + rtc::scoped_refptr<AudioMixer> mixer_; + AudioFrame mixed_frame_; + // Converts mixed audio to the audio device output rate. + PushResampler<int16_t> render_resampler_; +}; +} // namespace webrtc + +#endif // AUDIO_AUDIO_TRANSPORT_IMPL_H_ diff --git a/third_party/libwebrtc/audio/channel_receive.cc b/third_party/libwebrtc/audio/channel_receive.cc new file mode 100644 index 0000000000..c714b1dd4d --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive.cc @@ -0,0 +1,1129 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/channel_receive.h" + +#include <algorithm> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "api/crypto/frame_decryptor_interface.h" +#include "api/frame_transformer_interface.h" +#include "api/rtc_event_log/rtc_event_log.h" +#include "api/sequence_checker.h" +#include "api/task_queue/pending_task_safety_flag.h" +#include "api/task_queue/task_queue_base.h" +#include "api/units/time_delta.h" +#include "audio/audio_level.h" +#include "audio/channel_receive_frame_transformer_delegate.h" +#include "audio/channel_send.h" +#include "audio/utility/audio_frame_operations.h" +#include "logging/rtc_event_log/events/rtc_event_audio_playout.h" +#include "logging/rtc_event_log/events/rtc_event_neteq_set_minimum_delay.h" +#include "modules/audio_coding/acm2/acm_receiver.h" +#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h" +#include "modules/audio_device/include/audio_device.h" +#include "modules/pacing/packet_router.h" +#include "modules/rtp_rtcp/include/receive_statistics.h" +#include "modules/rtp_rtcp/include/remote_ntp_time_estimator.h" +#include "modules/rtp_rtcp/source/absolute_capture_time_interpolator.h" +#include "modules/rtp_rtcp/source/capture_clock_offset_updater.h" +#include "modules/rtp_rtcp/source/rtp_header_extensions.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_config.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/numerics/sequence_number_unwrapper.h" +#include "rtc_base/race_checker.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/system/no_unique_address.h" +#include "rtc_base/time_utils.h" +#include "rtc_base/trace_event.h" +#include "system_wrappers/include/metrics.h" +#include "system_wrappers/include/ntp_time.h" + +namespace 
webrtc { +namespace voe { + +namespace { + +constexpr double kAudioSampleDurationSeconds = 0.01; + +// Video Sync. +constexpr int kVoiceEngineMinMinPlayoutDelayMs = 0; +constexpr int kVoiceEngineMaxMinPlayoutDelayMs = 10000; + +acm2::AcmReceiver::Config AcmConfig( + NetEqFactory* neteq_factory, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id, + size_t jitter_buffer_max_packets, + bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms) { + acm2::AcmReceiver::Config acm_config; + acm_config.neteq_factory = neteq_factory; + acm_config.decoder_factory = decoder_factory; + acm_config.neteq_config.codec_pair_id = codec_pair_id; + acm_config.neteq_config.max_packets_in_buffer = jitter_buffer_max_packets; + acm_config.neteq_config.enable_fast_accelerate = jitter_buffer_fast_playout; + acm_config.neteq_config.enable_muted_state = true; + acm_config.neteq_config.min_delay_ms = jitter_buffer_min_delay_ms; + + return acm_config; +} + +class ChannelReceive : public ChannelReceiveInterface, + public RtcpPacketTypeCounterObserver { + public: + // Used for receive streams. 
+ ChannelReceive( + Clock* clock, + NetEqFactory* neteq_factory, + AudioDeviceModule* audio_device_module, + Transport* rtcp_send_transport, + RtcEventLog* rtc_event_log, + uint32_t local_ssrc, + uint32_t remote_ssrc, + size_t jitter_buffer_max_packets, + bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, + bool enable_non_sender_rtt, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id, + rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor, + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + RtcpEventObserver* rtcp_event_observer); + ~ChannelReceive() override; + + void SetSink(AudioSinkInterface* sink) override; + + void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) override; + + // API methods + + void StartPlayout() override; + void StopPlayout() override; + + // Codecs + absl::optional<std::pair<int, SdpAudioFormat>> GetReceiveCodec() + const override; + + void ReceivedRTCPPacket(const uint8_t* data, size_t length) override; + + // RtpPacketSinkInterface. + void OnRtpPacket(const RtpPacketReceived& packet) override; + + // Muting, Volume and Level. + void SetChannelOutputVolumeScaling(float scaling) override; + int GetSpeechOutputLevelFullRange() const override; + // See description of "totalAudioEnergy" in the WebRTC stats spec: + // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy + double GetTotalOutputEnergy() const override; + double GetTotalOutputDuration() const override; + + // Stats. + NetworkStatistics GetNetworkStatistics( + bool get_and_clear_legacy_stats) const override; + AudioDecodingCallStats GetDecodingCallStatistics() const override; + + // Audio+Video Sync. 
+ uint32_t GetDelayEstimate() const override; + bool SetMinimumPlayoutDelay(int delayMs) override; + bool GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp, + int64_t* time_ms) const override; + void SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms, + int64_t time_ms) override; + absl::optional<int64_t> GetCurrentEstimatedPlayoutNtpTimestampMs( + int64_t now_ms) const override; + + // Audio quality. + bool SetBaseMinimumPlayoutDelayMs(int delay_ms) override; + int GetBaseMinimumPlayoutDelayMs() const override; + + // Produces the transport-related timestamps; current_delay_ms is left unset. + absl::optional<Syncable::Info> GetSyncInfo() const override; + + void RegisterReceiverCongestionControlObjects( + PacketRouter* packet_router) override; + void ResetReceiverCongestionControlObjects() override; + + CallReceiveStatistics GetRTCPStatistics() const override; + void SetNACKStatus(bool enable, int maxNumberOfPackets) override; + void SetNonSenderRttMeasurement(bool enabled) override; + + AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo( + int sample_rate_hz, + AudioFrame* audio_frame) override; + + int PreferredSampleRate() const override; + + void SetSourceTracker(SourceTracker* source_tracker) override; + + // Associate to a send channel. + // Used for obtaining RTT for a receive-only channel. + void SetAssociatedSendChannel(const ChannelSendInterface* channel) override; + + // Sets a frame transformer between the depacketizer and the decoder, to + // transform the received frames before decoding them. 
+ void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) + override; + + void SetFrameDecryptor(rtc::scoped_refptr<webrtc::FrameDecryptorInterface> + frame_decryptor) override; + + void OnLocalSsrcChange(uint32_t local_ssrc) override; + uint32_t GetLocalSsrc() const override; + + void RtcpPacketTypesCounterUpdated( + uint32_t ssrc, + const RtcpPacketTypeCounter& packet_counter) override; + + private: + void ReceivePacket(const uint8_t* packet, + size_t packet_length, + const RTPHeader& header) + RTC_RUN_ON(worker_thread_checker_); + int ResendPackets(const uint16_t* sequence_numbers, int length); + void UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms) + RTC_RUN_ON(worker_thread_checker_); + + int GetRtpTimestampRateHz() const; + + void OnReceivedPayloadData(rtc::ArrayView<const uint8_t> payload, + const RTPHeader& rtpHeader) + RTC_RUN_ON(worker_thread_checker_); + + void InitFrameTransformerDelegate( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) + RTC_RUN_ON(worker_thread_checker_); + + // Thread checkers document and lock usage of some methods to specific threads + // we know about. The goal is to eventually split up voe::ChannelReceive into + // parts with single-threaded semantics, and thereby reduce the need for + // locks. + RTC_NO_UNIQUE_ADDRESS SequenceChecker worker_thread_checker_; + RTC_NO_UNIQUE_ADDRESS SequenceChecker network_thread_checker_; + + TaskQueueBase* const worker_thread_; + ScopedTaskSafety worker_safety_; + + // Methods accessed from audio and video threads are checked for sequential- + // only access. We don't necessarily own and control these threads, so thread + // checkers cannot be used. E.g. Chromium may transfer "ownership" from one + // audio thread to another, but access is still sequential. 
+ rtc::RaceChecker audio_thread_race_checker_; + Mutex callback_mutex_; + Mutex volume_settings_mutex_; + + bool playing_ RTC_GUARDED_BY(worker_thread_checker_) = false; + + RtcEventLog* const event_log_; + + // Indexed by payload type. + std::map<uint8_t, int> payload_type_frequencies_; + + std::unique_ptr<ReceiveStatistics> rtp_receive_statistics_; + std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; + const uint32_t remote_ssrc_; + SourceTracker* source_tracker_ = nullptr; + + // Info for GetSyncInfo is updated on network or worker thread, and queried on + // the worker thread. + absl::optional<uint32_t> last_received_rtp_timestamp_ + RTC_GUARDED_BY(&worker_thread_checker_); + absl::optional<int64_t> last_received_rtp_system_time_ms_ + RTC_GUARDED_BY(&worker_thread_checker_); + + // The AcmReceiver is thread safe, using its own lock. + acm2::AcmReceiver acm_receiver_; + AudioSinkInterface* audio_sink_ = nullptr; + AudioLevel _outputAudioLevel; + + Clock* const clock_; + RemoteNtpTimeEstimator ntp_estimator_ RTC_GUARDED_BY(ts_stats_lock_); + + // Timestamp of the audio pulled from NetEq. + absl::optional<uint32_t> jitter_buffer_playout_timestamp_; + + uint32_t playout_timestamp_rtp_ RTC_GUARDED_BY(worker_thread_checker_); + absl::optional<int64_t> playout_timestamp_rtp_time_ms_ + RTC_GUARDED_BY(worker_thread_checker_); + uint32_t playout_delay_ms_ RTC_GUARDED_BY(worker_thread_checker_); + absl::optional<int64_t> playout_timestamp_ntp_ + RTC_GUARDED_BY(worker_thread_checker_); + absl::optional<int64_t> playout_timestamp_ntp_time_ms_ + RTC_GUARDED_BY(worker_thread_checker_); + + mutable Mutex ts_stats_lock_; + + webrtc::RtpTimestampUnwrapper rtp_ts_wraparound_handler_; + // The rtp timestamp of the first played out audio frame. + int64_t capture_start_rtp_time_stamp_; + // The capture ntp time (in local timebase) of the first played out audio + // frame. 
+ int64_t capture_start_ntp_time_ms_ RTC_GUARDED_BY(ts_stats_lock_); + + AudioDeviceModule* _audioDeviceModulePtr; + float _outputGain RTC_GUARDED_BY(volume_settings_mutex_); + + const ChannelSendInterface* associated_send_channel_ + RTC_GUARDED_BY(network_thread_checker_); + + PacketRouter* packet_router_ = nullptr; + + SequenceChecker construction_thread_; + + // E2EE Audio Frame Decryption + rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor_ + RTC_GUARDED_BY(worker_thread_checker_); + webrtc::CryptoOptions crypto_options_; + + webrtc::AbsoluteCaptureTimeInterpolator absolute_capture_time_interpolator_ + RTC_GUARDED_BY(worker_thread_checker_); + + webrtc::CaptureClockOffsetUpdater capture_clock_offset_updater_ + RTC_GUARDED_BY(ts_stats_lock_); + + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> + frame_transformer_delegate_; + + // Counter that's used to control the frequency of reporting histograms + // from the `GetAudioFrameWithInfo` callback. + int audio_frame_interval_count_ RTC_GUARDED_BY(audio_thread_race_checker_) = + 0; + // Controls how many callbacks we let pass by before reporting callback stats. + // A value of 100 means 100 callbacks, each one of which represents 10ms worth + // of data, so the stats reporting frequency will be 1Hz (modulo failures). + constexpr static int kHistogramReportingInterval = 100; + + mutable Mutex rtcp_counter_mutex_; + RtcpPacketTypeCounter rtcp_packet_type_counter_ + RTC_GUARDED_BY(rtcp_counter_mutex_); +}; + +void ChannelReceive::OnReceivedPayloadData( + rtc::ArrayView<const uint8_t> payload, + const RTPHeader& rtpHeader) { + if (!playing_) { + // Avoid inserting into NetEQ when we are not playing. Count the + // packet as discarded. + + // If we have a source_tracker_, tell it that the frame has been + // "delivered". Normally, this happens in AudioReceiveStreamInterface when + // audio frames are pulled out, but when playout is muted, nothing is + // pulling frames. 
The downside of this approach is that frames delivered + // this way won't be delayed for playout, and therefore will be + // unsynchronized with (a) audio delay when playing and (b) any audio/video + // synchronization. But the alternative is that muting playout also stops + // the SourceTracker from updating RtpSource information. + if (source_tracker_) { + RtpPacketInfos::vector_type packet_vector = { + RtpPacketInfo(rtpHeader, clock_->CurrentTime())}; + source_tracker_->OnFrameDelivered(RtpPacketInfos(packet_vector)); + } + + return; + } + + // Push the incoming payload (parsed and ready for decoding) into the ACM + if (acm_receiver_.InsertPacket(rtpHeader, payload) != 0) { + RTC_DLOG(LS_ERROR) << "ChannelReceive::OnReceivedPayloadData() unable to " + "push data to the ACM"; + return; + } + + TimeDelta round_trip_time = rtp_rtcp_->LastRtt().value_or(TimeDelta::Zero()); + + std::vector<uint16_t> nack_list = + acm_receiver_.GetNackList(round_trip_time.ms()); + if (!nack_list.empty()) { + // Can't use nack_list.data() since it's not supported by all + // compilers. + ResendPackets(&(nack_list[0]), static_cast<int>(nack_list.size())); + } +} + +void ChannelReceive::InitFrameTransformerDelegate( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) { + RTC_DCHECK(frame_transformer); + RTC_DCHECK(!frame_transformer_delegate_); + RTC_DCHECK(worker_thread_->IsCurrent()); + + // Pass a callback to ChannelReceive::OnReceivedPayloadData, to be called by + // the delegate to receive transformed audio. 
// Pulls one frame of decoded audio from the ACM into `audio_frame` at
// `sample_rate_hz` and post-processes it for the mixer.
//
// Returns kError if the ACM fails to produce audio (the frame content is then
// not meaningful), kMuted if the ACM flagged the frame as muted, and kNormal
// otherwise.
//
// Besides filling the frame, the call logs an audio-playout event, forwards
// the unscaled audio to the optional sink, applies the output gain, updates
// the output level meter, fills in elapsed/NTP times and per-packet capture
// clock offsets, and every kHistogramReportingInterval calls posts a
// histogram-reporting task to the worker thread.
AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
    int sample_rate_hz,
    AudioFrame* audio_frame) {
  // Every return path below must emit the matching TRACE_EVENT_END*.
  TRACE_EVENT_BEGIN1("webrtc", "ChannelReceive::GetAudioFrameWithInfo",
                     "sample_rate_hz", sample_rate_hz);
  RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
  audio_frame->sample_rate_hz_ = sample_rate_hz;

  // Logged unconditionally, i.e. also when GetAudio() fails below.
  event_log_->Log(std::make_unique<RtcEventAudioPlayout>(remote_ssrc_));

  // Get 10ms raw PCM data from the ACM (mixer limits output frequency)
  bool muted;
  if (acm_receiver_.GetAudio(audio_frame->sample_rate_hz_, audio_frame,
                             &muted) == -1) {
    RTC_DLOG(LS_ERROR)
        << "ChannelReceive::GetAudioFrame() PlayoutData10Ms() failed!";
    // In all likelihood, the audio in this frame is garbage. We return an
    // error so that the audio mixer module doesn't add it to the mix. As
    // a result, it won't be played out and the actions skipped here are
    // irrelevant.

    TRACE_EVENT_END1("webrtc", "ChannelReceive::GetAudioFrameWithInfo", "error",
                     1);
    return AudioMixer::Source::AudioFrameInfo::kError;
  }

  if (muted) {
    // TODO(henrik.lundin): We should be able to do better than this. But we
    // will have to go through all the cases below where the audio samples may
    // be used, and handle the muted case in some way.
    AudioFrameOperations::Mute(audio_frame);
  }

  {
    // Pass the audio buffers to an optional sink callback, before applying
    // scaling/panning, as that applies to the mix operation.
    // External recipients of the audio (e.g. via AudioTrack), will do their
    // own mixing/dynamic processing.
    // `callback_mutex_` is held for the duration of the sink callback.
    MutexLock lock(&callback_mutex_);
    if (audio_sink_) {
      AudioSinkInterface::Data data(
          audio_frame->data(), audio_frame->samples_per_channel_,
          audio_frame->sample_rate_hz_, audio_frame->num_channels_,
          audio_frame->timestamp_);
      audio_sink_->OnData(data);
    }
  }

  // Snapshot the gain under its lock so the scaling below runs unlocked.
  float output_gain = 1.0f;
  {
    MutexLock lock(&volume_settings_mutex_);
    output_gain = _outputGain;
  }

  // Output volume scaling
  // Gains within 1% of unity are treated as "no scaling".
  if (output_gain < 0.99f || output_gain > 1.01f) {
    // TODO(solenberg): Combine with mute state - this can cause clicks!
    AudioFrameOperations::ScaleWithSat(output_gain, audio_frame);
  }

  // Measure audio level (0-9)
  // TODO(henrik.lundin) Use the `muted` information here too.
  // TODO(deadbeef): Use RmsLevel for `_outputAudioLevel` (see
  // https://crbug.com/webrtc/7517).
  _outputAudioLevel.ComputeLevel(*audio_frame, kAudioSampleDurationSeconds);

  // `capture_start_rtp_time_stamp_` starts at -1 (set in the constructor); a
  // frame timestamp of 0 is treated as "not yet valid".
  if (capture_start_rtp_time_stamp_ < 0 && audio_frame->timestamp_ != 0) {
    // The first frame with a valid rtp timestamp.
    capture_start_rtp_time_stamp_ = audio_frame->timestamp_;
  }

  if (capture_start_rtp_time_stamp_ >= 0) {
    // audio_frame.timestamp_ should be valid from now on.
    // Compute elapsed time.
    int64_t unwrap_timestamp =
        rtp_ts_wraparound_handler_.Unwrap(audio_frame->timestamp_);
    // NOTE(review): integer division on both levels. The divisor is the RTP
    // rate in whole kHz, so a rate that is not a multiple of 1000 is
    // truncated and a rate below 1000 Hz would make the divisor zero.
    // SetReceiveCodecs() DCHECKs clockrate_hz >= 1000, but whether
    // GetRtpTimestampRateHz() can return anything else is not visible here —
    // confirm.
    audio_frame->elapsed_time_ms_ =
        (unwrap_timestamp - capture_start_rtp_time_stamp_) /
        (GetRtpTimestampRateHz() / 1000);

    {
      MutexLock lock(&ts_stats_lock_);
      // Compute ntp time.
      audio_frame->ntp_time_ms_ =
          ntp_estimator_.Estimate(audio_frame->timestamp_);
      // `ntp_time_ms_` won't be valid until at least 2 RTCP SRs are received.
      if (audio_frame->ntp_time_ms_ > 0) {
        // Compute `capture_start_ntp_time_ms_` so that
        // `capture_start_ntp_time_ms_` + `elapsed_time_ms_` == `ntp_time_ms_`
        capture_start_ntp_time_ms_ =
            audio_frame->ntp_time_ms_ - audio_frame->elapsed_time_ms_;
      }
    }
  }

  // Fill in local capture clock offset in `audio_frame->packet_infos_`.
  // The infos are rebuilt into a fresh vector and then assigned back; only
  // entries that carry an absolute capture time get the offset set.
  RtpPacketInfos::vector_type packet_infos;
  for (auto& packet_info : audio_frame->packet_infos_) {
    RtpPacketInfo new_packet_info(packet_info);
    if (packet_info.absolute_capture_time().has_value()) {
      // `ts_stats_lock_` is taken per packet because it guards
      // `capture_clock_offset_updater_`.
      MutexLock lock(&ts_stats_lock_);
      new_packet_info.set_local_capture_clock_offset(
          capture_clock_offset_updater_.ConvertsToTimeDela(
              capture_clock_offset_updater_.AdjustEstimatedCaptureClockOffset(
                  packet_info.absolute_capture_time()
                      ->estimated_capture_clock_offset)));
    }
    packet_infos.push_back(std::move(new_packet_info));
  }
  audio_frame->packet_infos_ = RtpPacketInfos(packet_infos);

  // Histogram reporting is throttled and moved to the worker thread; the
  // task is tied to `worker_safety_` and captures `this`.
  ++audio_frame_interval_count_;
  if (audio_frame_interval_count_ >= kHistogramReportingInterval) {
    audio_frame_interval_count_ = 0;
    worker_thread_->PostTask(SafeTask(worker_safety_.flag(), [this]() {
      RTC_DCHECK_RUN_ON(&worker_thread_checker_);
      RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.TargetJitterBufferDelayMs",
                                acm_receiver_.TargetDelayMs());
      const int jitter_buffer_delay = acm_receiver_.FilteredCurrentDelayMs();
      RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverDelayEstimateMs",
                                jitter_buffer_delay + playout_delay_ms_);
      RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverJitterBufferDelayMs",
                                jitter_buffer_delay);
      RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverDeviceDelayMs",
                                playout_delay_ms_);
    }));
  }

  TRACE_EVENT_END2("webrtc", "ChannelReceive::GetAudioFrameWithInfo", "gain",
                   output_gain, "muted", muted);
  return muted ? AudioMixer::Source::AudioFrameInfo::kMuted
               : AudioMixer::Source::AudioFrameInfo::kNormal;
}
network_thread_checker_.Detach(); + + acm_receiver_.ResetInitialDelay(); + acm_receiver_.SetMinimumDelay(0); + acm_receiver_.SetMaximumDelay(0); + acm_receiver_.FlushBuffers(); + + _outputAudioLevel.ResetLevelFullRange(); + + rtp_receive_statistics_->EnableRetransmitDetection(remote_ssrc_, true); + RtpRtcpInterface::Configuration configuration; + configuration.clock = clock; + configuration.audio = true; + configuration.receiver_only = true; + configuration.outgoing_transport = rtcp_send_transport; + configuration.receive_statistics = rtp_receive_statistics_.get(); + configuration.event_log = event_log_; + configuration.local_media_ssrc = local_ssrc; + configuration.rtcp_packet_type_counter_observer = this; + configuration.non_sender_rtt_measurement = enable_non_sender_rtt; + configuration.rtcp_event_observer = rtcp_event_observer; + + if (frame_transformer) + InitFrameTransformerDelegate(std::move(frame_transformer)); + + rtp_rtcp_ = ModuleRtpRtcpImpl2::Create(configuration); + rtp_rtcp_->SetRemoteSSRC(remote_ssrc_); + + // Ensure that RTCP is enabled for the created channel. + rtp_rtcp_->SetRTCPStatus(RtcpMode::kCompound); +} + +ChannelReceive::~ChannelReceive() { + RTC_DCHECK_RUN_ON(&construction_thread_); + + // Resets the delegate's callback to ChannelReceive::OnReceivedPayloadData. 
+ if (frame_transformer_delegate_) + frame_transformer_delegate_->Reset(); + + StopPlayout(); +} + +void ChannelReceive::SetSink(AudioSinkInterface* sink) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + MutexLock lock(&callback_mutex_); + audio_sink_ = sink; +} + +void ChannelReceive::StartPlayout() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + playing_ = true; +} + +void ChannelReceive::StopPlayout() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + playing_ = false; + _outputAudioLevel.ResetLevelFullRange(); + acm_receiver_.FlushBuffers(); +} + +absl::optional<std::pair<int, SdpAudioFormat>> ChannelReceive::GetReceiveCodec() + const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return acm_receiver_.LastDecoder(); +} + +void ChannelReceive::SetReceiveCodecs( + const std::map<int, SdpAudioFormat>& codecs) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + for (const auto& kv : codecs) { + RTC_DCHECK_GE(kv.second.clockrate_hz, 1000); + payload_type_frequencies_[kv.first] = kv.second.clockrate_hz; + } + acm_receiver_.SetCodecs(codecs); +} + +void ChannelReceive::OnRtpPacket(const RtpPacketReceived& packet) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // TODO(bugs.webrtc.org/11993): Expect to be called exclusively on the + // network thread. Once that's done, the same applies to + // UpdatePlayoutTimestamp and + int64_t now_ms = rtc::TimeMillis(); + + last_received_rtp_timestamp_ = packet.Timestamp(); + last_received_rtp_system_time_ms_ = now_ms; + + // Store playout timestamp for the received RTP packet + UpdatePlayoutTimestamp(false, now_ms); + + const auto& it = payload_type_frequencies_.find(packet.PayloadType()); + if (it == payload_type_frequencies_.end()) + return; + // TODO(bugs.webrtc.org/7135): Set payload_type_frequency earlier, when packet + // is parsed. 
+ RtpPacketReceived packet_copy(packet); + packet_copy.set_payload_type_frequency(it->second); + + rtp_receive_statistics_->OnRtpPacket(packet_copy); + + RTPHeader header; + packet_copy.GetHeader(&header); + + // Interpolates absolute capture timestamp RTP header extension. + header.extension.absolute_capture_time = + absolute_capture_time_interpolator_.OnReceivePacket( + AbsoluteCaptureTimeInterpolator::GetSource(header.ssrc, + header.arrOfCSRCs), + header.timestamp, + rtc::saturated_cast<uint32_t>(packet_copy.payload_type_frequency()), + header.extension.absolute_capture_time); + + ReceivePacket(packet_copy.data(), packet_copy.size(), header); +} + +void ChannelReceive::ReceivePacket(const uint8_t* packet, + size_t packet_length, + const RTPHeader& header) { + const uint8_t* payload = packet + header.headerLength; + RTC_DCHECK_GE(packet_length, header.headerLength); + size_t payload_length = packet_length - header.headerLength; + + size_t payload_data_length = payload_length - header.paddingLength; + + // E2EE Custom Audio Frame Decryption (This is optional). + // Keep this buffer around for the lifetime of the OnReceivedPayloadData call. + rtc::Buffer decrypted_audio_payload; + if (frame_decryptor_ != nullptr) { + const size_t max_plaintext_size = frame_decryptor_->GetMaxPlaintextByteSize( + cricket::MEDIA_TYPE_AUDIO, payload_length); + decrypted_audio_payload.SetSize(max_plaintext_size); + + const std::vector<uint32_t> csrcs(header.arrOfCSRCs, + header.arrOfCSRCs + header.numCSRCs); + const FrameDecryptorInterface::Result decrypt_result = + frame_decryptor_->Decrypt( + cricket::MEDIA_TYPE_AUDIO, csrcs, + /*additional_data=*/nullptr, + rtc::ArrayView<const uint8_t>(payload, payload_data_length), + decrypted_audio_payload); + + if (decrypt_result.IsOk()) { + decrypted_audio_payload.SetSize(decrypt_result.bytes_written); + } else { + // Interpret failures as a silent frame. 
+ decrypted_audio_payload.SetSize(0); + } + + payload = decrypted_audio_payload.data(); + payload_data_length = decrypted_audio_payload.size(); + } else if (crypto_options_.sframe.require_frame_encryption) { + RTC_DLOG(LS_ERROR) + << "FrameDecryptor required but not set, dropping packet"; + payload_data_length = 0; + } + + rtc::ArrayView<const uint8_t> payload_data(payload, payload_data_length); + if (frame_transformer_delegate_) { + // Asynchronously transform the received payload. After the payload is + // transformed, the delegate will call OnReceivedPayloadData to handle it. + frame_transformer_delegate_->Transform(payload_data, header, remote_ssrc_); + } else { + OnReceivedPayloadData(payload_data, header); + } +} + +void ChannelReceive::ReceivedRTCPPacket(const uint8_t* data, size_t length) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // TODO(bugs.webrtc.org/11993): Expect to be called exclusively on the + // network thread. + + // Store playout timestamp for the received RTCP packet + UpdatePlayoutTimestamp(true, rtc::TimeMillis()); + + // Deliver RTCP packet to RTP/RTCP module for parsing + rtp_rtcp_->IncomingRtcpPacket(rtc::MakeArrayView(data, length)); + + absl::optional<TimeDelta> rtt = rtp_rtcp_->LastRtt(); + if (!rtt.has_value()) { + // Waiting for valid RTT. + return; + } + + absl::optional<RtpRtcpInterface::SenderReportStats> last_sr = + rtp_rtcp_->GetSenderReportStats(); + if (!last_sr.has_value()) { + // Waiting for RTCP. 
+ return; + } + + { + MutexLock lock(&ts_stats_lock_); + ntp_estimator_.UpdateRtcpTimestamp(*rtt, last_sr->last_remote_timestamp, + last_sr->last_remote_rtp_timestamp); + absl::optional<int64_t> remote_to_local_clock_offset = + ntp_estimator_.EstimateRemoteToLocalClockOffset(); + if (remote_to_local_clock_offset.has_value()) { + capture_clock_offset_updater_.SetRemoteToLocalClockOffset( + *remote_to_local_clock_offset); + } + } +} + +int ChannelReceive::GetSpeechOutputLevelFullRange() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return _outputAudioLevel.LevelFullRange(); +} + +double ChannelReceive::GetTotalOutputEnergy() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return _outputAudioLevel.TotalEnergy(); +} + +double ChannelReceive::GetTotalOutputDuration() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return _outputAudioLevel.TotalDuration(); +} + +void ChannelReceive::SetChannelOutputVolumeScaling(float scaling) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + MutexLock lock(&volume_settings_mutex_); + _outputGain = scaling; +} + +void ChannelReceive::RegisterReceiverCongestionControlObjects( + PacketRouter* packet_router) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK(packet_router); + RTC_DCHECK(!packet_router_); + constexpr bool remb_candidate = false; + packet_router->AddReceiveRtpModule(rtp_rtcp_.get(), remb_candidate); + packet_router_ = packet_router; +} + +void ChannelReceive::ResetReceiverCongestionControlObjects() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK(packet_router_); + packet_router_->RemoveReceiveRtpModule(rtp_rtcp_.get()); + packet_router_ = nullptr; +} + +CallReceiveStatistics ChannelReceive::GetRTCPStatistics() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + CallReceiveStatistics stats; + + // The jitter statistics is updated for each received RTP packet and is based + // on received packets. 
+ RtpReceiveStats rtp_stats; + StreamStatistician* statistician = + rtp_receive_statistics_->GetStatistician(remote_ssrc_); + if (statistician) { + rtp_stats = statistician->GetStats(); + } + + stats.cumulativeLost = rtp_stats.packets_lost; + stats.jitterSamples = rtp_stats.jitter; + + // Data counters. + if (statistician) { + stats.payload_bytes_received = rtp_stats.packet_counter.payload_bytes; + + stats.header_and_padding_bytes_received = + rtp_stats.packet_counter.header_bytes + + rtp_stats.packet_counter.padding_bytes; + stats.packetsReceived = rtp_stats.packet_counter.packets; + stats.last_packet_received = rtp_stats.last_packet_received; + } else { + stats.payload_bytes_received = 0; + stats.header_and_padding_bytes_received = 0; + stats.packetsReceived = 0; + stats.last_packet_received = absl::nullopt; + } + + { + MutexLock lock(&rtcp_counter_mutex_); + stats.nacks_sent = rtcp_packet_type_counter_.nack_packets; + } + + // Timestamps. + { + MutexLock lock(&ts_stats_lock_); + stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_; + } + + absl::optional<RtpRtcpInterface::SenderReportStats> rtcp_sr_stats = + rtp_rtcp_->GetSenderReportStats(); + if (rtcp_sr_stats.has_value()) { + stats.last_sender_report_timestamp_ms = + rtcp_sr_stats->last_arrival_timestamp.ToMs() - + rtc::kNtpJan1970Millisecs; + stats.last_sender_report_remote_timestamp_ms = + rtcp_sr_stats->last_remote_timestamp.ToMs() - rtc::kNtpJan1970Millisecs; + stats.sender_reports_packets_sent = rtcp_sr_stats->packets_sent; + stats.sender_reports_bytes_sent = rtcp_sr_stats->bytes_sent; + stats.sender_reports_reports_count = rtcp_sr_stats->reports_count; + } + + absl::optional<RtpRtcpInterface::NonSenderRttStats> non_sender_rtt_stats = + rtp_rtcp_->GetNonSenderRttStats(); + if (non_sender_rtt_stats.has_value()) { + stats.round_trip_time = non_sender_rtt_stats->round_trip_time; + stats.round_trip_time_measurements = + non_sender_rtt_stats->round_trip_time_measurements; + 
stats.total_round_trip_time = non_sender_rtt_stats->total_round_trip_time; + } + + return stats; +} + +void ChannelReceive::SetNACKStatus(bool enable, int max_packets) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // None of these functions can fail. + if (enable) { + rtp_receive_statistics_->SetMaxReorderingThreshold(max_packets); + acm_receiver_.EnableNack(max_packets); + } else { + rtp_receive_statistics_->SetMaxReorderingThreshold( + kDefaultMaxReorderingThreshold); + acm_receiver_.DisableNack(); + } +} + +void ChannelReceive::SetNonSenderRttMeasurement(bool enabled) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + rtp_rtcp_->SetNonSenderRttMeasurement(enabled); +} + +// Called when we are missing one or more packets. +int ChannelReceive::ResendPackets(const uint16_t* sequence_numbers, + int length) { + return rtp_rtcp_->SendNACK(sequence_numbers, length); +} + +void ChannelReceive::RtcpPacketTypesCounterUpdated( + uint32_t ssrc, + const RtcpPacketTypeCounter& packet_counter) { + if (ssrc != remote_ssrc_) { + return; + } + MutexLock lock(&rtcp_counter_mutex_); + rtcp_packet_type_counter_ = packet_counter; +} + +void ChannelReceive::SetAssociatedSendChannel( + const ChannelSendInterface* channel) { + RTC_DCHECK_RUN_ON(&network_thread_checker_); + associated_send_channel_ = channel; +} + +void ChannelReceive::SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // Depending on when the channel is created, the transformer might be set + // twice. Don't replace the delegate if it was already initialized. 
+ if (!frame_transformer || frame_transformer_delegate_) { + RTC_DCHECK_NOTREACHED() << "Not setting the transformer?"; + return; + } + + InitFrameTransformerDelegate(std::move(frame_transformer)); +} + +void ChannelReceive::SetFrameDecryptor( + rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor) { + // TODO(bugs.webrtc.org/11993): Expect to be called on the network thread. + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + frame_decryptor_ = std::move(frame_decryptor); +} + +void ChannelReceive::OnLocalSsrcChange(uint32_t local_ssrc) { + // TODO(bugs.webrtc.org/11993): Expect to be called on the network thread. + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + rtp_rtcp_->SetLocalSsrc(local_ssrc); +} + +uint32_t ChannelReceive::GetLocalSsrc() const { + // TODO(bugs.webrtc.org/11993): Expect to be called on the network thread. + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + return rtp_rtcp_->local_media_ssrc(); +} + +NetworkStatistics ChannelReceive::GetNetworkStatistics( + bool get_and_clear_legacy_stats) const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + NetworkStatistics stats; + acm_receiver_.GetNetworkStatistics(&stats, get_and_clear_legacy_stats); + return stats; +} + +AudioDecodingCallStats ChannelReceive::GetDecodingCallStatistics() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + AudioDecodingCallStats stats; + acm_receiver_.GetDecodingCallStatistics(&stats); + return stats; +} + +uint32_t ChannelReceive::GetDelayEstimate() const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // Return the current jitter buffer delay + playout delay. + return acm_receiver_.FilteredCurrentDelayMs() + playout_delay_ms_; +} + +bool ChannelReceive::SetMinimumPlayoutDelay(int delay_ms) { + // TODO(bugs.webrtc.org/11993): This should run on the network thread. + // We get here via RtpStreamsSynchronizer. Once that's done, many (all?) of + // these locks aren't needed. 
+ RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // Limit to range accepted by both VoE and ACM, so we're at least getting as + // close as possible, instead of failing. + delay_ms = rtc::SafeClamp(delay_ms, kVoiceEngineMinMinPlayoutDelayMs, + kVoiceEngineMaxMinPlayoutDelayMs); + if (acm_receiver_.SetMinimumDelay(delay_ms) != 0) { + RTC_DLOG(LS_ERROR) + << "SetMinimumPlayoutDelay() failed to set min playout delay"; + return false; + } + return true; +} + +bool ChannelReceive::GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp, + int64_t* time_ms) const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!playout_timestamp_rtp_time_ms_) + return false; + *rtp_timestamp = playout_timestamp_rtp_; + *time_ms = playout_timestamp_rtp_time_ms_.value(); + return true; +} + +void ChannelReceive::SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms, + int64_t time_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + playout_timestamp_ntp_ = ntp_timestamp_ms; + playout_timestamp_ntp_time_ms_ = time_ms; +} + +absl::optional<int64_t> +ChannelReceive::GetCurrentEstimatedPlayoutNtpTimestampMs(int64_t now_ms) const { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!playout_timestamp_ntp_ || !playout_timestamp_ntp_time_ms_) + return absl::nullopt; + + int64_t elapsed_ms = now_ms - *playout_timestamp_ntp_time_ms_; + return *playout_timestamp_ntp_ + elapsed_ms; +} + +bool ChannelReceive::SetBaseMinimumPlayoutDelayMs(int delay_ms) { + event_log_->Log( + std::make_unique<RtcEventNetEqSetMinimumDelay>(remote_ssrc_, delay_ms)); + return acm_receiver_.SetBaseMinimumDelayMs(delay_ms); +} + +int ChannelReceive::GetBaseMinimumPlayoutDelayMs() const { + return acm_receiver_.GetBaseMinimumDelayMs(); +} + +absl::optional<Syncable::Info> ChannelReceive::GetSyncInfo() const { + // TODO(bugs.webrtc.org/11993): This should run on the network thread. + // We get here via RtpStreamsSynchronizer. Once that's done, many of + // these locks aren't needed. 
+ RTC_DCHECK_RUN_ON(&worker_thread_checker_); + Syncable::Info info; + absl::optional<RtpRtcpInterface::SenderReportStats> last_sr = + rtp_rtcp_->GetSenderReportStats(); + if (!last_sr.has_value()) { + return absl::nullopt; + } + info.capture_time_ntp_secs = last_sr->last_remote_timestamp.seconds(); + info.capture_time_ntp_frac = last_sr->last_remote_timestamp.fractions(); + info.capture_time_source_clock = last_sr->last_remote_rtp_timestamp; + + if (!last_received_rtp_timestamp_ || !last_received_rtp_system_time_ms_) { + return absl::nullopt; + } + info.latest_received_capture_timestamp = *last_received_rtp_timestamp_; + info.latest_receive_time_ms = *last_received_rtp_system_time_ms_; + + int jitter_buffer_delay = acm_receiver_.FilteredCurrentDelayMs(); + info.current_delay_ms = jitter_buffer_delay + playout_delay_ms_; + + return info; +} + +void ChannelReceive::UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + // TODO(bugs.webrtc.org/11993): Expect to be called exclusively on the + // network thread. Once that's done, we won't need video_sync_lock_. + + jitter_buffer_playout_timestamp_ = acm_receiver_.GetPlayoutTimestamp(); + + if (!jitter_buffer_playout_timestamp_) { + // This can happen if this channel has not received any RTP packets. In + // this case, NetEq is not capable of computing a playout timestamp. + return; + } + + uint16_t delay_ms = 0; + if (_audioDeviceModulePtr->PlayoutDelay(&delay_ms) == -1) { + RTC_DLOG(LS_WARNING) + << "ChannelReceive::UpdatePlayoutTimestamp() failed to read" + " playout delay from the ADM"; + return; + } + + RTC_DCHECK(jitter_buffer_playout_timestamp_); + uint32_t playout_timestamp = *jitter_buffer_playout_timestamp_; + + // Remove the playout delay. 
+ playout_timestamp -= (delay_ms * (GetRtpTimestampRateHz() / 1000)); + + if (!rtcp && playout_timestamp != playout_timestamp_rtp_) { + playout_timestamp_rtp_ = playout_timestamp; + playout_timestamp_rtp_time_ms_ = now_ms; + } + playout_delay_ms_ = delay_ms; +} + +int ChannelReceive::GetRtpTimestampRateHz() const { + const auto decoder = acm_receiver_.LastDecoder(); + // Default to the playout frequency if we've not gotten any packets yet. + // TODO(ossu): Zero clockrate can only happen if we've added an external + // decoder for a format we don't support internally. Remove once that way of + // adding decoders is gone! + // TODO(kwiberg): `decoder->second.clockrate_hz` is an RTP clockrate as it + // should, but `acm_receiver_.last_output_sample_rate_hz()` is a codec sample + // rate, which is not always the same thing. + return (decoder && decoder->second.clockrate_hz != 0) + ? decoder->second.clockrate_hz + : acm_receiver_.last_output_sample_rate_hz(); +} + +} // namespace + +std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive( + Clock* clock, + NetEqFactory* neteq_factory, + AudioDeviceModule* audio_device_module, + Transport* rtcp_send_transport, + RtcEventLog* rtc_event_log, + uint32_t local_ssrc, + uint32_t remote_ssrc, + size_t jitter_buffer_max_packets, + bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, + bool enable_non_sender_rtt, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id, + rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor, + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + RtcpEventObserver* rtcp_event_observer) { + return std::make_unique<ChannelReceive>( + clock, neteq_factory, audio_device_module, rtcp_send_transport, + rtc_event_log, local_ssrc, remote_ssrc, jitter_buffer_max_packets, + jitter_buffer_fast_playout, jitter_buffer_min_delay_ms, + enable_non_sender_rtt, decoder_factory, 
codec_pair_id, + std::move(frame_decryptor), crypto_options, std::move(frame_transformer), + rtcp_event_observer); +} + +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_receive.h b/third_party/libwebrtc/audio/channel_receive.h new file mode 100644 index 0000000000..5713d97aaa --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_CHANNEL_RECEIVE_H_ +#define AUDIO_CHANNEL_RECEIVE_H_ + +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/audio/audio_mixer.h" +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/call/audio_sink.h" +#include "api/call/transport.h" +#include "api/crypto/crypto_options.h" +#include "api/frame_transformer_interface.h" +#include "api/neteq/neteq_factory.h" +#include "api/transport/rtp/rtp_source.h" +#include "call/rtp_packet_sink_interface.h" +#include "call/syncable.h" +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" +#include "modules/rtp_rtcp/source/source_tracker.h" +#include "system_wrappers/include/clock.h" + +// TODO(solenberg, nisse): This file contains a few NOLINT marks, to silence +// warnings about use of unsigned short. +// These need cleanup, in a separate cl. 
+ +namespace rtc { +class TimestampWrapAroundHandler; +} + +namespace webrtc { + +class AudioDeviceModule; +class FrameDecryptorInterface; +class PacketRouter; +class RateLimiter; +class ReceiveStatistics; +class RtcEventLog; +class RtpPacketReceived; +class RtpRtcp; + +struct CallReceiveStatistics { + int cumulativeLost; + unsigned int jitterSamples; + int64_t payload_bytes_received = 0; + int64_t header_and_padding_bytes_received = 0; + int packetsReceived; + uint32_t nacks_sent = 0; + // The capture NTP time (in local timebase) of the first played out audio + // frame. + int64_t capture_start_ntp_time_ms_; + // The timestamp at which the last packet was received, i.e. the time of the + // local clock when it was received - not the RTP timestamp of that packet. + // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats-lastpacketreceivedtimestamp + absl::optional<Timestamp> last_packet_received; + // Remote outbound stats derived by the received RTCP sender reports. + // Note that the timestamps below correspond to the time elapsed since the + // Unix epoch. + // https://w3c.github.io/webrtc-stats/#remoteoutboundrtpstats-dict* + absl::optional<int64_t> last_sender_report_timestamp_ms; + absl::optional<int64_t> last_sender_report_remote_timestamp_ms; + uint64_t sender_reports_packets_sent = 0; + uint64_t sender_reports_bytes_sent = 0; + uint64_t sender_reports_reports_count = 0; + absl::optional<TimeDelta> round_trip_time; + TimeDelta total_round_trip_time = TimeDelta::Zero(); + int round_trip_time_measurements; +}; + +namespace voe { + +class ChannelSendInterface; + +// Interface class needed for AudioReceiveStreamInterface tests that use a +// MockChannelReceive. 
+ +class ChannelReceiveInterface : public RtpPacketSinkInterface { + public: + virtual ~ChannelReceiveInterface() = default; + + virtual void SetSink(AudioSinkInterface* sink) = 0; + + virtual void SetReceiveCodecs( + const std::map<int, SdpAudioFormat>& codecs) = 0; + + virtual void StartPlayout() = 0; + virtual void StopPlayout() = 0; + + // Payload type and format of last received RTP packet, if any. + virtual absl::optional<std::pair<int, SdpAudioFormat>> GetReceiveCodec() + const = 0; + + virtual void ReceivedRTCPPacket(const uint8_t* data, size_t length) = 0; + + virtual void SetChannelOutputVolumeScaling(float scaling) = 0; + virtual int GetSpeechOutputLevelFullRange() const = 0; + // See description of "totalAudioEnergy" in the WebRTC stats spec: + // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy + virtual double GetTotalOutputEnergy() const = 0; + virtual double GetTotalOutputDuration() const = 0; + + // Stats. + virtual NetworkStatistics GetNetworkStatistics( + bool get_and_clear_legacy_stats) const = 0; + virtual AudioDecodingCallStats GetDecodingCallStatistics() const = 0; + + // Audio+Video Sync. + virtual uint32_t GetDelayEstimate() const = 0; + virtual bool SetMinimumPlayoutDelay(int delay_ms) = 0; + virtual bool GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp, + int64_t* time_ms) const = 0; + virtual void SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms, + int64_t time_ms) = 0; + virtual absl::optional<int64_t> GetCurrentEstimatedPlayoutNtpTimestampMs( + int64_t now_ms) const = 0; + + // Audio quality. + // Base minimum delay sets lower bound on minimum delay value which + // determines minimum delay until audio playout. + virtual bool SetBaseMinimumPlayoutDelayMs(int delay_ms) = 0; + virtual int GetBaseMinimumPlayoutDelayMs() const = 0; + + // Produces the transport-related timestamps; current_delay_ms is left unset. 
+ virtual absl::optional<Syncable::Info> GetSyncInfo() const = 0; + + virtual void RegisterReceiverCongestionControlObjects( + PacketRouter* packet_router) = 0; + virtual void ResetReceiverCongestionControlObjects() = 0; + + virtual CallReceiveStatistics GetRTCPStatistics() const = 0; + virtual void SetNACKStatus(bool enable, int max_packets) = 0; + virtual void SetNonSenderRttMeasurement(bool enabled) = 0; + + virtual AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo( + int sample_rate_hz, + AudioFrame* audio_frame) = 0; + + virtual int PreferredSampleRate() const = 0; + + // Sets the source tracker to notify about "delivered" packets when output is + // muted. + virtual void SetSourceTracker(SourceTracker* source_tracker) = 0; + + // Associate to a send channel. + // Used for obtaining RTT for a receive-only channel. + virtual void SetAssociatedSendChannel( + const ChannelSendInterface* channel) = 0; + + // Sets a frame transformer between the depacketizer and the decoder, to + // transform the received frames before decoding them. 
+ virtual void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> + frame_transformer) = 0; + + virtual void SetFrameDecryptor( + rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor) = 0; + + virtual void OnLocalSsrcChange(uint32_t local_ssrc) = 0; + virtual uint32_t GetLocalSsrc() const = 0; +}; + +std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive( + Clock* clock, + NetEqFactory* neteq_factory, + AudioDeviceModule* audio_device_module, + Transport* rtcp_send_transport, + RtcEventLog* rtc_event_log, + uint32_t local_ssrc, + uint32_t remote_ssrc, + size_t jitter_buffer_max_packets, + bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, + bool enable_non_sender_rtt, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id, + rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor, + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + RtcpEventObserver* rtcp_event_observer); + +} // namespace voe +} // namespace webrtc + +#endif // AUDIO_CHANNEL_RECEIVE_H_ diff --git a/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.cc b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.cc new file mode 100644 index 0000000000..2d2893b8f7 --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/channel_receive_frame_transformer_delegate.h" + +#include <utility> + +#include "rtc_base/buffer.h" + +namespace webrtc { +namespace { + +class TransformableIncomingAudioFrame + : public TransformableAudioFrameInterface { + public: + TransformableIncomingAudioFrame(rtc::ArrayView<const uint8_t> payload, + const RTPHeader& header, + uint32_t ssrc) + : payload_(payload.data(), payload.size()), + header_(header), + ssrc_(ssrc) {} + ~TransformableIncomingAudioFrame() override = default; + rtc::ArrayView<const uint8_t> GetData() const override { return payload_; } + + void SetData(rtc::ArrayView<const uint8_t> data) override { + payload_.SetData(data.data(), data.size()); + } + + void SetRTPTimestamp(uint32_t timestamp) override { + header_.timestamp = timestamp; + } + + uint8_t GetPayloadType() const override { return header_.payloadType; } + uint32_t GetSsrc() const override { return ssrc_; } + uint32_t GetTimestamp() const override { return header_.timestamp; } + rtc::ArrayView<const uint32_t> GetContributingSources() const override { + return rtc::ArrayView<const uint32_t>(header_.arrOfCSRCs, header_.numCSRCs); + } + Direction GetDirection() const override { return Direction::kReceiver; } + + const absl::optional<uint16_t> SequenceNumber() const override { + return header_.sequenceNumber; + } + + absl::optional<uint64_t> AbsoluteCaptureTimestamp() const override { + // This could be extracted from received header extensions + extrapolation, + // if required in future, eg for being able to re-send received frames. + return absl::nullopt; + } + const RTPHeader& Header() const { return header_; } + + FrameType Type() const override { + return header_.extension.voiceActivity ? 
FrameType::kAudioFrameSpeech + : FrameType::kAudioFrameCN; + } + + private: + rtc::Buffer payload_; + RTPHeader header_; + uint32_t ssrc_; +}; +} // namespace + +ChannelReceiveFrameTransformerDelegate::ChannelReceiveFrameTransformerDelegate( + ReceiveFrameCallback receive_frame_callback, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + TaskQueueBase* channel_receive_thread) + : receive_frame_callback_(receive_frame_callback), + frame_transformer_(std::move(frame_transformer)), + channel_receive_thread_(channel_receive_thread) {} + +void ChannelReceiveFrameTransformerDelegate::Init() { + RTC_DCHECK_RUN_ON(&sequence_checker_); + frame_transformer_->RegisterTransformedFrameCallback( + rtc::scoped_refptr<TransformedFrameCallback>(this)); +} + +void ChannelReceiveFrameTransformerDelegate::Reset() { + RTC_DCHECK_RUN_ON(&sequence_checker_); + frame_transformer_->UnregisterTransformedFrameCallback(); + frame_transformer_ = nullptr; + receive_frame_callback_ = ReceiveFrameCallback(); +} + +void ChannelReceiveFrameTransformerDelegate::Transform( + rtc::ArrayView<const uint8_t> packet, + const RTPHeader& header, + uint32_t ssrc) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + frame_transformer_->Transform( + std::make_unique<TransformableIncomingAudioFrame>(packet, header, ssrc)); +} + +void ChannelReceiveFrameTransformerDelegate::OnTransformedFrame( + std::unique_ptr<TransformableFrameInterface> frame) { + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate(this); + channel_receive_thread_->PostTask( + [delegate = std::move(delegate), frame = std::move(frame)]() mutable { + delegate->ReceiveFrame(std::move(frame)); + }); +} + +void ChannelReceiveFrameTransformerDelegate::ReceiveFrame( + std::unique_ptr<TransformableFrameInterface> frame) const { + RTC_DCHECK_RUN_ON(&sequence_checker_); + if (!receive_frame_callback_) + return; + + RTPHeader header; + if (frame->GetDirection() == + TransformableFrameInterface::Direction::kSender) { + auto* 
transformed_frame = + static_cast<TransformableAudioFrameInterface*>(frame.get()); + header.payloadType = transformed_frame->GetPayloadType(); + header.timestamp = transformed_frame->GetTimestamp(); + header.ssrc = transformed_frame->GetSsrc(); + if (transformed_frame->AbsoluteCaptureTimestamp().has_value()) { + header.extension.absolute_capture_time = AbsoluteCaptureTime(); + header.extension.absolute_capture_time->absolute_capture_timestamp = + transformed_frame->AbsoluteCaptureTimestamp().value(); + } + } else { + auto* transformed_frame = + static_cast<TransformableIncomingAudioFrame*>(frame.get()); + header = transformed_frame->Header(); + } + + // TODO(crbug.com/1464860): Take an explicit struct with the required + // information rather than the RTPHeader to make it easier to + // construct the required information when injecting transformed frames not + // originally from this receiver. + receive_frame_callback_(frame->GetData(), header); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.h b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.h new file mode 100644 index 0000000000..04ad7c4695 --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef AUDIO_CHANNEL_RECEIVE_FRAME_TRANSFORMER_DELEGATE_H_ +#define AUDIO_CHANNEL_RECEIVE_FRAME_TRANSFORMER_DELEGATE_H_ + +#include <memory> + +#include "api/frame_transformer_interface.h" +#include "api/sequence_checker.h" +#include "rtc_base/system/no_unique_address.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/thread.h" + +namespace webrtc { + +// Delegates calls to FrameTransformerInterface to transform frames, and to +// ChannelReceive to receive the transformed frames using the +// `receive_frame_callback_` on the `channel_receive_thread_`. +class ChannelReceiveFrameTransformerDelegate : public TransformedFrameCallback { + public: + using ReceiveFrameCallback = + std::function<void(rtc::ArrayView<const uint8_t> packet, + const RTPHeader& header)>; + ChannelReceiveFrameTransformerDelegate( + ReceiveFrameCallback receive_frame_callback, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + TaskQueueBase* channel_receive_thread); + + // Registers `this` as callback for `frame_transformer_`, to get the + // transformed frames. + void Init(); + + // Unregisters and releases the `frame_transformer_` reference, and resets + // `receive_frame_callback_` on `channel_receive_thread_`. Called from + // ChannelReceive destructor to prevent running the callback on a dangling + // channel. + void Reset(); + + // Delegates the call to FrameTransformerInterface::Transform, to transform + // the frame asynchronously. + void Transform(rtc::ArrayView<const uint8_t> packet, + const RTPHeader& header, + uint32_t ssrc); + + // Implements TransformedFrameCallback. Can be called on any thread. + void OnTransformedFrame( + std::unique_ptr<TransformableFrameInterface> frame) override; + + // Delegates the call to ChannelReceive::OnReceivedPayloadData on the + // `channel_receive_thread_`, by calling `receive_frame_callback_`. 
+ void ReceiveFrame(std::unique_ptr<TransformableFrameInterface> frame) const; + + protected: + ~ChannelReceiveFrameTransformerDelegate() override = default; + + private: + RTC_NO_UNIQUE_ADDRESS SequenceChecker sequence_checker_; + ReceiveFrameCallback receive_frame_callback_ + RTC_GUARDED_BY(sequence_checker_); + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer_ + RTC_GUARDED_BY(sequence_checker_); + TaskQueueBase* const channel_receive_thread_; +}; + +} // namespace webrtc +#endif // AUDIO_CHANNEL_RECEIVE_FRAME_TRANSFORMER_DELEGATE_H_ diff --git a/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate_unittest.cc b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate_unittest.cc new file mode 100644 index 0000000000..38ceb6d96d --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive_frame_transformer_delegate_unittest.cc @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/channel_receive_frame_transformer_delegate.h" + +#include <memory> +#include <utility> + +#include "audio/channel_send_frame_transformer_delegate.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_frame_transformer.h" +#include "test/mock_transformable_frame.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::NiceMock; +using ::testing::SaveArg; + +class MockChannelReceive { + public: + MOCK_METHOD(void, + ReceiveFrame, + (rtc::ArrayView<const uint8_t> packet, const RTPHeader& header)); + + ChannelReceiveFrameTransformerDelegate::ReceiveFrameCallback callback() { + return [this](rtc::ArrayView<const uint8_t> packet, + const RTPHeader& header) { ReceiveFrame(packet, header); }; + } +}; + +// Test that the delegate registers itself with the frame transformer on Init(). +TEST(ChannelReceiveFrameTransformerDelegateTest, + RegisterTransformedFrameCallbackOnInit) { + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<MockFrameTransformer>(); + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>( + ChannelReceiveFrameTransformerDelegate::ReceiveFrameCallback(), + mock_frame_transformer, nullptr); + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback); + delegate->Init(); +} + +// Test that the delegate unregisters itself from the frame transformer on +// Reset(). 
+TEST(ChannelReceiveFrameTransformerDelegateTest, + UnregisterTransformedFrameCallbackOnReset) { + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<MockFrameTransformer>(); + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>( + ChannelReceiveFrameTransformerDelegate::ReceiveFrameCallback(), + mock_frame_transformer, nullptr); + EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback); + delegate->Reset(); +} + +// Test that when the delegate receives a transformed frame from the frame +// transformer, it passes it to the channel using the ReceiveFrameCallback. +TEST(ChannelReceiveFrameTransformerDelegateTest, + TransformRunsChannelReceiveCallback) { + rtc::AutoThread main_thread; + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<NiceMock<MockFrameTransformer>>(); + MockChannelReceive mock_channel; + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>( + mock_channel.callback(), mock_frame_transformer, + rtc::Thread::Current()); + rtc::scoped_refptr<TransformedFrameCallback> callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + delegate->Init(); + ASSERT_TRUE(callback); + + const uint8_t data[] = {1, 2, 3, 4}; + rtc::ArrayView<const uint8_t> packet(data, sizeof(data)); + RTPHeader header; + EXPECT_CALL(mock_channel, ReceiveFrame); + ON_CALL(*mock_frame_transformer, Transform) + .WillByDefault( + [&callback](std::unique_ptr<TransformableFrameInterface> frame) { + callback->OnTransformedFrame(std::move(frame)); + }); + delegate->Transform(packet, header, 1111 /*ssrc*/); + rtc::ThreadManager::ProcessAllMessageQueuesForTesting(); +} + +// Test that when the delegate receives a Outgoing frame from the frame +// transformer, it passes it to the 
channel using the ReceiveFrameCallback. +TEST(ChannelReceiveFrameTransformerDelegateTest, + TransformRunsChannelReceiveCallbackForSenderFrame) { + rtc::AutoThread main_thread; + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<NiceMock<MockFrameTransformer>>(); + MockChannelReceive mock_channel; + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>( + mock_channel.callback(), mock_frame_transformer, + rtc::Thread::Current()); + rtc::scoped_refptr<TransformedFrameCallback> callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + delegate->Init(); + ASSERT_TRUE(callback); + + const uint8_t data[] = {1, 2, 3, 4}; + rtc::ArrayView<const uint8_t> packet(data, sizeof(data)); + RTPHeader header; + EXPECT_CALL(mock_channel, ReceiveFrame(ElementsAre(1, 2, 3, 4), _)); + ON_CALL(*mock_frame_transformer, Transform) + .WillByDefault([&callback]( + std::unique_ptr<TransformableFrameInterface> frame) { + auto* transformed_frame = + static_cast<TransformableAudioFrameInterface*>(frame.get()); + callback->OnTransformedFrame(CloneSenderAudioFrame(transformed_frame)); + }); + delegate->Transform(packet, header, 1111 /*ssrc*/); + rtc::ThreadManager::ProcessAllMessageQueuesForTesting(); +} + +// Test that if the delegate receives a transformed frame after it has been +// reset, it does not run the ReceiveFrameCallback, as the channel is destroyed +// after resetting the delegate. 
+TEST(ChannelReceiveFrameTransformerDelegateTest, + OnTransformedDoesNotRunChannelReceiveCallbackAfterReset) { + rtc::AutoThread main_thread; + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<testing::NiceMock<MockFrameTransformer>>(); + MockChannelReceive mock_channel; + rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>( + mock_channel.callback(), mock_frame_transformer, + rtc::Thread::Current()); + + delegate->Reset(); + EXPECT_CALL(mock_channel, ReceiveFrame).Times(0); + delegate->OnTransformedFrame(std::make_unique<MockTransformableAudioFrame>()); + rtc::ThreadManager::ProcessAllMessageQueuesForTesting(); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_receive_unittest.cc b/third_party/libwebrtc/audio/channel_receive_unittest.cc new file mode 100644 index 0000000000..4b7b7c0231 --- /dev/null +++ b/third_party/libwebrtc/audio/channel_receive_unittest.cc @@ -0,0 +1,231 @@ +/* + * Copyright 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/channel_receive.h" + +#include "absl/strings/escaping.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/crypto/frame_decryptor_interface.h" +#include "api/task_queue/default_task_queue_factory.h" +#include "logging/rtc_event_log/mock/mock_rtc_event_log.h" +#include "modules/audio_device/include/audio_device.h" +#include "modules/audio_device/include/mock_audio_device.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "modules/rtp_rtcp/source/rtcp_packet/receiver_report.h" +#include "modules/rtp_rtcp/source/rtcp_packet/report_block.h" +#include "modules/rtp_rtcp/source/rtcp_packet/sdes.h" +#include "modules/rtp_rtcp/source/rtcp_packet/sender_report.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "modules/rtp_rtcp/source/time_util.h" +#include "rtc_base/logging.h" +#include "rtc_base/thread.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_audio_decoder_factory.h" +#include "test/mock_transport.h" +#include "test/time_controller/simulated_time_controller.h" + +namespace webrtc { +namespace voe { +namespace { + +using ::testing::NiceMock; +using ::testing::NotNull; +using ::testing::Return; +using ::testing::Test; + +constexpr uint32_t kLocalSsrc = 1111; +constexpr uint32_t kRemoteSsrc = 2222; +// We run RTP data with 8 kHz PCMA (fixed payload type 8). 
+constexpr char kPayloadName[] = "PCMA"; +constexpr int kPayloadType = 8; +constexpr int kSampleRateHz = 8000; + +class ChannelReceiveTest : public Test { + public: + ChannelReceiveTest() + : time_controller_(Timestamp::Seconds(5555)), + audio_device_module_(test::MockAudioDeviceModule::CreateNice()), + audio_decoder_factory_(CreateBuiltinAudioDecoderFactory()) { + ON_CALL(*audio_device_module_, PlayoutDelay).WillByDefault(Return(0)); + } + + std::unique_ptr<ChannelReceiveInterface> CreateTestChannelReceive() { + CryptoOptions crypto_options; + auto channel = CreateChannelReceive( + time_controller_.GetClock(), + /* neteq_factory= */ nullptr, audio_device_module_.get(), &transport_, + &event_log_, kLocalSsrc, kRemoteSsrc, + /* jitter_buffer_max_packets= */ 0, + /* jitter_buffer_fast_playout= */ false, + /* jitter_buffer_min_delay_ms= */ 0, + /* enable_non_sender_rtt= */ false, audio_decoder_factory_, + /* codec_pair_id= */ absl::nullopt, + /* frame_decryptor_interface= */ nullptr, crypto_options, + /* frame_transformer= */ nullptr); + channel->SetReceiveCodecs( + {{kPayloadType, {kPayloadName, kSampleRateHz, 1}}}); + return channel; + } + + NtpTime NtpNow() { return time_controller_.GetClock()->CurrentNtpTime(); } + + uint32_t RtpNow() { + // Note - the "random" offset of this timestamp is zero. + return rtc::TimeMillis() * 1000 / kSampleRateHz; + } + + RtpPacketReceived CreateRtpPacket() { + RtpPacketReceived packet; + packet.set_arrival_time(time_controller_.GetClock()->CurrentTime()); + packet.SetTimestamp(RtpNow()); + packet.SetSsrc(kLocalSsrc); + packet.SetPayloadType(kPayloadType); + // Packet size should be enough to give at least 10 ms of data. + // For PCMA, that's 80 bytes; this should be enough. 
+    uint8_t* datapos = packet.SetPayloadSize(100);
+    memset(datapos, 0, 100);
+    return packet;
+  }
+
+  // Serializes an RTCP sender report from kRemoteSsrc that carries the values
+  // of NtpNow() and RtpNow() and zero packet/octet counts. The returned
+  // buffer is trimmed to the number of bytes actually written.
+  std::vector<uint8_t> CreateRtcpSenderReport() {
+    std::vector<uint8_t> packet(1024);
+    size_t pos = 0;
+    rtcp::SenderReport report;
+    report.SetSenderSsrc(kRemoteSsrc);
+    report.SetNtp(NtpNow());
+    report.SetRtpTimestamp(RtpNow());
+    report.SetPacketCount(0);
+    report.SetOctetCount(0);
+    report.Create(&packet[0], &pos, packet.size(), nullptr);
+    // No report blocks.
+    packet.resize(pos);
+    return packet;
+  }
+
+  // Serializes an RTCP receiver report from kRemoteSsrc containing a single
+  // report block about kLocalSsrc. The returned buffer is trimmed to the
+  // number of bytes actually written.
+  std::vector<uint8_t> CreateRtcpReceiverReport() {
+    rtcp::ReportBlock block;
+    block.SetMediaSsrc(kLocalSsrc);
+    // Middle 32 bits of the NTP timestamp from received SR
+    block.SetLastSr(CompactNtp(NtpNow()));
+    block.SetDelayLastSr(0);
+
+    rtcp::ReceiverReport report;
+    report.SetSenderSsrc(kRemoteSsrc);
+    report.AddReportBlock(block);
+
+    std::vector<uint8_t> packet(1024);
+    size_t pos = 0;
+    report.Create(&packet[0], &pos, packet.size(), nullptr);
+    packet.resize(pos);
+    return packet;
+  }
+
+  // Inspects an RTCP packet handed to the transport. Receiver reports are
+  // ignored; anything else (including a packet too short to hold a packet
+  // type) is logged as an error together with a hex dump of its bytes.
+  // `channel` is currently unused.
+  void HandleGeneratedRtcp(ChannelReceiveInterface& channel,
+                           rtc::ArrayView<const uint8_t> packet) {
+    // The packet type is read from the second byte, so make sure that byte
+    // exists before indexing.
+    if (packet.size() >= 2 &&
+        packet[1] == rtcp::ReceiverReport::kPacketType) {
+      // Ignore RR, it requires no response
+    } else {
+      RTC_LOG(LS_ERROR) << "Unexpected RTCP packet generated";
+      // View the buffer itself as chars. Casting the *value* of the first
+      // byte to a pointer would make the dump read from a bogus address.
+      const absl::string_view bytes(
+          reinterpret_cast<const char*>(packet.data()), packet.size());
+      RTC_LOG(LS_ERROR) << "Packet content "
+                        << rtc::hex_encode_with_delimiter(bytes, ' ');
+    }
+  }
+
+  int64_t ProbeCaptureStartNtpTime(ChannelReceiveInterface& channel) {
+    // Computation of the capture_start_ntp_time_ms_ occurs when the
+    // audio data is pulled, not when it is received. So we need to
+    // inject an RTP packet, and then fetch its data.
+ AudioFrame audio_frame; + channel.OnRtpPacket(CreateRtpPacket()); + channel.GetAudioFrameWithInfo(kSampleRateHz, &audio_frame); + CallReceiveStatistics stats = channel.GetRTCPStatistics(); + return stats.capture_start_ntp_time_ms_; + } + + protected: + GlobalSimulatedTimeController time_controller_; + rtc::scoped_refptr<test::MockAudioDeviceModule> audio_device_module_; + rtc::scoped_refptr<AudioDecoderFactory> audio_decoder_factory_; + MockTransport transport_; + NiceMock<MockRtcEventLog> event_log_; +}; + +TEST_F(ChannelReceiveTest, CreateAndDestroy) { + auto channel = CreateTestChannelReceive(); + EXPECT_THAT(channel, NotNull()); +} + +TEST_F(ChannelReceiveTest, ReceiveReportGeneratedOnTime) { + auto channel = CreateTestChannelReceive(); + + bool receiver_report_sent = false; + EXPECT_CALL(transport_, SendRtcp) + .WillRepeatedly([&](rtc::ArrayView<const uint8_t> packet) { + if (packet.size() >= 2 && + packet[1] == rtcp::ReceiverReport::kPacketType) { + receiver_report_sent = true; + } + return true; + }); + // RFC 3550 section 6.2 mentions 5 seconds as a reasonable expectation + // for the interval between RTCP packets. + time_controller_.AdvanceTime(TimeDelta::Seconds(5)); + + EXPECT_TRUE(receiver_report_sent); +} + +TEST_F(ChannelReceiveTest, CaptureStartTimeBecomesValid) { + auto channel = CreateTestChannelReceive(); + + EXPECT_CALL(transport_, SendRtcp) + .WillRepeatedly([&](rtc::ArrayView<const uint8_t> packet) { + HandleGeneratedRtcp(*channel, packet); + return true; + }); + // Before any packets are sent, CaptureStartTime is invalid. + EXPECT_EQ(ProbeCaptureStartNtpTime(*channel), -1); + + // Must start playout, otherwise packet is discarded. + channel->StartPlayout(); + // Send one RTP packet. This causes registration of the SSRC. + channel->OnRtpPacket(CreateRtpPacket()); + EXPECT_EQ(ProbeCaptureStartNtpTime(*channel), -1); + + // Receive a sender report. 
+ auto rtcp_packet_1 = CreateRtcpSenderReport(); + channel->ReceivedRTCPPacket(rtcp_packet_1.data(), rtcp_packet_1.size()); + EXPECT_EQ(ProbeCaptureStartNtpTime(*channel), -1); + + time_controller_.AdvanceTime(TimeDelta::Seconds(5)); + + // Receive a receiver report. This is necessary, which is odd. + // Presumably it is because the receiver needs to know the RTT + // before it can compute the capture start NTP time. + // The receiver report must happen before the second sender report. + auto rtcp_rr = CreateRtcpReceiverReport(); + channel->ReceivedRTCPPacket(rtcp_rr.data(), rtcp_rr.size()); + EXPECT_EQ(ProbeCaptureStartNtpTime(*channel), -1); + + // Receive another sender report after 5 seconds. + // This should be enough to establish the capture start NTP time. + auto rtcp_packet_2 = CreateRtcpSenderReport(); + channel->ReceivedRTCPPacket(rtcp_packet_2.data(), rtcp_packet_2.size()); + + EXPECT_NE(ProbeCaptureStartNtpTime(*channel), -1); +} + +} // namespace +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_send.cc b/third_party/libwebrtc/audio/channel_send.cc new file mode 100644 index 0000000000..ee94760b6f --- /dev/null +++ b/third_party/libwebrtc/audio/channel_send.cc @@ -0,0 +1,922 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/channel_send.h" + +#include <algorithm> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "api/array_view.h" +#include "api/call/transport.h" +#include "api/crypto/frame_encryptor_interface.h" +#include "api/rtc_event_log/rtc_event_log.h" +#include "api/sequence_checker.h" +#include "audio/channel_send_frame_transformer_delegate.h" +#include "audio/utility/audio_frame_operations.h" +#include "call/rtp_transport_controller_send_interface.h" +#include "logging/rtc_event_log/events/rtc_event_audio_playout.h" +#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_processing/rms_level.h" +#include "modules/pacing/packet_router.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/checks.h" +#include "rtc_base/event.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/race_checker.h" +#include "rtc_base/rate_limiter.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/time_utils.h" +#include "rtc_base/trace_event.h" +#include "system_wrappers/include/clock.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace voe { + +namespace { + +constexpr int64_t kMaxRetransmissionWindowMs = 1000; +constexpr int64_t kMinRetransmissionWindowMs = 30; + +class RtpPacketSenderProxy; +class TransportSequenceNumberProxy; + +class RtcpCounterObserver : public RtcpPacketTypeCounterObserver { + public: + explicit RtcpCounterObserver(uint32_t ssrc) : ssrc_(ssrc) {} + + void RtcpPacketTypesCounterUpdated( + uint32_t ssrc, const RtcpPacketTypeCounter& packet_counter) override { + if (ssrc_ != ssrc) { + return; + } + + MutexLock lock(&mutex_); + packet_counter_ = packet_counter; + } + + RtcpPacketTypeCounter GetCounts() { + MutexLock lock(&mutex_); 
+ return packet_counter_; + } + + private: + Mutex mutex_; + const uint32_t ssrc_; + RtcpPacketTypeCounter packet_counter_; +}; + +class ChannelSend : public ChannelSendInterface, + public AudioPacketizationCallback, // receive encoded + // packets from the ACM + public RtcpPacketTypeCounterObserver, + public ReportBlockDataObserver { + public: + ChannelSend(Clock* clock, + TaskQueueFactory* task_queue_factory, + Transport* rtp_transport, + RtcpRttStats* rtcp_rtt_stats, + RtcEventLog* rtc_event_log, + FrameEncryptorInterface* frame_encryptor, + const webrtc::CryptoOptions& crypto_options, + bool extmap_allow_mixed, + int rtcp_report_interval_ms, + uint32_t ssrc, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + RtpTransportControllerSendInterface* transport_controller, + const FieldTrialsView& field_trials); + + ~ChannelSend() override; + + // Send using this encoder, with this payload type. + void SetEncoder(int payload_type, + std::unique_ptr<AudioEncoder> encoder) override; + void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> + modifier) override; + void CallEncoder(rtc::FunctionView<void(AudioEncoder*)> modifier) override; + + // API methods + void StartSend() override; + void StopSend() override; + + // Codecs + void OnBitrateAllocation(BitrateAllocationUpdate update) override; + int GetTargetBitrate() const override; + + // Network + void ReceivedRTCPPacket(const uint8_t* data, size_t length) override; + + // Muting, Volume and Level. + void SetInputMute(bool enable) override; + + // Stats. + ANAStats GetANAStatistics() const override; + + // Used by AudioSendStream. + RtpRtcpInterface* GetRtpRtcp() const override; + + void RegisterCngPayloadType(int payload_type, int payload_frequency) override; + + // DTMF. 
+ bool SendTelephoneEventOutband(int event, int duration_ms) override; + void SetSendTelephoneEventPayloadType(int payload_type, + int payload_frequency) override; + + // RTP+RTCP + void SetSendAudioLevelIndicationStatus(bool enable, int id) override; + + void RegisterSenderCongestionControlObjects( + RtpTransportControllerSendInterface* transport) override; + void ResetSenderCongestionControlObjects() override; + void SetRTCP_CNAME(absl::string_view c_name) override; + std::vector<ReportBlockData> GetRemoteRTCPReportBlocks() const override; + CallSendStatistics GetRTCPStatistics() const override; + + // ProcessAndEncodeAudio() posts a task on the shared encoder task queue, + // which in turn calls (on the queue) ProcessAndEncodeAudioOnTaskQueue() where + // the actual processing of the audio takes place. The processing mainly + // consists of encoding and preparing the result for sending by adding it to a + // send queue. + // The main reason for using a task queue here is to release the native, + // OS-specific, audio capture thread as soon as possible to ensure that it + // can go back to sleep and be prepared to deliver an new captured audio + // packet. + void ProcessAndEncodeAudio(std::unique_ptr<AudioFrame> audio_frame) override; + + int64_t GetRTT() const override; + + // E2EE Custom Audio Frame Encryption + void SetFrameEncryptor( + rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor) override; + + // Sets a frame transformer between encoder and packetizer, to transform + // encoded frames before sending them out the network. + void SetEncoderToPacketizerFrameTransformer( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) + override; + + // RtcpPacketTypeCounterObserver. + void RtcpPacketTypesCounterUpdated( + uint32_t ssrc, + const RtcpPacketTypeCounter& packet_counter) override; + + // ReportBlockDataObserver. 
+ void OnReportBlockDataUpdated(ReportBlockData report_block) override; + + private: + // From AudioPacketizationCallback in the ACM + int32_t SendData(AudioFrameType frameType, + uint8_t payloadType, + uint32_t rtp_timestamp, + const uint8_t* payloadData, + size_t payloadSize, + int64_t absolute_capture_timestamp_ms) override; + + bool InputMute() const; + + int32_t SendRtpAudio(AudioFrameType frameType, + uint8_t payloadType, + uint32_t rtp_timestamp_without_offset, + rtc::ArrayView<const uint8_t> payload, + int64_t absolute_capture_timestamp_ms) + RTC_RUN_ON(encoder_queue_); + + void OnReceivedRtt(int64_t rtt_ms); + + void InitFrameTransformerDelegate( + rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer); + + // Thread checkers document and lock usage of some methods on voe::Channel to + // specific threads we know about. The goal is to eventually split up + // voe::Channel into parts with single-threaded semantics, and thereby reduce + // the need for locks. + SequenceChecker worker_thread_checker_; + // Methods accessed from audio and video threads are checked for sequential- + // only access. We don't necessarily own and control these threads, so thread + // checkers cannot be used. E.g. Chromium may transfer "ownership" from one + // audio thread to another, but access is still sequential. + rtc::RaceChecker audio_thread_race_checker_; + + mutable Mutex volume_settings_mutex_; + + const uint32_t ssrc_; + bool sending_ RTC_GUARDED_BY(&worker_thread_checker_) = false; + + RtcEventLog* const event_log_; + + std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; + std::unique_ptr<RTPSenderAudio> rtp_sender_audio_; + + std::unique_ptr<AudioCodingModule> audio_coding_; + + // This is just an offset, RTP module will add its own random offset. 
+ uint32_t timestamp_ RTC_GUARDED_BY(audio_thread_race_checker_) = 0; + absl::optional<int64_t> last_capture_timestamp_ms_ + RTC_GUARDED_BY(audio_thread_race_checker_); + + RmsLevel rms_level_ RTC_GUARDED_BY(encoder_queue_); + bool input_mute_ RTC_GUARDED_BY(volume_settings_mutex_) = false; + bool previous_frame_muted_ RTC_GUARDED_BY(encoder_queue_) = false; + + const std::unique_ptr<RtcpCounterObserver> rtcp_counter_observer_; + + PacketRouter* packet_router_ RTC_GUARDED_BY(&worker_thread_checker_) = + nullptr; + const std::unique_ptr<RtpPacketSenderProxy> rtp_packet_pacer_proxy_; + const std::unique_ptr<RateLimiter> retransmission_rate_limiter_; + + SequenceChecker construction_thread_; + + std::atomic<bool> include_audio_level_indication_ = false; + std::atomic<bool> encoder_queue_is_active_ = false; + std::atomic<bool> first_frame_ = true; + + // E2EE Audio Frame Encryption + rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor_ + RTC_GUARDED_BY(encoder_queue_); + // E2EE Frame Encryption Options + const webrtc::CryptoOptions crypto_options_; + + // Delegates calls to a frame transformer to transform audio, and + // receives callbacks with the transformed frames; delegates calls to + // ChannelSend::SendRtpAudio to send the transformed audio. + rtc::scoped_refptr<ChannelSendFrameTransformerDelegate> + frame_transformer_delegate_ RTC_GUARDED_BY(encoder_queue_); + + mutable Mutex rtcp_counter_mutex_; + RtcpPacketTypeCounter rtcp_packet_type_counter_ + RTC_GUARDED_BY(rtcp_counter_mutex_); + + // Defined last to ensure that there are no running tasks when the other + // members are destroyed. 
+ rtc::TaskQueue encoder_queue_; +}; + +const int kTelephoneEventAttenuationdB = 10; + +class RtpPacketSenderProxy : public RtpPacketSender { + public: + RtpPacketSenderProxy() : rtp_packet_pacer_(nullptr) {} + + void SetPacketPacer(RtpPacketSender* rtp_packet_pacer) { + RTC_DCHECK(thread_checker_.IsCurrent()); + MutexLock lock(&mutex_); + rtp_packet_pacer_ = rtp_packet_pacer; + } + + void EnqueuePackets( + std::vector<std::unique_ptr<RtpPacketToSend>> packets) override { + MutexLock lock(&mutex_); + if (rtp_packet_pacer_) { + rtp_packet_pacer_->EnqueuePackets(std::move(packets)); + } + } + + void RemovePacketsForSsrc(uint32_t ssrc) override { + MutexLock lock(&mutex_); + if (rtp_packet_pacer_) { + rtp_packet_pacer_->RemovePacketsForSsrc(ssrc); + } + } + + private: + SequenceChecker thread_checker_; + Mutex mutex_; + RtpPacketSender* rtp_packet_pacer_ RTC_GUARDED_BY(&mutex_); +}; + +int32_t ChannelSend::SendData(AudioFrameType frameType, + uint8_t payloadType, + uint32_t rtp_timestamp, + const uint8_t* payloadData, + size_t payloadSize, + int64_t absolute_capture_timestamp_ms) { + RTC_DCHECK_RUN_ON(&encoder_queue_); + rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize); + if (frame_transformer_delegate_) { + // Asynchronously transform the payload before sending it. After the payload + // is transformed, the delegate will call SendRtpAudio to send it. + frame_transformer_delegate_->Transform( + frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(), + payloadData, payloadSize, absolute_capture_timestamp_ms, + rtp_rtcp_->SSRC()); + return 0; + } + return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload, + absolute_capture_timestamp_ms); +} + +int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType, + uint8_t payloadType, + uint32_t rtp_timestamp_without_offset, + rtc::ArrayView<const uint8_t> payload, + int64_t absolute_capture_timestamp_ms) { + // E2EE Custom Audio Frame Encryption (This is optional). 
+ // Keep this buffer around for the lifetime of the send call. + rtc::Buffer encrypted_audio_payload; + // We don't invoke encryptor if payload is empty, which means we are to send + // DTMF, or the encoder entered DTX. + // TODO(minyue): see whether DTMF packets should be encrypted or not. In + // current implementation, they are not. + if (!payload.empty()) { + if (frame_encryptor_ != nullptr) { + // TODO(benwright@webrtc.org) - Allocate enough to always encrypt inline. + // Allocate a buffer to hold the maximum possible encrypted payload. + size_t max_ciphertext_size = frame_encryptor_->GetMaxCiphertextByteSize( + cricket::MEDIA_TYPE_AUDIO, payload.size()); + encrypted_audio_payload.SetSize(max_ciphertext_size); + + // Encrypt the audio payload into the buffer. + size_t bytes_written = 0; + int encrypt_status = frame_encryptor_->Encrypt( + cricket::MEDIA_TYPE_AUDIO, rtp_rtcp_->SSRC(), + /*additional_data=*/nullptr, payload, encrypted_audio_payload, + &bytes_written); + if (encrypt_status != 0) { + RTC_DLOG(LS_ERROR) + << "Channel::SendData() failed encrypt audio payload: " + << encrypt_status; + return -1; + } + // Resize the buffer to the exact number of bytes actually used. + encrypted_audio_payload.SetSize(bytes_written); + // Rewrite the payloadData and size to the new encrypted payload. + payload = encrypted_audio_payload; + } else if (crypto_options_.sframe.require_frame_encryption) { + RTC_DLOG(LS_ERROR) << "Channel::SendData() failed sending audio payload: " + "A frame encryptor is required but one is not set."; + return -1; + } + } + + // Push data from ACM to RTP/RTCP-module to deliver audio frame for + // packetization. + if (!rtp_rtcp_->OnSendingRtpFrame(rtp_timestamp_without_offset, + // Leaving the time when this frame was + // received from the capture device as + // undefined for voice for now. 
+ -1, payloadType, + /*force_sender_report=*/false)) { + return -1; + } + + // RTCPSender has it's own copy of the timestamp offset, added in + // RTCPSender::BuildSR, hence we must not add the in the offset for the above + // call. + // TODO(nisse): Delete RTCPSender:timestamp_offset_, and see if we can confine + // knowledge of the offset to a single place. + + // This call will trigger Transport::SendPacket() from the RTP/RTCP module. + RTPSenderAudio::RtpAudioFrame frame = { + .type = frameType, + .payload = payload, + .payload_id = payloadType, + .rtp_timestamp = + rtp_timestamp_without_offset + rtp_rtcp_->StartTimestamp()}; + if (absolute_capture_timestamp_ms > 0) { + frame.capture_time = Timestamp::Millis(absolute_capture_timestamp_ms); + } + if (include_audio_level_indication_.load()) { + frame.audio_level_dbov = rms_level_.Average(); + } + if (!rtp_sender_audio_->SendAudio(frame)) { + RTC_DLOG(LS_ERROR) + << "ChannelSend::SendData() failed to send data to RTP/RTCP module"; + return -1; + } + + return 0; +} + +ChannelSend::ChannelSend( + Clock* clock, + TaskQueueFactory* task_queue_factory, + Transport* rtp_transport, + RtcpRttStats* rtcp_rtt_stats, + RtcEventLog* rtc_event_log, + FrameEncryptorInterface* frame_encryptor, + const webrtc::CryptoOptions& crypto_options, + bool extmap_allow_mixed, + int rtcp_report_interval_ms, + uint32_t ssrc, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + RtpTransportControllerSendInterface* transport_controller, + const FieldTrialsView& field_trials) + : ssrc_(ssrc), + event_log_(rtc_event_log), + rtcp_counter_observer_(new RtcpCounterObserver(ssrc)), + rtp_packet_pacer_proxy_(new RtpPacketSenderProxy()), + retransmission_rate_limiter_( + new RateLimiter(clock, kMaxRetransmissionWindowMs)), + frame_encryptor_(frame_encryptor), + crypto_options_(crypto_options), + encoder_queue_(task_queue_factory->CreateTaskQueue( + "AudioEncoder", + TaskQueueFactory::Priority::NORMAL)) { + audio_coding_ = 
AudioCodingModule::Create(); + + RtpRtcpInterface::Configuration configuration; + configuration.report_block_data_observer = this; + configuration.network_link_rtcp_observer = + transport_controller->GetRtcpObserver(); + configuration.transport_feedback_callback = + transport_controller->transport_feedback_observer(); + configuration.clock = clock; + configuration.audio = true; + configuration.outgoing_transport = rtp_transport; + + configuration.paced_sender = rtp_packet_pacer_proxy_.get(); + + configuration.event_log = event_log_; + configuration.rtt_stats = rtcp_rtt_stats; + configuration.rtcp_packet_type_counter_observer = + rtcp_counter_observer_.get(); + if (field_trials.IsDisabled("WebRTC-DisableRtxRateLimiter")) { + configuration.retransmission_rate_limiter = + retransmission_rate_limiter_.get(); + } + configuration.extmap_allow_mixed = extmap_allow_mixed; + configuration.rtcp_report_interval_ms = rtcp_report_interval_ms; + configuration.rtcp_packet_type_counter_observer = this; + + configuration.local_media_ssrc = ssrc; + + rtp_rtcp_ = ModuleRtpRtcpImpl2::Create(configuration); + rtp_rtcp_->SetSendingMediaStatus(false); + + rtp_sender_audio_ = std::make_unique<RTPSenderAudio>(configuration.clock, + rtp_rtcp_->RtpSender()); + + // Ensure that RTCP is enabled by default for the created channel. + rtp_rtcp_->SetRTCPStatus(RtcpMode::kCompound); + + int error = audio_coding_->RegisterTransportCallback(this); + RTC_DCHECK_EQ(0, error); +} + +ChannelSend::~ChannelSend() { + RTC_DCHECK(construction_thread_.IsCurrent()); + + // Resets the delegate's callback to ChannelSend::SendRtpAudio. 
+ if (frame_transformer_delegate_) + frame_transformer_delegate_->Reset(); + + StopSend(); + int error = audio_coding_->RegisterTransportCallback(NULL); + RTC_DCHECK_EQ(0, error); +} + +void ChannelSend::StartSend() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK(!sending_); + sending_ = true; + + RTC_DCHECK(packet_router_); + packet_router_->AddSendRtpModule(rtp_rtcp_.get(), /*remb_candidate=*/false); + rtp_rtcp_->SetSendingMediaStatus(true); + int ret = rtp_rtcp_->SetSendingStatus(true); + RTC_DCHECK_EQ(0, ret); + + // It is now OK to start processing on the encoder task queue. + first_frame_.store(true); + encoder_queue_is_active_.store(true); +} + +void ChannelSend::StopSend() { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + if (!sending_) { + return; + } + sending_ = false; + encoder_queue_is_active_.store(false); + + // Wait until all pending encode tasks are executed and clear any remaining + // buffers in the encoder. + rtc::Event flush; + encoder_queue_.PostTask([this, &flush]() { + RTC_DCHECK_RUN_ON(&encoder_queue_); + CallEncoder([](AudioEncoder* encoder) { encoder->Reset(); }); + flush.Set(); + }); + flush.Wait(rtc::Event::kForever); + + // Reset sending SSRC and sequence number and triggers direct transmission + // of RTCP BYE + if (rtp_rtcp_->SetSendingStatus(false) == -1) { + RTC_DLOG(LS_ERROR) << "StartSend() RTP/RTCP failed to stop sending"; + } + rtp_rtcp_->SetSendingMediaStatus(false); + + RTC_DCHECK(packet_router_); + packet_router_->RemoveSendRtpModule(rtp_rtcp_.get()); + rtp_packet_pacer_proxy_->RemovePacketsForSsrc(rtp_rtcp_->SSRC()); +} + +void ChannelSend::SetEncoder(int payload_type, + std::unique_ptr<AudioEncoder> encoder) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK_GE(payload_type, 0); + RTC_DCHECK_LE(payload_type, 127); + + // The RTP/RTCP module needs to know the RTP timestamp rate (i.e. clockrate) + // as well as some other things, so we collect this info and send it along. 
+ rtp_rtcp_->RegisterSendPayloadFrequency(payload_type, + encoder->RtpTimestampRateHz()); + rtp_sender_audio_->RegisterAudioPayload("audio", payload_type, + encoder->RtpTimestampRateHz(), + encoder->NumChannels(), 0); + + audio_coding_->SetEncoder(std::move(encoder)); +} + +void ChannelSend::ModifyEncoder( + rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) { + // This method can be called on the worker thread, module process thread + // or network thread. Audio coding is thread safe, so we do not need to + // enforce the calling thread. + audio_coding_->ModifyEncoder(modifier); +} + +void ChannelSend::CallEncoder(rtc::FunctionView<void(AudioEncoder*)> modifier) { + ModifyEncoder([modifier](std::unique_ptr<AudioEncoder>* encoder_ptr) { + if (*encoder_ptr) { + modifier(encoder_ptr->get()); + } else { + RTC_DLOG(LS_WARNING) << "Trying to call unset encoder."; + } + }); +} + +void ChannelSend::OnBitrateAllocation(BitrateAllocationUpdate update) { + // This method can be called on the worker thread, module process thread + // or on a TaskQueue via VideoSendStreamImpl::OnEncoderConfigurationChanged. + // TODO(solenberg): Figure out a good way to check this or enforce calling + // rules. 
+ // RTC_DCHECK(worker_thread_checker_.IsCurrent() || + // module_process_thread_checker_.IsCurrent()); + CallEncoder([&](AudioEncoder* encoder) { + encoder->OnReceivedUplinkAllocation(update); + }); + retransmission_rate_limiter_->SetMaxRate(update.target_bitrate.bps()); +} + +int ChannelSend::GetTargetBitrate() const { + return audio_coding_->GetTargetBitrate(); +} + +void ChannelSend::OnReportBlockDataUpdated(ReportBlockData report_block) { + float packet_loss_rate = report_block.fraction_lost(); + CallEncoder([&](AudioEncoder* encoder) { + encoder->OnReceivedUplinkPacketLossFraction(packet_loss_rate); + }); +} + +void ChannelSend::ReceivedRTCPPacket(const uint8_t* data, size_t length) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + + // Deliver RTCP packet to RTP/RTCP module for parsing + rtp_rtcp_->IncomingRtcpPacket(rtc::MakeArrayView(data, length)); + + int64_t rtt = GetRTT(); + if (rtt == 0) { + // Waiting for valid RTT. + return; + } + + int64_t nack_window_ms = rtt; + if (nack_window_ms < kMinRetransmissionWindowMs) { + nack_window_ms = kMinRetransmissionWindowMs; + } else if (nack_window_ms > kMaxRetransmissionWindowMs) { + nack_window_ms = kMaxRetransmissionWindowMs; + } + retransmission_rate_limiter_->SetWindowSize(nack_window_ms); + + OnReceivedRtt(rtt); +} + +void ChannelSend::SetInputMute(bool enable) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + MutexLock lock(&volume_settings_mutex_); + input_mute_ = enable; +} + +bool ChannelSend::InputMute() const { + MutexLock lock(&volume_settings_mutex_); + return input_mute_; +} + +bool ChannelSend::SendTelephoneEventOutband(int event, int duration_ms) { + RTC_DCHECK_RUN_ON(&worker_thread_checker_); + RTC_DCHECK_LE(0, event); + RTC_DCHECK_GE(255, event); + RTC_DCHECK_LE(0, duration_ms); + RTC_DCHECK_GE(65535, duration_ms); + if (!sending_) { + return false; + } + if (rtp_sender_audio_->SendTelephoneEvent( + event, duration_ms, kTelephoneEventAttenuationdB) != 0) { + RTC_DLOG(LS_ERROR) << 
"SendTelephoneEvent() failed to send event";
+    return false;
+  }
+  return true;
+}
+
+// Registers `payload_type` / `payload_frequency` for the "CN" payload on both
+// the RTP/RTCP module and the audio RTP sender.
+void ChannelSend::RegisterCngPayloadType(int payload_type,
+                                         int payload_frequency) {
+  rtp_rtcp_->RegisterSendPayloadFrequency(payload_type, payload_frequency);
+  rtp_sender_audio_->RegisterAudioPayload("CN", payload_type, payload_frequency,
+                                          1, 0);
+}
+
+// Registers `payload_type` (DCHECKed to be within [0, 127]) for the
+// "telephone-event" payload. Must be called on the worker thread.
+void ChannelSend::SetSendTelephoneEventPayloadType(int payload_type,
+                                                   int payload_frequency) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  RTC_DCHECK_LE(0, payload_type);
+  RTC_DCHECK_GE(127, payload_type);
+  rtp_rtcp_->RegisterSendPayloadFrequency(payload_type, payload_frequency);
+  rtp_sender_audio_->RegisterAudioPayload("telephone-event", payload_type,
+                                          payload_frequency, 0, 0);
+}
+
+// Records `enable` in `include_audio_level_indication_` and registers (with
+// extension id `id`) or deregisters the AudioLevel RTP header extension.
+void ChannelSend::SetSendAudioLevelIndicationStatus(bool enable, int id) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  include_audio_level_indication_.store(enable);
+  if (enable) {
+    rtp_rtcp_->RegisterRtpHeaderExtension(webrtc::AudioLevel::Uri(), id);
+  } else {
+    rtp_rtcp_->DeregisterSendRtpHeaderExtension(webrtc::AudioLevel::Uri());
+  }
+}
+
+// Connects this channel to `transport`: its packet sender is installed behind
+// `rtp_packet_pacer_proxy_`, packet storage is switched on in the RTP module
+// and its packet router is remembered. DCHECKs that no router is registered
+// yet.
+void ChannelSend::RegisterSenderCongestionControlObjects(
+    RtpTransportControllerSendInterface* transport) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  RtpPacketSender* rtp_packet_pacer = transport->packet_sender();
+  PacketRouter* packet_router = transport->packet_router();
+
+  RTC_DCHECK(rtp_packet_pacer);
+  RTC_DCHECK(packet_router);
+  RTC_DCHECK(!packet_router_);
+  rtp_packet_pacer_proxy_->SetPacketPacer(rtp_packet_pacer);
+  // NOTE(review): 600 is presumably the number of packets to keep - confirm
+  // against RtpRtcpInterface::SetStorePacketsStatus().
+  rtp_rtcp_->SetStorePacketsStatus(true, 600);
+  packet_router_ = packet_router;
+}
+
+// Undoes RegisterSenderCongestionControlObjects(): disables packet storage,
+// forgets the packet router and clears the pacer behind the proxy.
+void ChannelSend::ResetSenderCongestionControlObjects() {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  RTC_DCHECK(packet_router_);
+  rtp_rtcp_->SetStorePacketsStatus(false, 600);
+  packet_router_ = nullptr;
+  rtp_packet_pacer_proxy_->SetPacketPacer(nullptr);
+}
+
+// Sets the RTCP CNAME, truncated to its first 255 characters.
+void ChannelSend::SetRTCP_CNAME(absl::string_view c_name) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  // Note: SetCNAME() accepts a c string of length at most 255.
+  const std::string c_name_limited(c_name.substr(0, 255));
+  int ret = rtp_rtcp_->SetCNAME(c_name_limited.c_str()) != 0;
+  RTC_DCHECK_EQ(0, ret) << "SetRTCP_CNAME() failed to set RTCP CNAME";
+}
+
+std::vector<ReportBlockData> ChannelSend::GetRemoteRTCPReportBlocks() const {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  // Get the report blocks from the latest received RTCP Sender or Receiver
+  // Report. Each element in the vector contains the sender's SSRC and a
+  // report block according to RFC 3550.
+  return rtp_rtcp_->GetLatestReportBlockData();
+}
+
+// Builds a CallSendStatistics snapshot from the RTT, the RTCP packet-type
+// counters, the RTP and RTX send counters and the latest report blocks.
+CallSendStatistics ChannelSend::GetRTCPStatistics() const {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  CallSendStatistics stats = {0};
+  stats.rttMs = GetRTT();
+  stats.rtcp_packet_type_counts = rtcp_counter_observer_->GetCounts();
+
+  StreamDataCounters rtp_stats;
+  StreamDataCounters rtx_stats;
+  rtp_rtcp_->GetSendStreamDataCounters(&rtp_stats, &rtx_stats);
+  stats.payload_bytes_sent =
+      rtp_stats.transmitted.payload_bytes + rtx_stats.transmitted.payload_bytes;
+  stats.header_and_padding_bytes_sent =
+      rtp_stats.transmitted.padding_bytes + rtp_stats.transmitted.header_bytes +
+      rtx_stats.transmitted.padding_bytes + rtx_stats.transmitted.header_bytes;
+
+  // TODO(https://crbug.com/webrtc/10555): RTX retransmissions should show up in
+  // separate outbound-rtp stream objects.
+  stats.retransmitted_bytes_sent = rtp_stats.retransmitted.payload_bytes;
+  stats.packetsSent =
+      rtp_stats.transmitted.packets + rtx_stats.transmitted.packets;
+  stats.total_packet_send_delay = rtp_stats.transmitted.total_packet_delay;
+  stats.retransmitted_packets_sent = rtp_stats.retransmitted.packets;
+  stats.report_block_datas = rtp_rtcp_->GetLatestReportBlockData();
+
+  {
+    // `rtcp_packet_type_counter_` is written by
+    // RtcpPacketTypesCounterUpdated() under the same mutex.
+    MutexLock lock(&rtcp_counter_mutex_);
+    stats.nacks_received = rtcp_packet_type_counter_.nack_packets;
+  }
+
+  return stats;
+}
+
+// Stores `packet_counter` when it is reported for this channel's own SSRC;
+// updates for any other SSRC are ignored.
+void ChannelSend::RtcpPacketTypesCounterUpdated(
+    uint32_t ssrc,
+    const RtcpPacketTypeCounter& packet_counter) {
+  if (ssrc != ssrc_) {
+    return;
+  }
+  MutexLock lock(&rtcp_counter_mutex_);
+  rtcp_packet_type_counter_ = packet_counter;
+}
+
+// Stamps `audio_frame` with the running RTP timestamp and posts it to the
+// encoder queue, where it is muted if requested, optionally analyzed for the
+// audio level, and handed to the audio coding module. Frames are dropped
+// while `encoder_queue_is_active_` is false.
+void ChannelSend::ProcessAndEncodeAudio(
+    std::unique_ptr<AudioFrame> audio_frame) {
+  TRACE_EVENT0("webrtc", "ChannelSend::ProcessAndEncodeAudio");
+
+  RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
+  RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
+  RTC_DCHECK_LE(audio_frame->num_channels_, 8);
+
+  if (!encoder_queue_is_active_.load()) {
+    return;
+  }
+
+  // Update `timestamp_` based on the capture timestamp for the first frame
+  // after sending is resumed.
+  if (first_frame_.load()) {
+    first_frame_.store(false);
+    if (last_capture_timestamp_ms_ &&
+        audio_frame->absolute_capture_timestamp_ms()) {
+      int64_t diff_ms = *audio_frame->absolute_capture_timestamp_ms() -
+                        *last_capture_timestamp_ms_;
+      // Truncate to whole frames and subtract one since `timestamp_` was
+      // incremented after the last frame.
+      int64_t diff_frames = diff_ms * audio_frame->sample_rate_hz() / 1000 /
+                                audio_frame->samples_per_channel() -
+                            1;
+      timestamp_ += std::max<int64_t>(
+          diff_frames * audio_frame->samples_per_channel(), 0);
+    }
+  }
+
+  audio_frame->timestamp_ = timestamp_;
+  timestamp_ += audio_frame->samples_per_channel_;
+  last_capture_timestamp_ms_ = audio_frame->absolute_capture_timestamp_ms();
+
+  // Profile time between when the audio frame is added to the task queue and
+  // when the task is actually executed.
+  audio_frame->UpdateProfileTimeStamp();
+  encoder_queue_.PostTask(
+      [this, audio_frame = std::move(audio_frame)]() mutable {
+        RTC_DCHECK_RUN_ON(&encoder_queue_);
+        if (!encoder_queue_is_active_.load()) {
+          return;
+        }
+        // Measure time between when the audio frame is added to the task queue
+        // and when the task is actually executed. Goal is to keep track of
+        // unwanted extra latency added by the task queue.
+        RTC_HISTOGRAM_COUNTS_10000("WebRTC.Audio.EncodingTaskQueueLatencyMs",
+                                   audio_frame->ElapsedProfileTimeMs());
+
+        bool is_muted = InputMute();
+        AudioFrameOperations::Mute(audio_frame.get(), previous_frame_muted_,
+                                   is_muted);
+
+        if (include_audio_level_indication_.load()) {
+          size_t length =
+              audio_frame->samples_per_channel_ * audio_frame->num_channels_;
+          // `length` counts int16_t samples, so it must be bounded by the
+          // frame's sample capacity; the byte capacity is twice as large and
+          // would let the view below extend past the sample buffer.
+          RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeSamples);
+          if (is_muted && previous_frame_muted_) {
+            rms_level_.AnalyzeMuted(length);
+          } else {
+            rms_level_.Analyze(
+                rtc::ArrayView<const int16_t>(audio_frame->data(), length));
+          }
+        }
+        previous_frame_muted_ = is_muted;
+
+        // This call will trigger AudioPacketizationCallback::SendData if
+        // encoding is done and payload is ready for packetization and
+        // transmission. Otherwise, it will return without invoking the
+        // callback.
+        if (audio_coding_->Add10MsData(*audio_frame) < 0) {
+          RTC_DLOG(LS_ERROR) << "ACM::Add10MsData() failed.";
+          return;
+        }
+      });
+}
+
+ANAStats ChannelSend::GetANAStatistics() const {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  return audio_coding_->GetANAStats();
+}
+
+// Returns the RTP/RTCP module owned by this channel (non-owning pointer).
+RtpRtcpInterface* ChannelSend::GetRtpRtcp() const {
+  return rtp_rtcp_.get();
+}
+
+// Returns the last RTT, in milliseconds, of the first report block, or 0 when
+// no report block is available.
+int64_t ChannelSend::GetRTT() const {
+  std::vector<ReportBlockData> report_blocks =
+      rtp_rtcp_->GetLatestReportBlockData();
+  if (report_blocks.empty()) {
+    return 0;
+  }
+
+  // We don't know in advance the remote ssrc used by the other end's receiver
+  // reports, so use the first report block for the RTT.
+  return report_blocks.front().last_rtt().ms();
+}
+
+// Replaces `frame_encryptor_`. The assignment is posted to the encoder queue.
+void ChannelSend::SetFrameEncryptor(
+    rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  encoder_queue_.PostTask([this, frame_encryptor]() mutable {
+    RTC_DCHECK_RUN_ON(&encoder_queue_);
+    frame_encryptor_ = std::move(frame_encryptor);
+  });
+}
+
+// Installs `frame_transformer` by creating the transformer delegate on the
+// encoder queue. A null transformer is ignored.
+void ChannelSend::SetEncoderToPacketizerFrameTransformer(
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+  RTC_DCHECK_RUN_ON(&worker_thread_checker_);
+  if (!frame_transformer)
+    return;
+
+  encoder_queue_.PostTask(
+      [this, frame_transformer = std::move(frame_transformer)]() mutable {
+        RTC_DCHECK_RUN_ON(&encoder_queue_);
+        InitFrameTransformerDelegate(std::move(frame_transformer));
+      });
+}
+
+void ChannelSend::OnReceivedRtt(int64_t rtt_ms) {
+  // Invoke audio encoders OnReceivedRtt().
+  CallEncoder(
+      [rtt_ms](AudioEncoder* encoder) { encoder->OnReceivedRtt(rtt_ms); });
+}
+
+// Creates `frame_transformer_delegate_` around `frame_transformer` and
+// initializes it. Runs on the encoder queue; DCHECKs that no delegate exists
+// yet.
+void ChannelSend::InitFrameTransformerDelegate(
+    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
+  RTC_DCHECK_RUN_ON(&encoder_queue_);
+  RTC_DCHECK(frame_transformer);
+  RTC_DCHECK(!frame_transformer_delegate_);
+
+  // Pass a callback to ChannelSend::SendRtpAudio, to be called by the delegate
+  // to send the transformed audio.
+  ChannelSendFrameTransformerDelegate::SendFrameCallback send_audio_callback =
+      [this](AudioFrameType frameType, uint8_t payloadType,
+             uint32_t rtp_timestamp_with_offset,
+             rtc::ArrayView<const uint8_t> payload,
+             int64_t absolute_capture_timestamp_ms) {
+        RTC_DCHECK_RUN_ON(&encoder_queue_);
+        // The delegate hands back the timestamp including the start offset;
+        // remove the offset again before passing it to SendRtpAudio().
+        return SendRtpAudio(
+            frameType, payloadType,
+            rtp_timestamp_with_offset - rtp_rtcp_->StartTimestamp(), payload,
+            absolute_capture_timestamp_ms);
+      };
+  frame_transformer_delegate_ =
+      rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
+          std::move(send_audio_callback), std::move(frame_transformer),
+          &encoder_queue_);
+  frame_transformer_delegate_->Init();
+}
+
+}  // namespace
+
+// Factory for the ChannelSend implementation; forwards every argument to the
+// ChannelSend constructor unchanged.
+std::unique_ptr<ChannelSendInterface> CreateChannelSend(
+    Clock* clock,
+    TaskQueueFactory* task_queue_factory,
+    Transport* rtp_transport,
+    RtcpRttStats* rtcp_rtt_stats,
+    RtcEventLog* rtc_event_log,
+    FrameEncryptorInterface* frame_encryptor,
+    const webrtc::CryptoOptions& crypto_options,
+    bool extmap_allow_mixed,
+    int rtcp_report_interval_ms,
+    uint32_t ssrc,
+    rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
+    RtpTransportControllerSendInterface* transport_controller,
+    const FieldTrialsView& field_trials) {
+  return std::make_unique<ChannelSend>(
+      clock, task_queue_factory, rtp_transport, rtcp_rtt_stats, rtc_event_log,
+      frame_encryptor, crypto_options, extmap_allow_mixed,
+      rtcp_report_interval_ms, ssrc, std::move(frame_transformer),
+      transport_controller, field_trials);
+}
+
+}  // namespace voe
+}  // namespace
webrtc
diff --git a/third_party/libwebrtc/audio/channel_send.h b/third_party/libwebrtc/audio/channel_send.h
new file mode 100644
index 0000000000..f0c9232296
--- /dev/null
+++ b/third_party/libwebrtc/audio/channel_send.h
@@ -0,0 +1,135 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_CHANNEL_SEND_H_
+#define AUDIO_CHANNEL_SEND_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/crypto/crypto_options.h"
+#include "api/field_trials_view.h"
+#include "api/frame_transformer_interface.h"
+#include "api/function_view.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "modules/rtp_rtcp/include/report_block_data.h"
+#include "modules/rtp_rtcp/source/rtp_rtcp_interface.h"
+#include "modules/rtp_rtcp/source/rtp_sender_audio.h"
+
+namespace webrtc {
+
+class FrameEncryptorInterface;
+class RtcEventLog;
+class RtpTransportControllerSendInterface;
+
+// Snapshot of send-side statistics, as returned by
+// ChannelSendInterface::GetRTCPStatistics().
+struct CallSendStatistics {
+  int64_t rttMs;
+  int64_t payload_bytes_sent;
+  int64_t header_and_padding_bytes_sent;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-retransmittedbytessent
+  uint64_t retransmitted_bytes_sent;
+  int packetsSent;
+  // https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-totalpacketsenddelay
+  TimeDelta total_packet_send_delay = TimeDelta::Zero();
+  // https://w3c.github.io/webrtc-stats/#dom-rtcoutboundrtpstreamstats-retransmittedpacketssent
+  uint64_t retransmitted_packets_sent;
+  RtcpPacketTypeCounter rtcp_packet_type_counts;
+  // A snapshot of Report Blocks with additional data of interest to statistics.
+  // Within this list, the sender-source SSRC pair is unique and per-pair the
+  // ReportBlockData represents the latest Report Block that was received for
+  // that pair.
+  std::vector<ReportBlockData> report_block_datas;
+  uint32_t nacks_received;
+};
+
+namespace voe {
+
+// Abstract interface of the audio send channel. Instances are obtained from
+// CreateChannelSend() below.
+class ChannelSendInterface {
+ public:
+  virtual ~ChannelSendInterface() = default;
+
+  virtual void ReceivedRTCPPacket(const uint8_t* packet, size_t length) = 0;
+
+  virtual CallSendStatistics GetRTCPStatistics() const = 0;
+
+  virtual void SetEncoder(int payload_type,
+                          std::unique_ptr<AudioEncoder> encoder) = 0;
+  virtual void ModifyEncoder(
+      rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) = 0;
+  virtual void CallEncoder(rtc::FunctionView<void(AudioEncoder*)> modifier) = 0;
+
+  // Use 0 to indicate that the extension should not be registered.
+  // NOTE(review): the sentence above does not describe SetRTCP_CNAME(), which
+  // takes no extension id; it presumably belonged to a declaration that no
+  // longer exists here - confirm against upstream history.
+  virtual void SetRTCP_CNAME(absl::string_view c_name) = 0;
+  virtual void SetSendAudioLevelIndicationStatus(bool enable, int id) = 0;
+  virtual void RegisterSenderCongestionControlObjects(
+      RtpTransportControllerSendInterface* transport) = 0;
+  virtual void ResetSenderCongestionControlObjects() = 0;
+  virtual std::vector<ReportBlockData> GetRemoteRTCPReportBlocks() const = 0;
+  virtual ANAStats GetANAStatistics() const = 0;
+  virtual void RegisterCngPayloadType(int payload_type,
+                                      int payload_frequency) = 0;
+  virtual void SetSendTelephoneEventPayloadType(int payload_type,
+                                                int payload_frequency) = 0;
+  virtual bool SendTelephoneEventOutband(int event, int duration_ms) = 0;
+  virtual void OnBitrateAllocation(BitrateAllocationUpdate update) = 0;
+  virtual int GetTargetBitrate() const = 0;
+  virtual void SetInputMute(bool muted) = 0;
+
+  // Takes ownership of `audio_frame` and submits it for encoding.
+  virtual void ProcessAndEncodeAudio(
+      std::unique_ptr<AudioFrame> audio_frame) = 0;
+  virtual RtpRtcpInterface* GetRtpRtcp() const = 0;
+
+  // In RTP we currently rely on RTCP packets (`ReceivedRTCPPacket`) to inform
+  // about RTT.
+  // In media transport we rely on the TargetTransferRateObserver instead.
+  // In other words, if you are using RTP, you should expect
+  // `ReceivedRTCPPacket` to be called, if you are using media transport,
+  // `OnTargetTransferRate` will be called.
+  //
+  // In future, RTP media will move to the media transport implementation and
+  // these conditions will be removed.
+  // Returns the RTT in milliseconds.
+  virtual int64_t GetRTT() const = 0;
+  virtual void StartSend() = 0;
+  virtual void StopSend() = 0;
+
+  // E2EE Custom Audio Frame Encryption (Optional)
+  virtual void SetFrameEncryptor(
+      rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor) = 0;
+
+  // Sets a frame transformer between encoder and packetizer, to transform
+  // encoded frames before sending them out the network.
+  virtual void SetEncoderToPacketizerFrameTransformer(
+      rtc::scoped_refptr<webrtc::FrameTransformerInterface>
+          frame_transformer) = 0;
+};
+
+// Creates the ChannelSendInterface implementation. The caller owns the
+// returned channel.
+std::unique_ptr<ChannelSendInterface> CreateChannelSend(
+    Clock* clock,
+    TaskQueueFactory* task_queue_factory,
+    Transport* rtp_transport,
+    RtcpRttStats* rtcp_rtt_stats,
+    RtcEventLog* rtc_event_log,
+    FrameEncryptorInterface* frame_encryptor,
+    const webrtc::CryptoOptions& crypto_options,
+    bool extmap_allow_mixed,
+    int rtcp_report_interval_ms,
+    uint32_t ssrc,
+    rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
+    RtpTransportControllerSendInterface* transport_controller,
+    const FieldTrialsView& field_trials);
+
+}  // namespace voe
+}  // namespace webrtc
+
+#endif  // AUDIO_CHANNEL_SEND_H_
diff --git a/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.cc b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.cc
new file mode 100644
index 0000000000..0f85216e92
--- /dev/null
+++ b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.cc
@@ -0,0 +1,179 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/channel_send_frame_transformer_delegate.h" + +#include <utility> + +namespace webrtc { +namespace { + +using IfaceFrameType = TransformableAudioFrameInterface::FrameType; + +IfaceFrameType InternalFrameTypeToInterfaceFrameType( + const AudioFrameType frame_type) { + switch (frame_type) { + case AudioFrameType::kEmptyFrame: + return IfaceFrameType::kEmptyFrame; + case AudioFrameType::kAudioFrameSpeech: + return IfaceFrameType::kAudioFrameSpeech; + case AudioFrameType::kAudioFrameCN: + return IfaceFrameType::kAudioFrameCN; + } + RTC_DCHECK_NOTREACHED(); + return IfaceFrameType::kEmptyFrame; +} + +AudioFrameType InterfaceFrameTypeToInternalFrameType( + const IfaceFrameType frame_type) { + switch (frame_type) { + case IfaceFrameType::kEmptyFrame: + return AudioFrameType::kEmptyFrame; + case IfaceFrameType::kAudioFrameSpeech: + return AudioFrameType::kAudioFrameSpeech; + case IfaceFrameType::kAudioFrameCN: + return AudioFrameType::kAudioFrameCN; + } + RTC_DCHECK_NOTREACHED(); + return AudioFrameType::kEmptyFrame; +} + +class TransformableOutgoingAudioFrame + : public TransformableAudioFrameInterface { + public: + TransformableOutgoingAudioFrame( + AudioFrameType frame_type, + uint8_t payload_type, + uint32_t rtp_timestamp_with_offset, + const uint8_t* payload_data, + size_t payload_size, + absl::optional<uint64_t> absolute_capture_timestamp_ms, + uint32_t ssrc) + : frame_type_(frame_type), + payload_type_(payload_type), + rtp_timestamp_with_offset_(rtp_timestamp_with_offset), + payload_(payload_data, payload_size), + absolute_capture_timestamp_ms_(absolute_capture_timestamp_ms), + ssrc_(ssrc) {} + 
~TransformableOutgoingAudioFrame() override = default; + rtc::ArrayView<const uint8_t> GetData() const override { return payload_; } + void SetData(rtc::ArrayView<const uint8_t> data) override { + payload_.SetData(data.data(), data.size()); + } + uint32_t GetTimestamp() const override { return rtp_timestamp_with_offset_; } + uint32_t GetSsrc() const override { return ssrc_; } + + IfaceFrameType Type() const override { + return InternalFrameTypeToInterfaceFrameType(frame_type_); + } + + uint8_t GetPayloadType() const override { return payload_type_; } + Direction GetDirection() const override { return Direction::kSender; } + + rtc::ArrayView<const uint32_t> GetContributingSources() const override { + return {}; + } + + const absl::optional<uint16_t> SequenceNumber() const override { + return absl::nullopt; + } + + void SetRTPTimestamp(uint32_t rtp_timestamp_with_offset) override { + rtp_timestamp_with_offset_ = rtp_timestamp_with_offset; + } + + absl::optional<uint64_t> AbsoluteCaptureTimestamp() const override { + return absolute_capture_timestamp_ms_; + } + + private: + AudioFrameType frame_type_; + uint8_t payload_type_; + uint32_t rtp_timestamp_with_offset_; + rtc::Buffer payload_; + absl::optional<uint64_t> absolute_capture_timestamp_ms_; + uint32_t ssrc_; +}; +} // namespace + +ChannelSendFrameTransformerDelegate::ChannelSendFrameTransformerDelegate( + SendFrameCallback send_frame_callback, + rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, + rtc::TaskQueue* encoder_queue) + : send_frame_callback_(send_frame_callback), + frame_transformer_(std::move(frame_transformer)), + encoder_queue_(encoder_queue) {} + +void ChannelSendFrameTransformerDelegate::Init() { + frame_transformer_->RegisterTransformedFrameCallback( + rtc::scoped_refptr<TransformedFrameCallback>(this)); +} + +void ChannelSendFrameTransformerDelegate::Reset() { + frame_transformer_->UnregisterTransformedFrameCallback(); + frame_transformer_ = nullptr; + + MutexLock 
lock(&send_lock_); + send_frame_callback_ = SendFrameCallback(); +} + +void ChannelSendFrameTransformerDelegate::Transform( + AudioFrameType frame_type, + uint8_t payload_type, + uint32_t rtp_timestamp, + const uint8_t* payload_data, + size_t payload_size, + int64_t absolute_capture_timestamp_ms, + uint32_t ssrc) { + frame_transformer_->Transform( + std::make_unique<TransformableOutgoingAudioFrame>( + frame_type, payload_type, rtp_timestamp, payload_data, payload_size, + absolute_capture_timestamp_ms, ssrc)); +} + +void ChannelSendFrameTransformerDelegate::OnTransformedFrame( + std::unique_ptr<TransformableFrameInterface> frame) { + MutexLock lock(&send_lock_); + if (!send_frame_callback_) + return; + rtc::scoped_refptr<ChannelSendFrameTransformerDelegate> delegate(this); + encoder_queue_->PostTask( + [delegate = std::move(delegate), frame = std::move(frame)]() mutable { + delegate->SendFrame(std::move(frame)); + }); +} + +void ChannelSendFrameTransformerDelegate::SendFrame( + std::unique_ptr<TransformableFrameInterface> frame) const { + MutexLock lock(&send_lock_); + RTC_DCHECK_RUN_ON(encoder_queue_); + if (!send_frame_callback_) + return; + auto* transformed_frame = + static_cast<TransformableAudioFrameInterface*>(frame.get()); + send_frame_callback_( + InterfaceFrameTypeToInternalFrameType(transformed_frame->Type()), + transformed_frame->GetPayloadType(), transformed_frame->GetTimestamp(), + transformed_frame->GetData(), + transformed_frame->AbsoluteCaptureTimestamp() + ? 
*transformed_frame->AbsoluteCaptureTimestamp() + : 0); +} + +std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame( + TransformableAudioFrameInterface* original) { + return std::make_unique<TransformableOutgoingAudioFrame>( + InterfaceFrameTypeToInternalFrameType(original->Type()), + original->GetPayloadType(), original->GetTimestamp(), + original->GetData().data(), original->GetData().size(), + original->AbsoluteCaptureTimestamp(), original->GetSsrc()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.h b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.h new file mode 100644 index 0000000000..eb0027e4c8 --- /dev/null +++ b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_CHANNEL_SEND_FRAME_TRANSFORMER_DELEGATE_H_ +#define AUDIO_CHANNEL_SEND_FRAME_TRANSFORMER_DELEGATE_H_ + +#include <memory> + +#include "api/frame_transformer_interface.h" +#include "api/sequence_checker.h" +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" +#include "rtc_base/buffer.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/task_queue.h" + +namespace webrtc { + +// Delegates calls to FrameTransformerInterface to transform frames, and to +// ChannelSend to send the transformed frames using `send_frame_callback_` on +// the `encoder_queue_`. +// OnTransformedFrame() can be called from any thread, the delegate ensures +// thread-safe access to the ChannelSend callback. 
+class ChannelSendFrameTransformerDelegate : public TransformedFrameCallback {
+ public:
+  // Signature of the callback used to hand a transformed frame back to the
+  // channel for sending.
+  using SendFrameCallback =
+      std::function<int32_t(AudioFrameType frameType,
+                            uint8_t payloadType,
+                            uint32_t rtp_timestamp_with_offset,
+                            rtc::ArrayView<const uint8_t> payload,
+                            int64_t absolute_capture_timestamp_ms)>;
+  ChannelSendFrameTransformerDelegate(
+      SendFrameCallback send_frame_callback,
+      rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
+      rtc::TaskQueue* encoder_queue);
+
+  // Registers `this` as callback for `frame_transformer_`, to get the
+  // transformed frames.
+  void Init();
+
+  // Unregisters and releases the `frame_transformer_` reference, and resets
+  // `send_frame_callback_` under lock. Called from ChannelSend destructor to
+  // prevent running the callback on a dangling channel.
+  void Reset();
+
+  // Delegates the call to FrameTransformerInterface::Transform, to
+  // transform the frame asynchronously.
+  void Transform(AudioFrameType frame_type,
+                 uint8_t payload_type,
+                 uint32_t rtp_timestamp,
+                 const uint8_t* payload_data,
+                 size_t payload_size,
+                 int64_t absolute_capture_timestamp_ms,
+                 uint32_t ssrc);
+
+  // Implements TransformedFrameCallback. Can be called on any thread.
+  void OnTransformedFrame(
+      std::unique_ptr<TransformableFrameInterface> frame) override;
+
+  // Delegates the call to ChannelSend::SendRtpAudio on the `encoder_queue_`,
+  // by calling `send_frame_callback_`.
+  void SendFrame(std::unique_ptr<TransformableFrameInterface> frame) const;
+
+ protected:
+  // Protected so the delegate is only destroyed through its reference count.
+  ~ChannelSendFrameTransformerDelegate() override = default;
+
+ private:
+  // Guards `send_frame_callback_` and `encoder_queue_`; mutable because
+  // SendFrame() is const.
+  mutable Mutex send_lock_;
+  SendFrameCallback send_frame_callback_ RTC_GUARDED_BY(send_lock_);
+  rtc::scoped_refptr<FrameTransformerInterface> frame_transformer_;
+  rtc::TaskQueue* encoder_queue_ RTC_GUARDED_BY(send_lock_);
+};
+
+// Creates a sender-side copy of `original`; the returned frame is owned by the
+// caller.
+std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame(
+    TransformableAudioFrameInterface* original);
+
+}  // namespace webrtc
+#endif  // AUDIO_CHANNEL_SEND_FRAME_TRANSFORMER_DELEGATE_H_
diff --git a/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate_unittest.cc b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate_unittest.cc
new file mode 100644
index 0000000000..f75d4a8ab7
--- /dev/null
+++ b/third_party/libwebrtc/audio/channel_send_frame_transformer_delegate_unittest.cc
@@ -0,0 +1,172 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "audio/channel_send_frame_transformer_delegate.h" + +#include <memory> +#include <utility> + +#include "rtc_base/task_queue_for_test.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_frame_transformer.h" +#include "test/mock_transformable_frame.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::ElementsAre; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::SaveArg; + +class MockChannelSend { + public: + MockChannelSend() = default; + ~MockChannelSend() = default; + + MOCK_METHOD(int32_t, + SendFrame, + (AudioFrameType frameType, + uint8_t payloadType, + uint32_t rtp_timestamp, + rtc::ArrayView<const uint8_t> payload, + int64_t absolute_capture_timestamp_ms)); + + ChannelSendFrameTransformerDelegate::SendFrameCallback callback() { + return [this](AudioFrameType frameType, uint8_t payloadType, + uint32_t rtp_timestamp, rtc::ArrayView<const uint8_t> payload, + int64_t absolute_capture_timestamp_ms) { + return SendFrame(frameType, payloadType, rtp_timestamp, payload, + absolute_capture_timestamp_ms); + }; + } +}; + +std::unique_ptr<MockTransformableAudioFrame> CreateMockReceiverFrame() { + const uint8_t mock_data[] = {1, 2, 3, 4}; + std::unique_ptr<MockTransformableAudioFrame> mock_frame = + std::make_unique<MockTransformableAudioFrame>(); + rtc::ArrayView<const uint8_t> payload(mock_data); + ON_CALL(*mock_frame, GetData).WillByDefault(Return(payload)); + ON_CALL(*mock_frame, GetPayloadType).WillByDefault(Return(0)); + ON_CALL(*mock_frame, GetDirection) + .WillByDefault(Return(TransformableFrameInterface::Direction::kReceiver)); + return mock_frame; +} + +// Test that the delegate registers itself with the frame transformer on Init(). 
+TEST(ChannelSendFrameTransformerDelegateTest,
+     RegisterTransformedFrameCallbackOnInit) {
+  auto transformer = rtc::make_ref_counted<MockFrameTransformer>();
+  // An empty send callback and a null encoder queue are enough here: only the
+  // registration performed by Init() is observed.
+  auto delegate = rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
+      ChannelSendFrameTransformerDelegate::SendFrameCallback(), transformer,
+      nullptr);
+  EXPECT_CALL(*transformer, RegisterTransformedFrameCallback);
+  delegate->Init();
+}
+
+// Reset() must unregister the delegate from the frame transformer.
+TEST(ChannelSendFrameTransformerDelegateTest,
+     UnregisterTransformedFrameCallbackOnReset) {
+  auto transformer = rtc::make_ref_counted<MockFrameTransformer>();
+  auto delegate = rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
+      ChannelSendFrameTransformerDelegate::SendFrameCallback(), transformer,
+      nullptr);
+  EXPECT_CALL(*transformer, UnregisterTransformedFrameCallback);
+  delegate->Reset();
+}
+
+// Test that when the delegate receives a transformed frame from the frame
+// transformer, it passes it to the channel using the SendFrameCallback.
+TEST(ChannelSendFrameTransformerDelegateTest,
+     TransformRunsChannelSendCallback) {
+  TaskQueueForTest channel_queue("channel_queue");
+  rtc::scoped_refptr<MockFrameTransformer> transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  MockChannelSend channel;
+  rtc::scoped_refptr<ChannelSendFrameTransformerDelegate> delegate =
+      rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
+          channel.callback(), transformer, &channel_queue);
+
+  // Capture the callback the delegate registers so the fake transformer below
+  // can hand frames straight back to it.
+  rtc::scoped_refptr<TransformedFrameCallback> registered_callback;
+  EXPECT_CALL(*transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&registered_callback));
+  delegate->Init();
+  ASSERT_TRUE(registered_callback);
+
+  EXPECT_CALL(channel, SendFrame);
+  // Pass-through transformer: every frame is returned unchanged.
+  ON_CALL(*transformer, Transform)
+      .WillByDefault([&registered_callback](
+                         std::unique_ptr<TransformableFrameInterface> frame) {
+        registered_callback->OnTransformedFrame(std::move(frame));
+      });
+
+  const uint8_t payload[] = {1, 2, 3, 4};
+  delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, payload,
+                      sizeof(payload), 0, 0);
+  // The send happens on the queue; wait for it before expectations are
+  // verified.
+  channel_queue.WaitForPreviouslyPostedTasks();
+}
+
+// Test that when the delegate receives an incoming frame from the frame
+// transformer, it passes it to the channel using the SendFrameCallback.
+TEST(ChannelSendFrameTransformerDelegateTest,
+     TransformRunsChannelSendCallbackForIncomingFrame) {
+  TaskQueueForTest channel_queue("channel_queue");
+  rtc::scoped_refptr<MockFrameTransformer> transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  MockChannelSend channel;
+  rtc::scoped_refptr<ChannelSendFrameTransformerDelegate> delegate =
+      rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(
+          channel.callback(), transformer, &channel_queue);
+
+  rtc::scoped_refptr<TransformedFrameCallback> registered_callback;
+  EXPECT_CALL(*transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&registered_callback));
+  delegate->Init();
+  ASSERT_TRUE(registered_callback);
+
+  // The general Times(0) expectation is declared first and the specific one
+  // after it, so only a send matching the receiver frame's contents is
+  // accepted; keep this order.
+  EXPECT_CALL(channel, SendFrame).Times(0);
+  EXPECT_CALL(channel, SendFrame(_, 0, 0, ElementsAre(1, 2, 3, 4), _));
+  // The fake transformer discards the outgoing frame and answers with a
+  // receiver-direction mock frame instead.
+  ON_CALL(*transformer, Transform)
+      .WillByDefault([&registered_callback](
+                         std::unique_ptr<TransformableFrameInterface> frame) {
+        registered_callback->OnTransformedFrame(CreateMockReceiverFrame());
+      });
+
+  const uint8_t payload[] = {1, 2, 3, 4};
+  delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, payload,
+                      sizeof(payload), 0, 0);
+  channel_queue.WaitForPreviouslyPostedTasks();
+}
+
+// Test that if the delegate receives a transformed frame after it has been
+// reset, it does not run the SendFrameCallback, as the channel is destroyed
+// after resetting the delegate.
+TEST(ChannelSendFrameTransformerDelegateTest, + OnTransformedDoesNotRunChannelSendCallbackAfterReset) { + TaskQueueForTest channel_queue("channel_queue"); + rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer = + rtc::make_ref_counted<testing::NiceMock<MockFrameTransformer>>(); + MockChannelSend mock_channel; + rtc::scoped_refptr<ChannelSendFrameTransformerDelegate> delegate = + rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>( + mock_channel.callback(), mock_frame_transformer, &channel_queue); + + delegate->Reset(); + EXPECT_CALL(mock_channel, SendFrame).Times(0); + delegate->OnTransformedFrame(std::make_unique<MockTransformableAudioFrame>()); + channel_queue.WaitForPreviouslyPostedTasks(); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/channel_send_unittest.cc b/third_party/libwebrtc/audio/channel_send_unittest.cc new file mode 100644 index 0000000000..b9406e1523 --- /dev/null +++ b/third_party/libwebrtc/audio/channel_send_unittest.cc @@ -0,0 +1,193 @@ +/* + * Copyright 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "audio/channel_send.h"
+
+#include <utility>
+
+#include "api/audio/audio_frame.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "api/rtc_event_log/rtc_event_log.h"
+#include "api/scoped_refptr.h"
+#include "api/units/time_delta.h"
+#include "api/units/timestamp.h"
+#include "call/rtp_transport_controller_send.h"
+#include "rtc_base/gunit.h"
+#include "test/gtest.h"
+#include "test/mock_frame_transformer.h"
+#include "test/mock_transport.h"
+#include "test/scoped_key_value_config.h"
+#include "test/time_controller/simulated_time_controller.h"
+
+namespace webrtc {
+namespace voe {
+namespace {
+
+using ::testing::Invoke;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::SaveArg;
+
+constexpr int kRtcpIntervalMs = 1000;
+constexpr int kSsrc = 333;
+constexpr int kPayloadType = 1;
+constexpr int kSampleRateHz = 48000;
+constexpr int kRtpRateHz = 48000;
+
+// Bitrate limits handed to the transport controller used by the fixture.
+BitrateConstraints GetBitrateConfig() {
+  BitrateConstraints bitrate_config;
+  bitrate_config.min_bitrate_bps = 10000;
+  bitrate_config.start_bitrate_bps = 100000;
+  bitrate_config.max_bitrate_bps = 1000000;
+  return bitrate_config;
+}
+
+// Fixture that builds a ChannelSend with an opus encoder on top of a
+// simulated clock, a mock transport and a real RtpTransportControllerSend.
+class ChannelSendTest : public ::testing::Test {
+ protected:
+  ChannelSendTest()
+      : time_controller_(Timestamp::Seconds(1)),
+        transport_controller_(
+            time_controller_.GetClock(),
+            RtpTransportConfig{
+                .bitrate_config = GetBitrateConfig(),
+                .event_log = &event_log_,
+                .task_queue_factory = time_controller_.GetTaskQueueFactory(),
+                .trials = &field_trials_,
+            }) {
+    channel_ = voe::CreateChannelSend(
+        time_controller_.GetClock(), time_controller_.GetTaskQueueFactory(),
+        &transport_, nullptr, &event_log_, nullptr, crypto_options_, false,
+        kRtcpIntervalMs, kSsrc, nullptr, &transport_controller_, field_trials_);
+    encoder_factory_ = CreateBuiltinAudioEncoderFactory();
+    std::unique_ptr<AudioEncoder> encoder = encoder_factory_->MakeAudioEncoder(
+        kPayloadType, SdpAudioFormat("opus", kRtpRateHz, 2), {});
+    channel_->SetEncoder(kPayloadType, std::move(encoder));
+    transport_controller_.EnsureStarted();
+    channel_->RegisterSenderCongestionControlObjects(&transport_controller_);
+    // By default the mock transport reports every packet as sent.
+    ON_CALL(transport_, SendRtcp).WillByDefault(Return(true));
+    ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
+  }
+
+  // Builds a mono frame of kSampleRateHz / 100 samples (10 ms at
+  // kSampleRateHz), stamped with the current simulated time.
+  std::unique_ptr<AudioFrame> CreateAudioFrame() {
+    auto frame = std::make_unique<AudioFrame>();
+    frame->sample_rate_hz_ = kSampleRateHz;
+    frame->samples_per_channel_ = kSampleRateHz / 100;
+    frame->num_channels_ = 1;
+    frame->set_absolute_capture_timestamp_ms(
+        time_controller_.GetClock()->TimeInMilliseconds());
+    return frame;
+  }
+
+  // Feeds one frame to the channel and advances simulated time by 10 ms.
+  void ProcessNextFrame() {
+    channel_->ProcessAndEncodeAudio(CreateAudioFrame());
+    // Advance time to process the task queue.
+    time_controller_.AdvanceTime(TimeDelta::Millis(10));
+  }
+
+  // Declaration order matters: `time_controller_`, `field_trials_` and
+  // `event_log_` are used by the `transport_controller_` initializer, so they
+  // are declared before it.
+  GlobalSimulatedTimeController time_controller_;
+  webrtc::test::ScopedKeyValueConfig field_trials_;
+  RtcEventLogNull event_log_;
+  NiceMock<MockTransport> transport_;
+  CryptoOptions crypto_options_;
+  RtpTransportControllerSend transport_controller_;
+  std::unique_ptr<ChannelSendInterface> channel_;
+  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
+};
+
+// Checks that StopSend() followed by StartSend() discards audio buffered in
+// the encoder, so a single new frame does not complete a packet.
+TEST_F(ChannelSendTest, StopSendShouldResetEncoder) {
+  channel_->StartSend();
+  // Insert two frames which should trigger a new packet.
+  EXPECT_CALL(transport_, SendRtp).Times(1);
+  ProcessNextFrame();
+  ProcessNextFrame();
+
+  EXPECT_CALL(transport_, SendRtp).Times(0);
+  ProcessNextFrame();
+  // StopSend should clear the previous audio frame stored in the encoder.
+  channel_->StopSend();
+  channel_->StartSend();
+  // The following frame should not trigger a new packet since the encoder
+  // needs 20 ms audio.
+  EXPECT_CALL(transport_, SendRtp).Times(0);
+  ProcessNextFrame();
+}
+
+// Checks that the RTP timestamp of the first packet sent after a pause
+// reflects the time spent paused.
+TEST_F(ChannelSendTest, IncreaseRtpTimestampByPauseDuration) {
+  channel_->StartSend();
+  uint32_t timestamp;
+  int sent_packets = 0;
+  // Counts outgoing RTP packets and records the timestamp of the latest one.
+  auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
+                      const PacketOptions& options) {
+    ++sent_packets;
+    RtpPacketReceived packet;
+    packet.Parse(data);
+    timestamp = packet.Timestamp();
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 1);
+  uint32_t first_timestamp = timestamp;
+  channel_->StopSend();
+  time_controller_.AdvanceTime(TimeDelta::Seconds(10));
+  channel_->StartSend();
+
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 2);
+  int64_t timestamp_gap_ms =
+      static_cast<int64_t>(timestamp - first_timestamp) * 1000 / kRtpRateHz;
+  // NOTE(review): 10020 is presumably the 10 s pause plus the two 10 ms
+  // frames processed before it - confirm.
+  EXPECT_EQ(timestamp_gap_ms, 10020);
+}
+
+TEST_F(ChannelSendTest, FrameTransformerGetsCorrectTimestamp) {
+  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
+      rtc::make_ref_counted<MockFrameTransformer>();
+  channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer);
+  rtc::scoped_refptr<TransformedFrameCallback> callback;
+  EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
+      .WillOnce(SaveArg<0>(&callback));
+  EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback);
+
+  absl::optional<uint32_t> sent_timestamp;
+  // Records the RTP timestamp of the first packet that reaches the transport.
+  auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
+                      const PacketOptions& options) {
+    RtpPacketReceived packet;
+    packet.Parse(data);
+    if (!sent_timestamp) {
+      sent_timestamp = packet.Timestamp();
+    }
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+
+  channel_->StartSend();
+  int64_t transformable_frame_timestamp = -1;
+  EXPECT_CALL(*mock_frame_transformer, Transform)
+      .WillOnce([&](std::unique_ptr<TransformableFrameInterface> frame) {
+        transformable_frame_timestamp
= frame->GetTimestamp(); + callback->OnTransformedFrame(std::move(frame)); + }); + // Insert two frames which should trigger a new packet. + ProcessNextFrame(); + ProcessNextFrame(); + + // Ensure the RTP timestamp on the frame passed to the transformer + // includes the RTP offset and matches the actual RTP timestamp on the sent + // packet. + EXPECT_EQ_WAIT(transformable_frame_timestamp, + 0 + channel_->GetRtpRtcp()->StartTimestamp(), 1000); + EXPECT_TRUE_WAIT(sent_timestamp, 1000); + EXPECT_EQ(*sent_timestamp, transformable_frame_timestamp); +} +} // namespace +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/conversion.h b/third_party/libwebrtc/audio/conversion.h new file mode 100644 index 0000000000..dd71942f6a --- /dev/null +++ b/third_party/libwebrtc/audio/conversion.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_CONVERSION_H_ +#define AUDIO_CONVERSION_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace webrtc { + +// Convert fixed point number with 8 bit fractional part, to floating point. +inline float Q8ToFloat(uint32_t v) { + return static_cast<float>(v) / (1 << 8); +} + +// Convert fixed point number with 14 bit fractional part, to floating point. 
+inline float Q14ToFloat(uint32_t v) { + return static_cast<float>(v) / (1 << 14); +} +} // namespace webrtc + +#endif // AUDIO_CONVERSION_H_ diff --git a/third_party/libwebrtc/audio/mock_voe_channel_proxy.h b/third_party/libwebrtc/audio/mock_voe_channel_proxy.h new file mode 100644 index 0000000000..29005173df --- /dev/null +++ b/third_party/libwebrtc/audio/mock_voe_channel_proxy.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_MOCK_VOE_CHANNEL_PROXY_H_ +#define AUDIO_MOCK_VOE_CHANNEL_PROXY_H_ + +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "api/crypto/frame_decryptor_interface.h" +#include "api/test/mock_frame_encryptor.h" +#include "audio/channel_receive.h" +#include "audio/channel_send.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockChannelReceive : public voe::ChannelReceiveInterface { + public: + MOCK_METHOD(void, SetNACKStatus, (bool enable, int max_packets), (override)); + MOCK_METHOD(void, SetNonSenderRttMeasurement, (bool enabled), (override)); + MOCK_METHOD(void, + RegisterReceiverCongestionControlObjects, + (PacketRouter*), + (override)); + MOCK_METHOD(void, ResetReceiverCongestionControlObjects, (), (override)); + MOCK_METHOD(CallReceiveStatistics, GetRTCPStatistics, (), (const, override)); + MOCK_METHOD(NetworkStatistics, + GetNetworkStatistics, + (bool), + (const, override)); + MOCK_METHOD(AudioDecodingCallStats, + GetDecodingCallStatistics, + (), + (const, override)); + MOCK_METHOD(int, 
GetSpeechOutputLevelFullRange, (), (const, override)); + MOCK_METHOD(double, GetTotalOutputEnergy, (), (const, override)); + MOCK_METHOD(double, GetTotalOutputDuration, (), (const, override)); + MOCK_METHOD(uint32_t, GetDelayEstimate, (), (const, override)); + MOCK_METHOD(void, SetSink, (AudioSinkInterface*), (override)); + MOCK_METHOD(void, OnRtpPacket, (const RtpPacketReceived& packet), (override)); + MOCK_METHOD(void, + ReceivedRTCPPacket, + (const uint8_t*, size_t length), + (override)); + MOCK_METHOD(void, SetChannelOutputVolumeScaling, (float scaling), (override)); + MOCK_METHOD(AudioMixer::Source::AudioFrameInfo, + GetAudioFrameWithInfo, + (int sample_rate_hz, AudioFrame*), + (override)); + MOCK_METHOD(int, PreferredSampleRate, (), (const, override)); + MOCK_METHOD(void, SetSourceTracker, (SourceTracker*), (override)); + MOCK_METHOD(void, + SetAssociatedSendChannel, + (const voe::ChannelSendInterface*), + (override)); + MOCK_METHOD(bool, + GetPlayoutRtpTimestamp, + (uint32_t*, int64_t*), + (const, override)); + MOCK_METHOD(void, + SetEstimatedPlayoutNtpTimestampMs, + (int64_t ntp_timestamp_ms, int64_t time_ms), + (override)); + MOCK_METHOD(absl::optional<int64_t>, + GetCurrentEstimatedPlayoutNtpTimestampMs, + (int64_t now_ms), + (const, override)); + MOCK_METHOD(absl::optional<Syncable::Info>, + GetSyncInfo, + (), + (const, override)); + MOCK_METHOD(bool, SetMinimumPlayoutDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, SetBaseMinimumPlayoutDelayMs, (int delay_ms), (override)); + MOCK_METHOD(int, GetBaseMinimumPlayoutDelayMs, (), (const, override)); + MOCK_METHOD((absl::optional<std::pair<int, SdpAudioFormat>>), + GetReceiveCodec, + (), + (const, override)); + MOCK_METHOD(void, + SetReceiveCodecs, + ((const std::map<int, SdpAudioFormat>& codecs)), + (override)); + MOCK_METHOD(void, StartPlayout, (), (override)); + MOCK_METHOD(void, StopPlayout, (), (override)); + MOCK_METHOD( + void, + SetDepacketizerToDecoderFrameTransformer, + 
(rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer), + (override)); + MOCK_METHOD( + void, + SetFrameDecryptor, + (rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor), + (override)); + MOCK_METHOD(void, OnLocalSsrcChange, (uint32_t local_ssrc), (override)); + MOCK_METHOD(uint32_t, GetLocalSsrc, (), (const, override)); +}; + +class MockChannelSend : public voe::ChannelSendInterface { + public: + MOCK_METHOD(void, + SetEncoder, + (int payload_type, std::unique_ptr<AudioEncoder> encoder), + (override)); + MOCK_METHOD( + void, + ModifyEncoder, + (rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier), + (override)); + MOCK_METHOD(void, + CallEncoder, + (rtc::FunctionView<void(AudioEncoder*)> modifier), + (override)); + MOCK_METHOD(void, SetRTCP_CNAME, (absl::string_view c_name), (override)); + MOCK_METHOD(void, + SetSendAudioLevelIndicationStatus, + (bool enable, int id), + (override)); + MOCK_METHOD(void, + RegisterSenderCongestionControlObjects, + (RtpTransportControllerSendInterface*), + (override)); + MOCK_METHOD(void, ResetSenderCongestionControlObjects, (), (override)); + MOCK_METHOD(CallSendStatistics, GetRTCPStatistics, (), (const, override)); + MOCK_METHOD(std::vector<ReportBlockData>, + GetRemoteRTCPReportBlocks, + (), + (const, override)); + MOCK_METHOD(ANAStats, GetANAStatistics, (), (const, override)); + MOCK_METHOD(void, + RegisterCngPayloadType, + (int payload_type, int payload_frequency), + (override)); + MOCK_METHOD(void, + SetSendTelephoneEventPayloadType, + (int payload_type, int payload_frequency), + (override)); + MOCK_METHOD(bool, + SendTelephoneEventOutband, + (int event, int duration_ms), + (override)); + MOCK_METHOD(void, + OnBitrateAllocation, + (BitrateAllocationUpdate update), + (override)); + MOCK_METHOD(void, SetInputMute, (bool muted), (override)); + MOCK_METHOD(void, + ReceivedRTCPPacket, + (const uint8_t*, size_t length), + (override)); + MOCK_METHOD(void, + ProcessAndEncodeAudio, + 
(std::unique_ptr<AudioFrame>), + (override)); + MOCK_METHOD(RtpRtcpInterface*, GetRtpRtcp, (), (const, override)); + MOCK_METHOD(int, GetTargetBitrate, (), (const, override)); + MOCK_METHOD(int64_t, GetRTT, (), (const, override)); + MOCK_METHOD(void, StartSend, (), (override)); + MOCK_METHOD(void, StopSend, (), (override)); + MOCK_METHOD(void, + SetFrameEncryptor, + (rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor), + (override)); + MOCK_METHOD( + void, + SetEncoderToPacketizerFrameTransformer, + (rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer), + (override)); +}; +} // namespace test +} // namespace webrtc + +#endif // AUDIO_MOCK_VOE_CHANNEL_PROXY_H_ diff --git a/third_party/libwebrtc/audio/remix_resample.cc b/third_party/libwebrtc/audio/remix_resample.cc new file mode 100644 index 0000000000..178af622a1 --- /dev/null +++ b/third_party/libwebrtc/audio/remix_resample.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/remix_resample.h" + +#include "api/audio/audio_frame.h" +#include "audio/utility/audio_frame_operations.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace voe { + +void RemixAndResample(const AudioFrame& src_frame, + PushResampler<int16_t>* resampler, + AudioFrame* dst_frame) { + RemixAndResample(src_frame.data(), src_frame.samples_per_channel_, + src_frame.num_channels_, src_frame.sample_rate_hz_, + resampler, dst_frame); + dst_frame->timestamp_ = src_frame.timestamp_; + dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_; + dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_; + dst_frame->packet_infos_ = src_frame.packet_infos_; +} + +void RemixAndResample(const int16_t* src_data, + size_t samples_per_channel, + size_t num_channels, + int sample_rate_hz, + PushResampler<int16_t>* resampler, + AudioFrame* dst_frame) { + const int16_t* audio_ptr = src_data; + size_t audio_ptr_num_channels = num_channels; + int16_t downmixed_audio[AudioFrame::kMaxDataSizeSamples]; + + // Downmix before resampling. 
+ if (num_channels > dst_frame->num_channels_) { + RTC_DCHECK(num_channels == 2 || num_channels == 4) + << "num_channels: " << num_channels; + RTC_DCHECK(dst_frame->num_channels_ == 1 || dst_frame->num_channels_ == 2) + << "dst_frame->num_channels_: " << dst_frame->num_channels_; + + AudioFrameOperations::DownmixChannels( + src_data, num_channels, samples_per_channel, dst_frame->num_channels_, + downmixed_audio); + audio_ptr = downmixed_audio; + audio_ptr_num_channels = dst_frame->num_channels_; + } + + if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_, + audio_ptr_num_channels) == -1) { + RTC_FATAL() << "InitializeIfNeeded failed: sample_rate_hz = " + << sample_rate_hz << ", dst_frame->sample_rate_hz_ = " + << dst_frame->sample_rate_hz_ + << ", audio_ptr_num_channels = " << audio_ptr_num_channels; + } + + // TODO(yujo): for muted input frames, don't resample. Either 1) allow + // resampler to return output length without doing the resample, so we know + // how much to zero here; or 2) make resampler accept a hint that the input is + // zeroed. + const size_t src_length = samples_per_channel * audio_ptr_num_channels; + int out_length = + resampler->Resample(audio_ptr, src_length, dst_frame->mutable_data(), + AudioFrame::kMaxDataSizeSamples); + if (out_length == -1) { + RTC_FATAL() << "Resample failed: audio_ptr = " << audio_ptr + << ", src_length = " << src_length + << ", dst_frame->mutable_data() = " + << dst_frame->mutable_data(); + } + dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels; + + // Upmix after resampling. + if (num_channels == 1 && dst_frame->num_channels_ == 2) { + // The audio in dst_frame really is mono at this point; MonoToStereo will + // set this back to stereo. 
+ dst_frame->num_channels_ = 1; + AudioFrameOperations::UpmixChannels(2, dst_frame); + } +} + +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/remix_resample.h b/third_party/libwebrtc/audio/remix_resample.h new file mode 100644 index 0000000000..bd8da76c6a --- /dev/null +++ b/third_party/libwebrtc/audio/remix_resample.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_REMIX_RESAMPLE_H_ +#define AUDIO_REMIX_RESAMPLE_H_ + +#include "api/audio/audio_frame.h" +#include "common_audio/resampler/include/push_resampler.h" + +namespace webrtc { +namespace voe { + +// Upmix or downmix and resample the audio to `dst_frame`. Expects `dst_frame` +// to have its sample rate and channels members set to the desired values. +// Updates the `samples_per_channel_` member accordingly. +// +// This version has an AudioFrame `src_frame` as input and sets the output +// `timestamp_`, `elapsed_time_ms_` and `ntp_time_ms_` members equals to the +// input ones. +void RemixAndResample(const AudioFrame& src_frame, + PushResampler<int16_t>* resampler, + AudioFrame* dst_frame); + +// This version has a pointer to the samples `src_data` as input and receives +// `samples_per_channel`, `num_channels` and `sample_rate_hz` of the data as +// parameters. 
+void RemixAndResample(const int16_t* src_data, + size_t samples_per_channel, + size_t num_channels, + int sample_rate_hz, + PushResampler<int16_t>* resampler, + AudioFrame* dst_frame); + +} // namespace voe +} // namespace webrtc + +#endif // AUDIO_REMIX_RESAMPLE_H_ diff --git a/third_party/libwebrtc/audio/remix_resample_unittest.cc b/third_party/libwebrtc/audio/remix_resample_unittest.cc new file mode 100644 index 0000000000..31dcfac1fe --- /dev/null +++ b/third_party/libwebrtc/audio/remix_resample_unittest.cc @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/remix_resample.h" + +#include <cmath> + +#include "common_audio/resampler/include/push_resampler.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { +namespace voe { +namespace { + +int GetFrameSize(int sample_rate_hz) { + return sample_rate_hz / 100; +} + +class UtilityTest : public ::testing::Test { + protected: + UtilityTest() { + src_frame_.sample_rate_hz_ = 16000; + src_frame_.samples_per_channel_ = src_frame_.sample_rate_hz_ / 100; + src_frame_.num_channels_ = 1; + dst_frame_.CopyFrom(src_frame_); + golden_frame_.CopyFrom(src_frame_); + } + + void RunResampleTest(int src_channels, + int src_sample_rate_hz, + int dst_channels, + int dst_sample_rate_hz); + + PushResampler<int16_t> resampler_; + AudioFrame src_frame_; + AudioFrame dst_frame_; + AudioFrame golden_frame_; +}; + +// Sets the signal value to increase by `data` with every sample. Floats are +// used so non-integer values result in rounding error, but not an accumulating +// error. 
+void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) { + frame->Mute(); + frame->num_channels_ = 1; + frame->sample_rate_hz_ = sample_rate_hz; + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_; i++) { + frame_data[i] = static_cast<int16_t>(data * i); + } +} + +// Keep the existing sample rate. +void SetMonoFrame(float data, AudioFrame* frame) { + SetMonoFrame(data, frame->sample_rate_hz_, frame); +} + +// Sets the signal value to increase by `left` and `right` with every sample in +// each channel respectively. +void SetStereoFrame(float left, + float right, + int sample_rate_hz, + AudioFrame* frame) { + frame->Mute(); + frame->num_channels_ = 2; + frame->sample_rate_hz_ = sample_rate_hz; + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_; i++) { + frame_data[i * 2] = static_cast<int16_t>(left * i); + frame_data[i * 2 + 1] = static_cast<int16_t>(right * i); + } +} + +// Keep the existing sample rate. +void SetStereoFrame(float left, float right, AudioFrame* frame) { + SetStereoFrame(left, right, frame->sample_rate_hz_, frame); +} + +// Sets the signal value to increase by `ch1`, `ch2`, `ch3`, `ch4` with every +// sample in each channel respectively. 
+void SetQuadFrame(float ch1, + float ch2, + float ch3, + float ch4, + int sample_rate_hz, + AudioFrame* frame) { + frame->Mute(); + frame->num_channels_ = 4; + frame->sample_rate_hz_ = sample_rate_hz; + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_; i++) { + frame_data[i * 4] = static_cast<int16_t>(ch1 * i); + frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i); + frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i); + frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i); + } +} + +void VerifyParams(const AudioFrame& ref_frame, const AudioFrame& test_frame) { + EXPECT_EQ(ref_frame.num_channels_, test_frame.num_channels_); + EXPECT_EQ(ref_frame.samples_per_channel_, test_frame.samples_per_channel_); + EXPECT_EQ(ref_frame.sample_rate_hz_, test_frame.sample_rate_hz_); +} + +// Computes the best SNR based on the error between `ref_frame` and +// `test_frame`. It allows for up to a `max_delay` in samples between the +// signals to compensate for the resampling delay. +float ComputeSNR(const AudioFrame& ref_frame, + const AudioFrame& test_frame, + size_t max_delay) { + VerifyParams(ref_frame, test_frame); + float best_snr = 0; + size_t best_delay = 0; + for (size_t delay = 0; delay <= max_delay; delay++) { + float mse = 0; + float variance = 0; + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); + for (size_t i = 0; + i < ref_frame.samples_per_channel_ * ref_frame.num_channels_ - delay; + i++) { + int error = ref_frame_data[i] - test_frame_data[i + delay]; + mse += error * error; + variance += ref_frame_data[i] * ref_frame_data[i]; + } + float snr = 100; // We assign 100 dB to the zero-error case. 
+ if (mse > 0) + snr = 10 * std::log10(variance / mse); + if (snr > best_snr) { + best_snr = snr; + best_delay = delay; + } + } + printf("SNR=%.1f dB at delay=%zu\n", best_snr, best_delay); + return best_snr; +} + +void VerifyFramesAreEqual(const AudioFrame& ref_frame, + const AudioFrame& test_frame) { + VerifyParams(ref_frame, test_frame); + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); + for (size_t i = 0; + i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) { + EXPECT_EQ(ref_frame_data[i], test_frame_data[i]); + } +} + +void UtilityTest::RunResampleTest(int src_channels, + int src_sample_rate_hz, + int dst_channels, + int dst_sample_rate_hz) { + PushResampler<int16_t> resampler; // Create a new one with every test. + const int16_t kSrcCh1 = 30; // Shouldn't overflow for any used sample rate. + const int16_t kSrcCh2 = 15; + const int16_t kSrcCh3 = 22; + const int16_t kSrcCh4 = 8; + const float resampling_factor = + (1.0 * src_sample_rate_hz) / dst_sample_rate_hz; + const float dst_ch1 = resampling_factor * kSrcCh1; + const float dst_ch2 = resampling_factor * kSrcCh2; + const float dst_ch3 = resampling_factor * kSrcCh3; + const float dst_ch4 = resampling_factor * kSrcCh4; + const float dst_stereo_to_mono = (dst_ch1 + dst_ch2) / 2; + const float dst_quad_to_mono = (dst_ch1 + dst_ch2 + dst_ch3 + dst_ch4) / 4; + const float dst_quad_to_stereo_ch1 = (dst_ch1 + dst_ch2) / 2; + const float dst_quad_to_stereo_ch2 = (dst_ch3 + dst_ch4) / 2; + if (src_channels == 1) + SetMonoFrame(kSrcCh1, src_sample_rate_hz, &src_frame_); + else if (src_channels == 2) + SetStereoFrame(kSrcCh1, kSrcCh2, src_sample_rate_hz, &src_frame_); + else + SetQuadFrame(kSrcCh1, kSrcCh2, kSrcCh3, kSrcCh4, src_sample_rate_hz, + &src_frame_); + + if (dst_channels == 1) { + SetMonoFrame(0, dst_sample_rate_hz, &dst_frame_); + if (src_channels == 1) + SetMonoFrame(dst_ch1, dst_sample_rate_hz, &golden_frame_); + else if (src_channels 
== 2) + SetMonoFrame(dst_stereo_to_mono, dst_sample_rate_hz, &golden_frame_); + else + SetMonoFrame(dst_quad_to_mono, dst_sample_rate_hz, &golden_frame_); + } else { + SetStereoFrame(0, 0, dst_sample_rate_hz, &dst_frame_); + if (src_channels == 1) + SetStereoFrame(dst_ch1, dst_ch1, dst_sample_rate_hz, &golden_frame_); + else if (src_channels == 2) + SetStereoFrame(dst_ch1, dst_ch2, dst_sample_rate_hz, &golden_frame_); + else + SetStereoFrame(dst_quad_to_stereo_ch1, dst_quad_to_stereo_ch2, + dst_sample_rate_hz, &golden_frame_); + } + + // The sinc resampler has a known delay, which we compute here. Multiplying by + // two gives us a crude maximum for any resampling, as the old resampler + // typically (but not always) has lower delay. + static const size_t kInputKernelDelaySamples = 16; + const size_t max_delay = static_cast<size_t>( + static_cast<double>(dst_sample_rate_hz) / src_sample_rate_hz * + kInputKernelDelaySamples * dst_channels * 2); + printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later. + src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz); + RemixAndResample(src_frame_, &resampler, &dst_frame_); + + if (src_sample_rate_hz == 96000 && dst_sample_rate_hz <= 11025) { + // The sinc resampler gives poor SNR at this extreme conversion, but we + // expect to see this rarely in practice. + EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_, max_delay), 14.0f); + } else { + EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_, max_delay), 46.0f); + } +} + +TEST_F(UtilityTest, RemixAndResampleCopyFrameSucceeds) { + // Stereo -> stereo. + SetStereoFrame(10, 10, &src_frame_); + SetStereoFrame(0, 0, &dst_frame_); + RemixAndResample(src_frame_, &resampler_, &dst_frame_); + VerifyFramesAreEqual(src_frame_, dst_frame_); + + // Mono -> mono. 
+ SetMonoFrame(20, &src_frame_); + SetMonoFrame(0, &dst_frame_); + RemixAndResample(src_frame_, &resampler_, &dst_frame_); + VerifyFramesAreEqual(src_frame_, dst_frame_); +} + +TEST_F(UtilityTest, RemixAndResampleMixingOnlySucceeds) { + // Stereo -> mono. + SetStereoFrame(0, 0, &dst_frame_); + SetMonoFrame(10, &src_frame_); + SetStereoFrame(10, 10, &golden_frame_); + RemixAndResample(src_frame_, &resampler_, &dst_frame_); + VerifyFramesAreEqual(dst_frame_, golden_frame_); + + // Mono -> stereo. + SetMonoFrame(0, &dst_frame_); + SetStereoFrame(10, 20, &src_frame_); + SetMonoFrame(15, &golden_frame_); + RemixAndResample(src_frame_, &resampler_, &dst_frame_); + VerifyFramesAreEqual(golden_frame_, dst_frame_); +} + +TEST_F(UtilityTest, RemixAndResampleSucceeds) { + const int kSampleRates[] = {8000, 11025, 16000, 22050, + 32000, 44100, 48000, 96000}; + const int kSrcChannels[] = {1, 2, 4}; + const int kDstChannels[] = {1, 2}; + + for (int src_rate : kSampleRates) { + for (int dst_rate : kSampleRates) { + for (size_t src_channels : kSrcChannels) { + for (size_t dst_channels : kDstChannels) { + RunResampleTest(src_channels, src_rate, dst_channels, dst_rate); + } + } + } + } +} + +} // namespace +} // namespace voe +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/test/audio_end_to_end_test.cc b/third_party/libwebrtc/audio/test/audio_end_to_end_test.cc new file mode 100644 index 0000000000..b1e2712f60 --- /dev/null +++ b/third_party/libwebrtc/audio/test/audio_end_to_end_test.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/test/audio_end_to_end_test.h" + +#include <algorithm> +#include <memory> + +#include "api/task_queue/task_queue_base.h" +#include "call/fake_network_pipe.h" +#include "call/simulated_network.h" +#include "modules/audio_device/include/test_audio_device.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" +#include "test/video_test_constants.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr int kSampleRate = 48000; + +} // namespace + +AudioEndToEndTest::AudioEndToEndTest() + : EndToEndTest(VideoTestConstants::kDefaultTimeout) {} + +size_t AudioEndToEndTest::GetNumVideoStreams() const { + return 0; +} + +size_t AudioEndToEndTest::GetNumAudioStreams() const { + return 1; +} + +size_t AudioEndToEndTest::GetNumFlexfecStreams() const { + return 0; +} + +std::unique_ptr<TestAudioDeviceModule::Capturer> +AudioEndToEndTest::CreateCapturer() { + return TestAudioDeviceModule::CreatePulsedNoiseCapturer(32000, kSampleRate); +} + +std::unique_ptr<TestAudioDeviceModule::Renderer> +AudioEndToEndTest::CreateRenderer() { + return TestAudioDeviceModule::CreateDiscardRenderer(kSampleRate); +} + +void AudioEndToEndTest::OnFakeAudioDevicesCreated( + AudioDeviceModule* send_audio_device, + AudioDeviceModule* recv_audio_device) { + send_audio_device_ = send_audio_device; +} + +void AudioEndToEndTest::ModifyAudioConfigs( + AudioSendStream::Config* send_config, + std::vector<AudioReceiveStreamInterface::Config>* receive_configs) { + // Large bitrate by default. 
+ const webrtc::SdpAudioFormat kDefaultFormat("opus", 48000, 2, + {{"stereo", "1"}}); + send_config->send_codec_spec = AudioSendStream::Config::SendCodecSpec( + test::VideoTestConstants::kAudioSendPayloadType, kDefaultFormat); + send_config->min_bitrate_bps = 32000; + send_config->max_bitrate_bps = 32000; +} + +void AudioEndToEndTest::OnAudioStreamsCreated( + AudioSendStream* send_stream, + const std::vector<AudioReceiveStreamInterface*>& receive_streams) { + ASSERT_NE(nullptr, send_stream); + ASSERT_EQ(1u, receive_streams.size()); + ASSERT_NE(nullptr, receive_streams[0]); + send_stream_ = send_stream; + receive_stream_ = receive_streams[0]; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/test/audio_end_to_end_test.h b/third_party/libwebrtc/audio/test/audio_end_to_end_test.h new file mode 100644 index 0000000000..d326b790ff --- /dev/null +++ b/third_party/libwebrtc/audio/test/audio_end_to_end_test.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef AUDIO_TEST_AUDIO_END_TO_END_TEST_H_ +#define AUDIO_TEST_AUDIO_END_TO_END_TEST_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "api/task_queue/task_queue_base.h" +#include "api/test/simulated_network.h" +#include "modules/audio_device/include/audio_device.h" +#include "modules/audio_device/include/test_audio_device.h" +#include "test/call_test.h" + +namespace webrtc { +namespace test { + +class AudioEndToEndTest : public test::EndToEndTest { + public: + AudioEndToEndTest(); + + protected: + AudioDeviceModule* send_audio_device() { return send_audio_device_; } + const AudioSendStream* send_stream() const { return send_stream_; } + const AudioReceiveStreamInterface* receive_stream() const { + return receive_stream_; + } + + size_t GetNumVideoStreams() const override; + size_t GetNumAudioStreams() const override; + size_t GetNumFlexfecStreams() const override; + + std::unique_ptr<TestAudioDeviceModule::Capturer> CreateCapturer() override; + std::unique_ptr<TestAudioDeviceModule::Renderer> CreateRenderer() override; + + void OnFakeAudioDevicesCreated(AudioDeviceModule* send_audio_device, + AudioDeviceModule* recv_audio_device) override; + + void ModifyAudioConfigs(AudioSendStream::Config* send_config, + std::vector<AudioReceiveStreamInterface::Config>* + receive_configs) override; + void OnAudioStreamsCreated(AudioSendStream* send_stream, + const std::vector<AudioReceiveStreamInterface*>& + receive_streams) override; + + private: + AudioDeviceModule* send_audio_device_ = nullptr; + AudioSendStream* send_stream_ = nullptr; + AudioReceiveStreamInterface* receive_stream_ = nullptr; +}; + +} // namespace test +} // namespace webrtc + +#endif // AUDIO_TEST_AUDIO_END_TO_END_TEST_H_ diff --git a/third_party/libwebrtc/audio/test/audio_stats_test.cc b/third_party/libwebrtc/audio/test/audio_stats_test.cc new file mode 100644 index 0000000000..e8521cfe99 --- /dev/null +++ b/third_party/libwebrtc/audio/test/audio_stats_test.cc @@ -0,0 +1,119 
@@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/test/audio_end_to_end_test.h" +#include "rtc_base/numerics/safe_compare.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { +namespace { + +// Wait half a second between stopping sending and stopping receiving audio. +constexpr int kExtraRecordTimeMs = 500; + +bool IsNear(int reference, int v) { + // Margin is 10%. + const int error = reference / 10 + 1; + return std::abs(reference - v) <= error; +} + +class NoLossTest : public AudioEndToEndTest { + public: + const int kTestDurationMs = 8000; + const int kBytesSent = 69351; + const int32_t kPacketsSent = 400; + const int64_t kRttMs = 100; + + NoLossTest() = default; + + BuiltInNetworkBehaviorConfig GetSendTransportConfig() const override { + BuiltInNetworkBehaviorConfig pipe_config; + pipe_config.queue_delay_ms = kRttMs / 2; + return pipe_config; + } + + void PerformTest() override { + SleepMs(kTestDurationMs); + send_audio_device()->StopRecording(); + // and some extra time to account for network delay. 
+ SleepMs(GetSendTransportConfig().queue_delay_ms + kExtraRecordTimeMs); + } + + void OnStreamsStopped() override { + AudioSendStream::Stats send_stats = send_stream()->GetStats(); + EXPECT_PRED2(IsNear, kBytesSent, send_stats.payload_bytes_sent); + EXPECT_PRED2(IsNear, kPacketsSent, send_stats.packets_sent); + EXPECT_EQ(0, send_stats.packets_lost); + EXPECT_EQ(0.0f, send_stats.fraction_lost); + EXPECT_EQ("opus", send_stats.codec_name); + // send_stats.jitter_ms + EXPECT_PRED2(IsNear, kRttMs, send_stats.rtt_ms); + // Send level is 0 because it is cleared in TransmitMixer::StopSend(). + EXPECT_EQ(0, send_stats.audio_level); + // send_stats.total_input_energy + // send_stats.total_input_duration + EXPECT_FALSE(send_stats.apm_statistics.delay_median_ms); + EXPECT_FALSE(send_stats.apm_statistics.delay_standard_deviation_ms); + EXPECT_FALSE(send_stats.apm_statistics.echo_return_loss); + EXPECT_FALSE(send_stats.apm_statistics.echo_return_loss_enhancement); + EXPECT_FALSE(send_stats.apm_statistics.residual_echo_likelihood); + EXPECT_FALSE(send_stats.apm_statistics.residual_echo_likelihood_recent_max); + + AudioReceiveStreamInterface::Stats recv_stats = + receive_stream()->GetStats(/*get_and_clear_legacy_stats=*/true); + EXPECT_PRED2(IsNear, kBytesSent, recv_stats.payload_bytes_received); + EXPECT_PRED2(IsNear, kPacketsSent, recv_stats.packets_received); + EXPECT_EQ(0, recv_stats.packets_lost); + EXPECT_EQ("opus", send_stats.codec_name); + // recv_stats.jitter_ms + // recv_stats.jitter_buffer_ms + EXPECT_EQ(20u, recv_stats.jitter_buffer_preferred_ms); + // recv_stats.delay_estimate_ms + // Receive level is 0 because it is cleared in Channel::StopPlayout(). 
+ EXPECT_EQ(0, recv_stats.audio_level); + // recv_stats.total_output_energy + // recv_stats.total_samples_received + // recv_stats.total_output_duration + // recv_stats.concealed_samples + // recv_stats.expand_rate + // recv_stats.speech_expand_rate + EXPECT_EQ(0.0, recv_stats.secondary_decoded_rate); + EXPECT_EQ(0.0, recv_stats.secondary_discarded_rate); + EXPECT_EQ(0.0, recv_stats.accelerate_rate); + EXPECT_EQ(0.0, recv_stats.preemptive_expand_rate); + EXPECT_EQ(0, recv_stats.decoding_calls_to_silence_generator); + // recv_stats.decoding_calls_to_neteq + // recv_stats.decoding_normal + // recv_stats.decoding_plc + EXPECT_EQ(0, recv_stats.decoding_cng); + // recv_stats.decoding_plc_cng + // recv_stats.decoding_muted_output + // Capture start time is -1 because we do not have an associated send stream + // on the receiver side. + EXPECT_EQ(-1, recv_stats.capture_start_ntp_time_ms); + + // Match these stats between caller and receiver. + EXPECT_EQ(send_stats.local_ssrc, recv_stats.remote_ssrc); + EXPECT_EQ(*send_stats.codec_payload_type, *recv_stats.codec_payload_type); + } +}; +} // namespace + +using AudioStatsTest = CallTest; + +TEST_F(AudioStatsTest, DISABLED_NoLoss) { + NoLossTest test; + RunBaseTest(&test); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/test/nack_test.cc b/third_party/libwebrtc/audio/test/nack_test.cc new file mode 100644 index 0000000000..b36adf8991 --- /dev/null +++ b/third_party/libwebrtc/audio/test/nack_test.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/test/audio_end_to_end_test.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +using NackTest = CallTest; + +TEST_F(NackTest, ShouldNackInLossyNetwork) { + class NackTest : public AudioEndToEndTest { + public: + const int kTestDurationMs = 2000; + const int64_t kRttMs = 30; + const int64_t kLossPercent = 30; + const int kNackHistoryMs = 1000; + + BuiltInNetworkBehaviorConfig GetSendTransportConfig() const override { + BuiltInNetworkBehaviorConfig pipe_config; + pipe_config.queue_delay_ms = kRttMs / 2; + pipe_config.loss_percent = kLossPercent; + return pipe_config; + } + + void ModifyAudioConfigs(AudioSendStream::Config* send_config, + std::vector<AudioReceiveStreamInterface::Config>* + receive_configs) override { + ASSERT_EQ(receive_configs->size(), 1U); + (*receive_configs)[0].rtp.nack.rtp_history_ms = kNackHistoryMs; + AudioEndToEndTest::ModifyAudioConfigs(send_config, receive_configs); + } + + void PerformTest() override { SleepMs(kTestDurationMs); } + + void OnStreamsStopped() override { + AudioReceiveStreamInterface::Stats recv_stats = + receive_stream()->GetStats(/*get_and_clear_legacy_stats=*/true); + EXPECT_GT(recv_stats.nacks_sent, 0U); + AudioSendStream::Stats send_stats = send_stream()->GetStats(); + EXPECT_GT(send_stats.retransmitted_packets_sent, 0U); + EXPECT_GT(send_stats.nacks_received, 0U); + } + } test; + + RunBaseTest(&test); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/test/non_sender_rtt_test.cc b/third_party/libwebrtc/audio/test/non_sender_rtt_test.cc new file mode 100644 index 0000000000..278193e335 --- /dev/null +++ b/third_party/libwebrtc/audio/test/non_sender_rtt_test.cc @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/test/audio_end_to_end_test.h" +#include "rtc_base/gunit.h" +#include "rtc_base/task_queue_for_test.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +using NonSenderRttTest = CallTest; + +TEST_F(NonSenderRttTest, NonSenderRttStats) { + class NonSenderRttTest : public AudioEndToEndTest { + public: + const int kLongTimeoutMs = 20000; + const int64_t kRttMs = 30; + + explicit NonSenderRttTest(TaskQueueBase* task_queue) + : task_queue_(task_queue) {} + + BuiltInNetworkBehaviorConfig GetSendTransportConfig() const override { + BuiltInNetworkBehaviorConfig pipe_config; + pipe_config.queue_delay_ms = kRttMs / 2; + return pipe_config; + } + + void ModifyAudioConfigs(AudioSendStream::Config* send_config, + std::vector<AudioReceiveStreamInterface::Config>* + receive_configs) override { + ASSERT_EQ(receive_configs->size(), 1U); + (*receive_configs)[0].enable_non_sender_rtt = true; + AudioEndToEndTest::ModifyAudioConfigs(send_config, receive_configs); + send_config->send_codec_spec->enable_non_sender_rtt = true; + } + + void PerformTest() override { + // Wait until we have an RTT measurement, but no longer than + // `kLongTimeoutMs`. This usually takes around 5 seconds, but in rare + // cases it can take more than 10 seconds. 
+ EXPECT_TRUE_WAIT(HasRoundTripTimeMeasurement(), kLongTimeoutMs); + } + + void OnStreamsStopped() override { + AudioReceiveStreamInterface::Stats recv_stats = + receive_stream()->GetStats(/*get_and_clear_legacy_stats=*/true); + EXPECT_GT(recv_stats.round_trip_time_measurements, 0); + ASSERT_TRUE(recv_stats.round_trip_time.has_value()); + EXPECT_GT(recv_stats.round_trip_time->ms(), 0); + EXPECT_GE(recv_stats.total_round_trip_time.ms(), + recv_stats.round_trip_time->ms()); + } + + protected: + bool HasRoundTripTimeMeasurement() { + bool has_rtt = false; + // GetStats() can only be called on `task_queue_`, block while we check. + SendTask(task_queue_, [this, &has_rtt]() { + if (receive_stream() && + receive_stream()->GetStats(true).round_trip_time_measurements > 0) { + has_rtt = true; + } + }); + return has_rtt; + } + + private: + TaskQueueBase* task_queue_; + } test(task_queue()); + + RunBaseTest(&test); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/BUILD.gn b/third_party/libwebrtc/audio/utility/BUILD.gn new file mode 100644 index 0000000000..983b6286e4 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/BUILD.gn @@ -0,0 +1,56 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+import("../../webrtc.gni") + +group("utility") { + deps = [ ":audio_frame_operations" ] +} + +rtc_library("audio_frame_operations") { + visibility = [ "*" ] + sources = [ + "audio_frame_operations.cc", + "audio_frame_operations.h", + "channel_mixer.cc", + "channel_mixer.h", + "channel_mixing_matrix.cc", + "channel_mixing_matrix.h", + ] + + deps = [ + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:safe_conversions", + "../../system_wrappers:field_trial", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers" ] +} + +if (rtc_include_tests) { + rtc_library("utility_tests") { + testonly = true + sources = [ + "audio_frame_operations_unittest.cc", + "channel_mixer_unittest.cc", + "channel_mixing_matrix_unittest.cc", + ] + deps = [ + ":audio_frame_operations", + "../../api/audio:audio_frame_api", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:stringutils", + "../../test:field_trial", + "../../test:test_support", + "//testing/gtest", + ] + } +} diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations.cc b/third_party/libwebrtc/audio/utility/audio_frame_operations.cc new file mode 100644 index 0000000000..1b936c239b --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations.cc @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/utility/audio_frame_operations.h" + +#include <string.h> + +#include <algorithm> +#include <cstdint> +#include <utility> + +#include "common_audio/include/audio_util.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace { + +// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz. +const size_t kMuteFadeFrames = 128; +const float kMuteFadeInc = 1.0f / kMuteFadeFrames; + +} // namespace + +void AudioFrameOperations::Add(const AudioFrame& frame_to_add, + AudioFrame* result_frame) { + // Sanity check. + RTC_DCHECK(result_frame); + RTC_DCHECK_GT(result_frame->num_channels_, 0); + RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); + + bool no_previous_data = result_frame->muted(); + if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { + // Special case we have no data to start with. + RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); + result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_; + no_previous_data = true; + } + + if (result_frame->vad_activity_ == AudioFrame::kVadActive || + frame_to_add.vad_activity_ == AudioFrame::kVadActive) { + result_frame->vad_activity_ = AudioFrame::kVadActive; + } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown || + frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) { + result_frame->vad_activity_ = AudioFrame::kVadUnknown; + } + + if (result_frame->speech_type_ != frame_to_add.speech_type_) + result_frame->speech_type_ = AudioFrame::kUndefined; + + if (!frame_to_add.muted()) { + const int16_t* in_data = frame_to_add.data(); + int16_t* out_data = result_frame->mutable_data(); + size_t length = + frame_to_add.samples_per_channel_ * frame_to_add.num_channels_; + if (no_previous_data) { + std::copy(in_data, in_data + length, out_data); + } else { + for (size_t i = 0; i < length; i++) { + const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) + + 
static_cast<int32_t>(in_data[i]); + out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard); + } + } + } +} + +int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { + if (frame->num_channels_ != 1) { + return -1; + } + UpmixChannels(2, frame); + return 0; +} + +int AudioFrameOperations::StereoToMono(AudioFrame* frame) { + if (frame->num_channels_ != 2) { + return -1; + } + DownmixChannels(1, frame); + return frame->num_channels_ == 1 ? 0 : -1; +} + +void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, + size_t samples_per_channel, + int16_t* dst_audio) { + for (size_t i = 0; i < samples_per_channel; i++) { + dst_audio[i * 2] = + (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; + dst_audio[i * 2 + 1] = + (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> + 1; + } +} + +int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { + if (frame->num_channels_ != 4) { + return -1; + } + + RTC_DCHECK_LE(frame->samples_per_channel_ * 4, + AudioFrame::kMaxDataSizeSamples); + + if (!frame->muted()) { + QuadToStereo(frame->data(), frame->samples_per_channel_, + frame->mutable_data()); + } + frame->num_channels_ = 2; + + return 0; +} + +void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, + size_t src_channels, + size_t samples_per_channel, + size_t dst_channels, + int16_t* dst_audio) { + if (src_channels > 1 && dst_channels == 1) { + DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels, + dst_audio); + return; + } else if (src_channels == 4 && dst_channels == 2) { + QuadToStereo(src_audio, samples_per_channel, dst_audio); + return; + } + + RTC_DCHECK_NOTREACHED() << "src_channels: " << src_channels + << ", dst_channels: " << dst_channels; +} + +void AudioFrameOperations::DownmixChannels(size_t dst_channels, + AudioFrame* frame) { + RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_, + AudioFrame::kMaxDataSizeSamples); + if (frame->num_channels_ > 1 && dst_channels == 1) { 
+ if (!frame->muted()) { + DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_, + frame->num_channels_, frame->mutable_data()); + } + frame->num_channels_ = 1; + } else if (frame->num_channels_ == 4 && dst_channels == 2) { + int err = QuadToStereo(frame); + RTC_DCHECK_EQ(err, 0); + } else { + RTC_DCHECK_NOTREACHED() << "src_channels: " << frame->num_channels_ + << ", dst_channels: " << dst_channels; + } +} + +void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels, + AudioFrame* frame) { + RTC_DCHECK_EQ(frame->num_channels_, 1); + RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels, + AudioFrame::kMaxDataSizeSamples); + + if (frame->num_channels_ != 1 || + frame->samples_per_channel_ * target_number_of_channels > + AudioFrame::kMaxDataSizeSamples) { + return; + } + + if (!frame->muted()) { + // Up-mixing done in place. Going backwards through the frame ensure nothing + // is irrevocably overwritten. + int16_t* frame_data = frame->mutable_data(); + for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) { + for (size_t j = 0; j < target_number_of_channels; ++j) { + frame_data[target_number_of_channels * i + j] = frame_data[i]; + } + } + } + frame->num_channels_ = target_number_of_channels; +} + +void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { + RTC_DCHECK(frame); + if (frame->num_channels_ != 2 || frame->muted()) { + return; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { + std::swap(frame_data[i], frame_data[i + 1]); + } +} + +void AudioFrameOperations::Mute(AudioFrame* frame, + bool previous_frame_muted, + bool current_frame_muted) { + RTC_DCHECK(frame); + if (!previous_frame_muted && !current_frame_muted) { + // Not muted, don't touch. + } else if (previous_frame_muted && current_frame_muted) { + // Frame fully muted. 
+ size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); + frame->Mute(); + } else { + // Fade is a no-op on a muted frame. + if (frame->muted()) { + return; + } + + // Limit number of samples to fade, if frame isn't long enough. + size_t count = kMuteFadeFrames; + float inc = kMuteFadeInc; + if (frame->samples_per_channel_ < kMuteFadeFrames) { + count = frame->samples_per_channel_; + if (count > 0) { + inc = 1.0f / count; + } + } + + size_t start = 0; + size_t end = count; + float start_g = 0.0f; + if (current_frame_muted) { + // Fade out the last `count` samples of frame. + RTC_DCHECK(!previous_frame_muted); + start = frame->samples_per_channel_ - count; + end = frame->samples_per_channel_; + start_g = 1.0f; + inc = -inc; + } else { + // Fade in the first `count` samples of frame. + RTC_DCHECK(previous_frame_muted); + } + + // Perform fade. + int16_t* frame_data = frame->mutable_data(); + size_t channels = frame->num_channels_; + for (size_t j = 0; j < channels; ++j) { + float g = start_g; + for (size_t i = start * channels; i < end * channels; i += channels) { + g += inc; + frame_data[i + j] *= g; + } + } + } +} + +void AudioFrameOperations::Mute(AudioFrame* frame) { + Mute(frame, true, true); +} + +void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { + RTC_DCHECK(frame); + RTC_DCHECK_GT(frame->num_channels_, 0); + if (frame->num_channels_ < 1 || frame->muted()) { + return; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = frame_data[i] >> 1; + } +} + +int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { + if (frame->num_channels_ != 2) { + return -1; + } else if (frame->muted()) { + return 0; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_; i++) { + frame_data[2 * i] = 
static_cast<int16_t>(left * frame_data[2 * i]); + frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]); + } + return 0; +} + +int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { + if (frame->muted()) { + return 0; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]); + } + return 0; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations.h b/third_party/libwebrtc/audio/utility/audio_frame_operations.h new file mode 100644 index 0000000000..2a5f29f4f5 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ +#define AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "api/audio/audio_frame.h" + +namespace webrtc { + +// TODO(andrew): consolidate this with utility.h and audio_frame_manipulator.h. +// Change reference parameters to pointers. Consider using a namespace rather +// than a class. +class AudioFrameOperations { + public: + // Add samples in `frame_to_add` with samples in `result_frame` + // putting the results in `results_frame`. The fields + // `vad_activity_` and `speech_type_` of the result frame are + // updated. If `result_frame` is empty (`samples_per_channel_`==0), + // the samples in `frame_to_add` are added to it. 
The number of + // channels and number of samples per channel must match except when + // `result_frame` is empty. + static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame); + + // `frame.num_channels_` will be updated. This version checks for sufficient + // buffer size and that `num_channels_` is mono. Use UpmixChannels + // instead. TODO(bugs.webrtc.org/8649): remove. + ABSL_DEPRECATED("bugs.webrtc.org/8649") + static int MonoToStereo(AudioFrame* frame); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` is stereo. Use DownmixChannels + // instead. TODO(bugs.webrtc.org/8649): remove. + ABSL_DEPRECATED("bugs.webrtc.org/8649") + static int StereoToMono(AudioFrame* frame); + + // Downmixes 4 channels `src_audio` to stereo `dst_audio`. This is an in-place + // operation, meaning `src_audio` and `dst_audio` may point to the same + // buffer. + static void QuadToStereo(const int16_t* src_audio, + size_t samples_per_channel, + int16_t* dst_audio); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` is 4 channels. + static int QuadToStereo(AudioFrame* frame); + + // Downmixes `src_channels` `src_audio` to `dst_channels` `dst_audio`. + // This is an in-place operation, meaning `src_audio` and `dst_audio` + // may point to the same buffer. Supported channel combinations are + // Stereo to Mono, Quad to Mono, and Quad to Stereo. + static void DownmixChannels(const int16_t* src_audio, + size_t src_channels, + size_t samples_per_channel, + size_t dst_channels, + int16_t* dst_audio); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` and `dst_channels` are valid and performs relevant downmix. + // Supported channel combinations are N channels to Mono, and Quad to Stereo. + static void DownmixChannels(size_t dst_channels, AudioFrame* frame); + + // `frame.num_channels_` will be updated. 
This version checks that + // `num_channels_` and `dst_channels` are valid and performs relevant + // downmix. Supported channel combinations are Mono to N + // channels. The single channel is replicated. + static void UpmixChannels(size_t target_number_of_channels, + AudioFrame* frame); + + // Swap the left and right channels of `frame`. Fails silently if `frame` is + // not stereo. + static void SwapStereoChannels(AudioFrame* frame); + + // Conditionally zero out contents of `frame` for implementing audio mute: + // `previous_frame_muted` && `current_frame_muted` - Zero out whole frame. + // `previous_frame_muted` && !`current_frame_muted` - Fade-in at frame start. + // !`previous_frame_muted` && `current_frame_muted` - Fade-out at frame end. + // !`previous_frame_muted` && !`current_frame_muted` - Leave frame untouched. + static void Mute(AudioFrame* frame, + bool previous_frame_muted, + bool current_frame_muted); + + // Zero out contents of frame. + static void Mute(AudioFrame* frame); + + // Halve samples in `frame`. + static void ApplyHalfGain(AudioFrame* frame); + + static int Scale(float left, float right, AudioFrame* frame); + + static int ScaleWithSat(float scale, AudioFrame* frame); +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build b/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build new file mode 100644 index 0000000000..e215792f64 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build @@ -0,0 +1,238 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/audio/utility/audio_frame_operations.cc", + "/third_party/libwebrtc/audio/utility/channel_mixer.cc", + "/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + 
DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if 
CONFIG["TARGET_CPU"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_operations_gn") diff --git 
a/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc b/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc new file mode 100644 index 0000000000..1a2c16e45f --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/audio_frame_operations.h" + +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +class AudioFrameOperationsTest : public ::testing::Test { + protected: + AudioFrameOperationsTest() { + // Set typical values. + frame_.samples_per_channel_ = 320; + frame_.num_channels_ = 2; + } + + AudioFrame frame_; +}; + +class AudioFrameOperationsDeathTest : public AudioFrameOperationsTest {}; + +void SetFrameData(int16_t ch1, + int16_t ch2, + int16_t ch3, + int16_t ch4, + AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) { + frame_data[i] = ch1; + frame_data[i + 1] = ch2; + frame_data[i + 2] = ch3; + frame_data[i + 3] = ch4; + } +} + +void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { + frame_data[i] = left; + frame_data[i + 1] = right; + } +} + +void SetFrameData(int16_t data, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = data; + } +} + +void VerifyFramesAreEqual(const 
AudioFrame& frame1, const AudioFrame& frame2) { + EXPECT_EQ(frame1.num_channels_, frame2.num_channels_); + EXPECT_EQ(frame1.samples_per_channel_, frame2.samples_per_channel_); + const int16_t* frame1_data = frame1.data(); + const int16_t* frame2_data = frame2.data(); + for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_; + i++) { + EXPECT_EQ(frame1_data[i], frame2_data[i]); + } + EXPECT_EQ(frame1.muted(), frame2.muted()); +} + +void InitFrame(AudioFrame* frame, + size_t channels, + size_t samples_per_channel, + int16_t left_data, + int16_t right_data) { + RTC_DCHECK(frame); + RTC_DCHECK_GE(2, channels); + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, + samples_per_channel * channels); + frame->samples_per_channel_ = samples_per_channel; + frame->num_channels_ = channels; + if (channels == 2) { + SetFrameData(left_data, right_data, frame); + } else if (channels == 1) { + SetFrameData(left_data, frame); + } +} + +int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) { + RTC_DCHECK_LT(channel, frame.num_channels_); + RTC_DCHECK_LT(index, frame.samples_per_channel_); + return frame.data()[index * frame.num_channels_ + channel]; +} + +void VerifyFrameDataBounds(const AudioFrame& frame, + size_t channel, + int16_t max, + int16_t min) { + for (size_t i = 0; i < frame.samples_per_channel_; ++i) { + int16_t s = GetChannelData(frame, channel, i); + EXPECT_LE(min, s); + EXPECT_GE(max, s); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST_F(AudioFrameOperationsDeathTest, MonoToStereoFailsWithBadParameters) { + EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), ""); + frame_.samples_per_channel_ = AudioFrame::kMaxDataSizeSamples; + frame_.num_channels_ = 1; + EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), ""); +} +#endif + +TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) { + frame_.num_channels_ = 1; + SetFrameData(1, &frame_); + + 
AudioFrameOperations::UpmixChannels(2, &frame_); + EXPECT_EQ(2u, frame_.num_channels_); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(1, 1, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) { + frame_.num_channels_ = 1; + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::UpmixChannels(2, &frame_); + EXPECT_EQ(2u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST_F(AudioFrameOperationsDeathTest, StereoToMonoFailsWithBadParameters) { + frame_.num_channels_ = 1; + EXPECT_DEATH(AudioFrameOperations::DownmixChannels(1, &frame_), ""); +} +#endif + +TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) { + SetFrameData(4, 2, &frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(3, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) { + AudioFrame target_frame; + SetFrameData(4, 2, &frame_); + + target_frame.num_channels_ = 1; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::DownmixChannels(frame_.data(), 2, + frame_.samples_per_channel_, 1, + target_frame.mutable_data()); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(3, &mono_frame); + VerifyFramesAreEqual(mono_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) { + SetFrameData(-32768, -32768, 
&frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) { + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) { + AudioFrame target_frame; + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + target_frame.num_channels_ = 1; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::DownmixChannels(frame_.data(), 4, + frame_.samples_per_channel_, 1, + target_frame.mutable_data()); + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoDoesNotWrapAround) { + frame_.num_channels_ = 4; + SetFrameData(-32768, -32768, -32768, -32768, &frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoFailsWithBadParameters) { + 
frame_.num_channels_ = 1; + EXPECT_EQ(-1, AudioFrameOperations::QuadToStereo(&frame_)); + frame_.num_channels_ = 2; + EXPECT_EQ(-1, AudioFrameOperations::QuadToStereo(&frame_)); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) { + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(3, 7, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) { + AudioFrame target_frame; + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + target_frame.num_channels_ = 2; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(3, 7, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoDoesNotWrapAround) { + frame_.num_channels_ = 4; + SetFrameData(-32768, -32768, -32768, -32768, &frame_); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(-32768, -32768, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) { + SetFrameData(0, 1, &frame_); + + AudioFrame swapped_frame; + swapped_frame.samples_per_channel_ = 320; + swapped_frame.num_channels_ = 2; + SetFrameData(1, 0, 
&swapped_frame); + + AudioFrameOperations::SwapStereoChannels(&frame_); + VerifyFramesAreEqual(swapped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::SwapStereoChannels(&frame_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) { + frame_.num_channels_ = 1; + // Set data to "stereo", despite it being a mono frame. + SetFrameData(0, 1, &frame_); + + AudioFrame orig_frame; + orig_frame.CopyFrom(frame_); + AudioFrameOperations::SwapStereoChannels(&frame_); + // Verify that no swap occurred. + VerifyFramesAreEqual(orig_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MuteDisabled) { + SetFrameData(1000, -1000, &frame_); + AudioFrameOperations::Mute(&frame_, false, false); + + AudioFrame muted_frame; + muted_frame.samples_per_channel_ = 320; + muted_frame.num_channels_ = 2; + SetFrameData(1000, -1000, &muted_frame); + VerifyFramesAreEqual(muted_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MuteEnabled) { + SetFrameData(1000, -1000, &frame_); + AudioFrameOperations::Mute(&frame_, true, true); + + AudioFrame muted_frame; + muted_frame.samples_per_channel_ = frame_.samples_per_channel_; + muted_frame.num_channels_ = frame_.num_channels_; + ASSERT_TRUE(muted_frame.muted()); + VerifyFramesAreEqual(muted_frame, frame_); +} + +// Verify that *beginning* to mute works for short and long (>128) frames, mono +// and stereo. Beginning mute should yield a ramp down to zero. 
+TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(-992, GetChannelData(frame_, 1, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-978, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(0, 
GetChannelData(frame_, 0, 92)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 92)); +} + +// Verify that *ending* to mute works for short and long (>128) frames, mono +// and stereo. Ending mute should yield a ramp up from zero. +TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(21, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-15, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(21, 
GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-21, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); + EXPECT_EQ(-999, GetChannelData(frame_, 1, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, false, true); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, true, false); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) { + SetFrameData(2, &frame_); + + AudioFrame half_gain_frame; + half_gain_frame.num_channels_ = frame_.num_channels_; + half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_; + SetFrameData(1, &half_gain_frame); + + AudioFrameOperations::ApplyHalfGain(&frame_); + VerifyFramesAreEqual(half_gain_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::ApplyHalfGain(&frame_); + EXPECT_TRUE(frame_.muted()); +} + +// TODO(andrew): should not allow negative scales. +TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) { + frame_.num_channels_ = 1; + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, 1.0, &frame_)); + + frame_.num_channels_ = 3; + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, 1.0, &frame_)); + + frame_.num_channels_ = 2; + EXPECT_EQ(-1, AudioFrameOperations::Scale(-1.0, 1.0, &frame_)); + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, -1.0, &frame_)); +} + +// TODO(andrew): fix the wraparound bug. We should always saturate. 
+TEST_F(AudioFrameOperationsTest, DISABLED_ScaleDoesNotWrapAround) { + SetFrameData(4000, -4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::Scale(10.0, 10.0, &frame_)); + + AudioFrame clipped_frame; + clipped_frame.samples_per_channel_ = 320; + clipped_frame.num_channels_ = 2; + SetFrameData(32767, -32768, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleSucceeds) { + SetFrameData(1, -1, &frame_); + EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_)); + + AudioFrame scaled_frame; + scaled_frame.samples_per_channel_ = 320; + scaled_frame.num_channels_ = 2; + SetFrameData(2, -3, &scaled_frame); + VerifyFramesAreEqual(scaled_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + +// TODO(andrew): should fail with a negative scale. +TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) { + EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_)); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatDoesNotWrapAround) { + frame_.num_channels_ = 1; + SetFrameData(4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, &frame_)); + + AudioFrame clipped_frame; + clipped_frame.samples_per_channel_ = 320; + clipped_frame.num_channels_ = 1; + SetFrameData(32767, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); + + SetFrameData(-4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, &frame_)); + SetFrameData(-32768, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) { + frame_.num_channels_ = 1; + SetFrameData(1, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_)); + + AudioFrame scaled_frame; + scaled_frame.samples_per_channel_ = 320; + scaled_frame.num_channels_ = 1; + SetFrameData(2, 
&scaled_frame); + VerifyFramesAreEqual(scaled_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) { + // When samples_per_channel_ is 0, the frame counts as empty and zero. + AudioFrame frame_to_add_to; + frame_to_add_to.mutable_data(); // Unmute the frame. + ASSERT_FALSE(frame_to_add_to.muted()); + frame_to_add_to.samples_per_channel_ = 0; + frame_to_add_to.num_channels_ = frame_.num_channels_; + + SetFrameData(1000, &frame_); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) { + AudioFrame frame_to_add_to; + ASSERT_TRUE(frame_to_add_to.muted()); + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + + SetFrameData(1000, &frame_); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) { + AudioFrame frame_to_add_to; + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_to_add_to); + + AudioFrame frame_copy; + frame_copy.CopyFrom(frame_to_add_to); + + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_copy, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) { + AudioFrame frame_to_add_to; + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_to_add_to); + SetFrameData(2000, &frame_); + + AudioFrameOperations::Add(frame_, &frame_to_add_to); + 
SetFrameData(frame_.data()[0] + 1000, &frame_); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/channel_mixer.cc b/third_party/libwebrtc/audio/utility/channel_mixer.cc new file mode 100644 index 0000000000..0f1e663873 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixer.h" + +#include "audio/utility/channel_mixing_matrix.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +ChannelMixer::ChannelMixer(ChannelLayout input_layout, + ChannelLayout output_layout) + : input_layout_(input_layout), + output_layout_(output_layout), + input_channels_(ChannelLayoutToChannelCount(input_layout)), + output_channels_(ChannelLayoutToChannelCount(output_layout)) { + // Create the transformation matrix. + ChannelMixingMatrix matrix_builder(input_layout_, input_channels_, + output_layout_, output_channels_); + remapping_ = matrix_builder.CreateTransformationMatrix(&matrix_); +} + +ChannelMixer::~ChannelMixer() = default; + +void ChannelMixer::Transform(AudioFrame* frame) { + RTC_DCHECK(frame); + RTC_DCHECK_EQ(matrix_[0].size(), static_cast<size_t>(input_channels_)); + RTC_DCHECK_EQ(matrix_.size(), static_cast<size_t>(output_channels_)); + + // Leave the audio frame intact if the channel layouts for in and out are + // identical. 
+ if (input_layout_ == output_layout_) { + return; + } + + if (IsUpMixing()) { + RTC_CHECK_LE(frame->samples_per_channel() * output_channels_, + frame->max_16bit_samples()); + } + + // Only change the number of output channels if the audio frame is muted. + if (frame->muted()) { + frame->num_channels_ = output_channels_; + frame->channel_layout_ = output_layout_; + return; + } + + const int16_t* in_audio = frame->data(); + + // Only allocate fresh memory at first access or if the required size has + // increased. + // TODO(henrika): we might be able to do downmixing in-place and thereby avoid + // extra memory allocation and a memcpy. + const size_t num_elements = frame->samples_per_channel() * output_channels_; + if (audio_vector_ == nullptr || num_elements > audio_vector_size_) { + audio_vector_.reset(new int16_t[num_elements]); + audio_vector_size_ = num_elements; + } + int16_t* out_audio = audio_vector_.get(); + + // Modify the number of channels by creating a weighted sum of input samples + // where the weights (scale factors) for each output sample are given by the + // transformation matrix. + for (size_t i = 0; i < frame->samples_per_channel(); i++) { + for (size_t output_ch = 0; output_ch < output_channels_; ++output_ch) { + float acc_value = 0.0f; + for (size_t input_ch = 0; input_ch < input_channels_; ++input_ch) { + const float scale = matrix_[output_ch][input_ch]; + // Scale should always be positive. + RTC_DCHECK_GE(scale, 0); + // Each output sample is a weighted sum of input samples. + acc_value += scale * in_audio[i * input_channels_ + input_ch]; + } + const size_t index = output_channels_ * i + output_ch; + RTC_CHECK_LE(index, audio_vector_size_); + out_audio[index] = rtc::saturated_cast<int16_t>(acc_value); + } + } + + // Update channel information. + frame->num_channels_ = output_channels_; + frame->channel_layout_ = output_layout_; + + // Copy the output result to the audio frame in `frame`. 
+ memcpy( + frame->mutable_data(), out_audio, + sizeof(int16_t) * frame->samples_per_channel() * frame->num_channels()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/channel_mixer.h b/third_party/libwebrtc/audio/utility/channel_mixer.h new file mode 100644 index 0000000000..2dea8eb45b --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_UTILITY_CHANNEL_MIXER_H_ +#define AUDIO_UTILITY_CHANNEL_MIXER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <vector> + +#include "api/audio/audio_frame.h" +#include "api/audio/channel_layout.h" + +namespace webrtc { + +// ChannelMixer is for converting audio between channel layouts. The conversion +// matrix is built upon construction and used during each Transform() call. The +// algorithm works by generating a conversion matrix mapping each output channel +// to list of input channels. The transform renders all of the output channels, +// with each output channel rendered according to a weighted sum of the relevant +// input channels as defined in the matrix. +// This file is derived from Chromium's media/base/channel_mixer.h. +class ChannelMixer { + public: + // To mix two channels into one and preserve loudness, we must apply + // (1 / sqrt(2)) gain to each. 
+ static constexpr float kHalfPower = 0.707106781186547524401f; + + ChannelMixer(ChannelLayout input_layout, ChannelLayout output_layout); + ~ChannelMixer(); + + // Transforms all input channels corresponding to the selected `input_layout` + // to the number of channels in the selected `output_layout`. + // Example usage (downmix from stereo to mono): + // + // ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + // AudioFrame frame; + // frame.samples_per_channel_ = 160; + // frame.num_channels_ = 2; + // EXPECT_EQ(2u, frame.channels()); + // mixer.Transform(&frame); + // EXPECT_EQ(1u, frame.channels()); + // + void Transform(AudioFrame* frame); + + private: + bool IsUpMixing() const { return output_channels_ > input_channels_; } + + // Selected channel layouts. + const ChannelLayout input_layout_; + const ChannelLayout output_layout_; + + // Channel counts for input and output. + const size_t input_channels_; + const size_t output_channels_; + + // 2D matrix of output channels to input channels. + std::vector<std::vector<float> > matrix_; + + // 1D array used as temporary storage during the transformation. + std::unique_ptr<int16_t[]> audio_vector_; + + // Number of elements allocated for `audio_vector_`. + size_t audio_vector_size_ = 0; + + // Optimization case for when we can simply remap the input channels to output + // channels, i.e., when all scaling factors in `matrix_` equals 1.0. + bool remapping_; + + // Delete the copy constructor and assignment operator. 
+ ChannelMixer(const ChannelMixer& other) = delete; + ChannelMixer& operator=(const ChannelMixer& other) = delete; +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_CHANNEL_MIXER_H_ diff --git a/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc b/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc new file mode 100644 index 0000000000..94cb1ac7e3 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixer.h" + +#include <memory> + +#include "api/audio/audio_frame.h" +#include "api/audio/channel_layout.h" +#include "audio/utility/channel_mixing_matrix.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr uint32_t kTimestamp = 27; +constexpr int kSampleRateHz = 16000; +constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; + +class ChannelMixerTest : public ::testing::Test { + protected: + ChannelMixerTest() { + // Use 10ms audio frames by default. Don't set values yet. 
+ frame_.samples_per_channel_ = kSamplesPerChannel; + frame_.sample_rate_hz_ = kSampleRateHz; + EXPECT_TRUE(frame_.muted()); + } + + virtual ~ChannelMixerTest() {} + + AudioFrame frame_; +}; + +void SetFrameData(int16_t data, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); + i++) { + frame_data[i] = data; + } +} + +void SetMonoData(int16_t center, AudioFrame* frame) { + frame->num_channels_ = 1; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel(); ++i) { + frame_data[i] = center; + } + EXPECT_FALSE(frame->muted()); +} + +void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) { + ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 2; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) { + frame_data[i] = left; + frame_data[i + 1] = right; + } + EXPECT_FALSE(frame->muted()); +} + +void SetFiveOneData(int16_t front_left, + int16_t front_right, + int16_t center, + int16_t lfe, + int16_t side_left, + int16_t side_right, + AudioFrame* frame) { + ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 6; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) { + frame_data[i] = front_left; + frame_data[i + 1] = front_right; + frame_data[i + 2] = center; + frame_data[i + 3] = lfe; + frame_data[i + 4] = side_left; + frame_data[i + 5] = side_right; + } + EXPECT_FALSE(frame->muted()); +} + +void SetSevenOneData(int16_t front_left, + int16_t front_right, + int16_t center, + int16_t lfe, + int16_t side_left, + int16_t side_right, + int16_t back_left, + int16_t back_right, + AudioFrame* frame) { + ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 8; + int16_t* 
frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) { + frame_data[i] = front_left; + frame_data[i + 1] = front_right; + frame_data[i + 2] = center; + frame_data[i + 3] = lfe; + frame_data[i + 4] = side_left; + frame_data[i + 5] = side_right; + frame_data[i + 6] = back_left; + frame_data[i + 7] = back_right; + } + EXPECT_FALSE(frame->muted()); +} + +bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) { + const int16_t* frame_data = frame->data(); + for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); + i++) { + if (frame_data[i] != sample) { + return false; + } + } + return true; +} + +void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { + EXPECT_EQ(frame1.num_channels(), frame2.num_channels()); + EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel()); + const int16_t* frame1_data = frame1.data(); + const int16_t* frame2_data = frame2.data(); + for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels(); + i++) { + EXPECT_EQ(frame1_data[i], frame2_data[i]); + } + EXPECT_EQ(frame1.muted(), frame2.muted()); +} + +} // namespace + +// Test all possible layout conversions can be constructed and mixed. Don't +// care about the actual content, simply run through all mixing combinations +// and ensure that nothing fails. +TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast<ChannelLayout>(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast<ChannelLayout>(output_layout + 1)) { + // DISCRETE, BITSTREAM can't be tested here based on the current approach. + // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable. + // Stereo down mix should never be the output layout. 
+ if (input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_BITSTREAM || + output_layout == CHANNEL_LAYOUT_DISCRETE || + output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + + rtc::StringBuilder ss; + ss << "Input Layout: " << input_layout + << ", Output Layout: " << output_layout; + SCOPED_TRACE(ss.str()); + ChannelMixer mixer(input_layout, output_layout); + + frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz, + AudioFrame::kNormalSpeech, AudioFrame::kVadActive, + ChannelLayoutToChannelCount(input_layout)); + EXPECT_TRUE(frame_.muted()); + mixer.Transform(&frame_); + } + } +} + +// Ensure that the audio frame is untouched when input and output channel +// layouts are identical, i.e., the transformation should have no effect. +// Exclude invalid mixing combinations. 
TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
  // Walks every (input, output) layout pair but only exercises the diagonal
  // (input == output), skipping the layouts that cannot be mixed.
  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
       input_layout <= CHANNEL_LAYOUT_MAX;
       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
         output_layout <= CHANNEL_LAYOUT_MAX;
         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
      if (input_layout != output_layout ||
          input_layout == CHANNEL_LAYOUT_BITSTREAM ||
          input_layout == CHANNEL_LAYOUT_DISCRETE ||
          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
        continue;
      }
      ChannelMixer mixer(input_layout, output_layout);
      frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
      SetFrameData(99, &frame_);
      mixer.Transform(&frame_);
      // Channel count and every sample must be unchanged by the transform.
      EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
                static_cast<int>(frame_.num_channels()));
      EXPECT_TRUE(AllSamplesEquals(99, &frame_));
    }
  }
}

// Stereo -> mono down-mix: each input channel contributes with scale 0.5.
TEST_F(ChannelMixerTest, StereoToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: stereo
  //                      LEFT  RIGHT
  // Output: mono CENTER  0.5   0.5
  //
  // 0.5 * 7 + 0.5 * 3 = 5.
  SetStereoData(7, 3, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(5, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

  // Full-scale negative input: 0.5 * (-32768) + 0.5 * (-32768) = -32768, which
  // is exactly the smallest 16-bit sample value.
  SetStereoData(-32768, -32768, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

// A muted frame must stay muted after a down-mix, while its channel count and
// layout are still updated to the output format.
TEST_F(ChannelMixerTest, StereoToMonoMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

// A muted frame must stay muted after an up-mix, while its channel count and
// layout are still updated to the output format.
TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

// 5.1 -> mono down-mix: center passes through at scale 1, all other channels
// are scaled by 1/sqrt(2).
TEST_F(ChannelMixerTest, FiveOneToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: mono CENTER  0.707  0.707  1       0.707  0.707      0.707
  //
  // a = [10, 20, 15, 2, 5, 5]
  // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
  // a * b (dot product) = 44.69848480983499,
  // which is truncated into 44 using 16 bit representation.
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(44, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

  // With every input at -32768 the weighted sum lies far below the 16-bit
  // range; the expectation requires the output to be clamped to -32768.
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

// 5.1 -> 7.1 up-mix: the six input channels are copied unscaled and the two
// added back channels are expected to be zero.
TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  //
  //                        Input: 5.1
  //                        LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: 7.1 LEFT       1      0      0       0      0          0
  //             RIGHT      0      1      0       0      0          0
  //             CENTER     0      0      1       0      0          0
  //             LFE        0      0      0       1      0          0
  //             SIDE_LEFT  0      0      0       0      1          0
  //             SIDE_RIGHT 0      0      0       0      0          1
  //             BACK_LEFT  0      0      0       0      0          0
  //             BACK_RIGHT 0      0      0       0      0          0
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());

  AudioFrame seven_one_frame;
  seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);

  // Extreme positive and negative samples must be copied through unchanged.
  SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
                  &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);
}

// 5.1 (back) -> stereo down-mix: front L/R pass through at scale 1; center,
// LFE and the same-side back channel are added at 1/sqrt(2).
TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    BACK_LEFT  BACK_RIGHT
  // Output: stereo LEFT  1      0      0.707   0.707  0.707      0
  //                RIGHT 0      1      0.707   0.707  0          0.707
  //
  // LEFT  = 20 + 0.707 * (15 + 2 + 5) = 35.55..., expected as 35.
  // RIGHT = 30 + 0.707 * (15 + 2 + 5) = 45.55..., expected as 45.
  SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(35, 45, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);

  // With every input at -32768 the weighted sums lie far below the 16-bit
  // range; the expectation requires both outputs to be clamped to -32768.
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
  SetStereoData(-32768, -32768, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

// Mono -> stereo up-mix: the single input channel is copied to both outputs.
TEST_F(ChannelMixerTest, MonoToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
  //
  //                       Input: mono
  //                       CENTER
  // Output: stereo LEFT   1
  //                RIGHT  1
  //
  SetMonoData(44, &frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(44, 44, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

// Stereo -> 5.1 up-mix: left/right are copied and the four remaining output
// channels are expected to be zero.
TEST_F(ChannelMixerTest, StereoToFiveOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
  //
  //                         Input: Stereo
  //                         LEFT   RIGHT
  // Output: 5.1 LEFT        1      0
  //             RIGHT       0      1
  //             CENTER      0      0
  //             LFE         0      0
  //             SIDE_LEFT   0      0
  //             SIDE_RIGHT  0      0
  //
  SetStereoData(50, 60, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());

  AudioFrame five_one_frame;
  five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
  VerifyFramesAreEqual(five_one_frame, frame_);
}

}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc
new file mode 100644
index 0000000000..1244653f63
--- /dev/null
+++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc
@@ -0,0 +1,333 @@
/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "audio/utility/channel_mixing_matrix.h" + +#include <stddef.h> + +#include <algorithm> + +#include "audio/utility/channel_mixer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +// Selects the default usage of VoIP channel mapping adjustments. +bool UseChannelMappingAdjustmentsByDefault() { + return !field_trial::IsEnabled( + "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch"); +} + +} // namespace + +static void ValidateLayout(ChannelLayout layout) { + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_NONE); + RTC_CHECK_LE(layout, CHANNEL_LAYOUT_MAX); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_UNSUPPORTED); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_DISCRETE); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC); + + // Verify there's at least one channel. Should always be true here by virtue + // of not being one of the invalid layouts, but lets double check to be sure. + int channel_count = ChannelLayoutToChannelCount(layout); + RTC_DCHECK_GT(channel_count, 0); + + // If we have more than one channel, verify a symmetric layout for sanity. + // The unit test will verify all possible layouts, so this can be a DCHECK. + // Symmetry allows simplifying the matrix building code by allowing us to + // assume that if one channel of a pair exists, the other will too. + if (channel_count > 1) { + // Assert that LEFT exists if and only if RIGHT exists, and so on. 
+ RTC_DCHECK_EQ(ChannelOrder(layout, LEFT) >= 0, + ChannelOrder(layout, RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, SIDE_LEFT) >= 0, + ChannelOrder(layout, SIDE_RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, BACK_LEFT) >= 0, + ChannelOrder(layout, BACK_RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, LEFT_OF_CENTER) >= 0, + ChannelOrder(layout, RIGHT_OF_CENTER) >= 0); + } else { + RTC_DCHECK_EQ(layout, CHANNEL_LAYOUT_MONO); + } +} + +ChannelMixingMatrix::ChannelMixingMatrix(ChannelLayout input_layout, + int input_channels, + ChannelLayout output_layout, + int output_channels) + : use_voip_channel_mapping_adjustments_( + UseChannelMappingAdjustmentsByDefault()), + input_layout_(input_layout), + input_channels_(input_channels), + output_layout_(output_layout), + output_channels_(output_channels) { + // Stereo down mix should never be the output layout. + RTC_CHECK_NE(output_layout, CHANNEL_LAYOUT_STEREO_DOWNMIX); + + // Verify that the layouts are supported + if (input_layout != CHANNEL_LAYOUT_DISCRETE) + ValidateLayout(input_layout); + if (output_layout != CHANNEL_LAYOUT_DISCRETE) + ValidateLayout(output_layout); + + // Special case for 5.0, 5.1 with back channels when upmixed to 7.0, 7.1, + // which should map the back LR to side LR. + if (input_layout_ == CHANNEL_LAYOUT_5_0_BACK && + output_layout_ == CHANNEL_LAYOUT_7_0) { + input_layout_ = CHANNEL_LAYOUT_5_0; + } else if (input_layout_ == CHANNEL_LAYOUT_5_1_BACK && + output_layout_ == CHANNEL_LAYOUT_7_1) { + input_layout_ = CHANNEL_LAYOUT_5_1; + } +} + +ChannelMixingMatrix::~ChannelMixingMatrix() = default; + +bool ChannelMixingMatrix::CreateTransformationMatrix( + std::vector<std::vector<float>>* matrix) { + matrix_ = matrix; + + // Size out the initial matrix. + matrix_->reserve(output_channels_); + for (int output_ch = 0; output_ch < output_channels_; ++output_ch) + matrix_->push_back(std::vector<float>(input_channels_, 0)); + + // First check for discrete case. 
+ if (input_layout_ == CHANNEL_LAYOUT_DISCRETE || + output_layout_ == CHANNEL_LAYOUT_DISCRETE) { + // If the number of input channels is more than output channels, then + // copy as many as we can then drop the remaining input channels. + // If the number of input channels is less than output channels, then + // copy them all, then zero out the remaining output channels. + int passthrough_channels = std::min(input_channels_, output_channels_); + for (int i = 0; i < passthrough_channels; ++i) + (*matrix_)[i][i] = 1; + + return true; + } + + // If specified, use adjusted channel mapping for the VoIP scenario. + if (use_voip_channel_mapping_adjustments_ && + input_layout_ == CHANNEL_LAYOUT_MONO && + ChannelLayoutToChannelCount(output_layout_) >= 2) { + // Only place the mono input in the front left and right channels. + (*matrix_)[0][0] = 1.f; + (*matrix_)[1][0] = 1.f; + + for (size_t output_ch = 2; output_ch < matrix_->size(); ++output_ch) { + (*matrix_)[output_ch][0] = 0.f; + } + return true; + } + + // Route matching channels and figure out which ones aren't accounted for. + for (Channels ch = LEFT; ch < CHANNELS_MAX + 1; + ch = static_cast<Channels>(ch + 1)) { + int input_ch_index = ChannelOrder(input_layout_, ch); + if (input_ch_index < 0) + continue; + + int output_ch_index = ChannelOrder(output_layout_, ch); + if (output_ch_index < 0) { + unaccounted_inputs_.push_back(ch); + continue; + } + + RTC_DCHECK_LT(static_cast<size_t>(output_ch_index), matrix_->size()); + RTC_DCHECK_LT(static_cast<size_t>(input_ch_index), + (*matrix_)[output_ch_index].size()); + (*matrix_)[output_ch_index][input_ch_index] = 1; + } + + // If all input channels are accounted for, there's nothing left to do. + if (unaccounted_inputs_.empty()) { + // Since all output channels map directly to inputs we can optimize. + return true; + } + + // Mix front LR into center. + if (IsUnaccounted(LEFT)) { + // When down mixing to mono from stereo, we need to be careful of full scale + // stereo mixes. 
Scaling by 1 / sqrt(2) here will likely lead to clipping + // so we use 1 / 2 instead. + float scale = + (output_layout_ == CHANNEL_LAYOUT_MONO && input_channels_ == 2) + ? 0.5 + : ChannelMixer::kHalfPower; + Mix(LEFT, CENTER, scale); + Mix(RIGHT, CENTER, scale); + } + + // Mix center into front LR. + if (IsUnaccounted(CENTER)) { + // When up mixing from mono, just do a copy to front LR. + float scale = + (input_layout_ == CHANNEL_LAYOUT_MONO) ? 1 : ChannelMixer::kHalfPower; + MixWithoutAccounting(CENTER, LEFT, scale); + Mix(CENTER, RIGHT, scale); + } + + // Mix back LR into: side LR || back center || front LR || front center. + if (IsUnaccounted(BACK_LEFT)) { + if (HasOutputChannel(SIDE_LEFT)) { + // If the input has side LR, mix back LR into side LR, but instead if the + // input doesn't have side LR (but output does) copy back LR to side LR. + float scale = HasInputChannel(SIDE_LEFT) ? ChannelMixer::kHalfPower : 1; + Mix(BACK_LEFT, SIDE_LEFT, scale); + Mix(BACK_RIGHT, SIDE_RIGHT, scale); + } else if (HasOutputChannel(BACK_CENTER)) { + // Mix back LR into back center. + Mix(BACK_LEFT, BACK_CENTER, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix back LR into front LR. + Mix(BACK_LEFT, LEFT, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix back LR into front center. + Mix(BACK_LEFT, CENTER, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix side LR into: back LR || back center || front LR || front center. + if (IsUnaccounted(SIDE_LEFT)) { + if (HasOutputChannel(BACK_LEFT)) { + // If the input has back LR, mix side LR into back LR, but instead if the + // input doesn't have back LR (but output does) copy side LR to back LR. + float scale = HasInputChannel(BACK_LEFT) ? 
ChannelMixer::kHalfPower : 1; + Mix(SIDE_LEFT, BACK_LEFT, scale); + Mix(SIDE_RIGHT, BACK_RIGHT, scale); + } else if (HasOutputChannel(BACK_CENTER)) { + // Mix side LR into back center. + Mix(SIDE_LEFT, BACK_CENTER, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix side LR into front LR. + Mix(SIDE_LEFT, LEFT, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix side LR into front center. + Mix(SIDE_LEFT, CENTER, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix back center into: back LR || side LR || front LR || front center. + if (IsUnaccounted(BACK_CENTER)) { + if (HasOutputChannel(BACK_LEFT)) { + // Mix back center into back LR. + MixWithoutAccounting(BACK_CENTER, BACK_LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, BACK_RIGHT, ChannelMixer::kHalfPower); + } else if (HasOutputChannel(SIDE_LEFT)) { + // Mix back center into side LR. + MixWithoutAccounting(BACK_CENTER, SIDE_LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, SIDE_RIGHT, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix back center into front LR. + // TODO(dalecurtis): Not sure about these values? + MixWithoutAccounting(BACK_CENTER, LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix back center into front center. + // TODO(dalecurtis): Not sure about these values? + Mix(BACK_CENTER, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix LR of center into: front LR || front center. + if (IsUnaccounted(LEFT_OF_CENTER)) { + if (HasOutputChannel(LEFT)) { + // Mix LR of center into front LR. + Mix(LEFT_OF_CENTER, LEFT, ChannelMixer::kHalfPower); + Mix(RIGHT_OF_CENTER, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix LR of center into front center. 
+ Mix(LEFT_OF_CENTER, CENTER, ChannelMixer::kHalfPower); + Mix(RIGHT_OF_CENTER, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix LFE into: front center || front LR. + if (IsUnaccounted(LFE)) { + if (!HasOutputChannel(CENTER)) { + // Mix LFE into front LR. + MixWithoutAccounting(LFE, LEFT, ChannelMixer::kHalfPower); + Mix(LFE, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix LFE into front center. + Mix(LFE, CENTER, ChannelMixer::kHalfPower); + } + } + + // All channels should now be accounted for. + RTC_DCHECK(unaccounted_inputs_.empty()); + + // See if the output `matrix_` is simply a remapping matrix. If each input + // channel maps to a single output channel we can simply remap. Doing this + // programmatically is less fragile than logic checks on channel mappings. + for (int output_ch = 0; output_ch < output_channels_; ++output_ch) { + int input_mappings = 0; + for (int input_ch = 0; input_ch < input_channels_; ++input_ch) { + // We can only remap if each row contains a single scale of 1. I.e., each + // output channel is mapped from a single unscaled input channel. + if ((*matrix_)[output_ch][input_ch] != 1 || ++input_mappings > 1) + return false; + } + } + + // If we've gotten here, `matrix_` is simply a remapping. 
+ return true; +} + +void ChannelMixingMatrix::AccountFor(Channels ch) { + unaccounted_inputs_.erase( + std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), ch)); +} + +bool ChannelMixingMatrix::IsUnaccounted(Channels ch) const { + return std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), + ch) != unaccounted_inputs_.end(); +} + +bool ChannelMixingMatrix::HasInputChannel(Channels ch) const { + return ChannelOrder(input_layout_, ch) >= 0; +} + +bool ChannelMixingMatrix::HasOutputChannel(Channels ch) const { + return ChannelOrder(output_layout_, ch) >= 0; +} + +void ChannelMixingMatrix::Mix(Channels input_ch, + Channels output_ch, + float scale) { + MixWithoutAccounting(input_ch, output_ch, scale); + AccountFor(input_ch); +} + +void ChannelMixingMatrix::MixWithoutAccounting(Channels input_ch, + Channels output_ch, + float scale) { + int input_ch_index = ChannelOrder(input_layout_, input_ch); + int output_ch_index = ChannelOrder(output_layout_, output_ch); + + RTC_DCHECK(IsUnaccounted(input_ch)); + RTC_DCHECK_GE(input_ch_index, 0); + RTC_DCHECK_GE(output_ch_index, 0); + + RTC_DCHECK_EQ((*matrix_)[output_ch_index][input_ch_index], 0); + (*matrix_)[output_ch_index][input_ch_index] = scale; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h new file mode 100644 index 0000000000..ee00860846 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ +#define AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ + +#include <vector> + +#include "api/audio/channel_layout.h" + +namespace webrtc { + +class ChannelMixingMatrix { + public: + ChannelMixingMatrix(ChannelLayout input_layout, + int input_channels, + ChannelLayout output_layout, + int output_channels); + + ~ChannelMixingMatrix(); + + // Create the transformation matrix of input channels to output channels. + // Updates the empty matrix with the transformation, and returns true + // if the transformation is just a remapping of channels (no mixing). + // The size of `matrix` is `output_channels` x `input_channels`, i.e., the + // number of rows equals the number of output channels and the number of + // columns corresponds to the number of input channels. + // This file is derived from Chromium's media/base/channel_mixing_matrix.h. + bool CreateTransformationMatrix(std::vector<std::vector<float>>* matrix); + + private: + const bool use_voip_channel_mapping_adjustments_; + + // Result transformation of input channels to output channels + std::vector<std::vector<float>>* matrix_; + + // Input and output channel layout provided during construction. + ChannelLayout input_layout_; + int input_channels_; + ChannelLayout output_layout_; + int output_channels_; + + // Helper variable for tracking which inputs are currently unaccounted, + // should be empty after construction completes. + std::vector<Channels> unaccounted_inputs_; + + // Helper methods for managing unaccounted input channels. + void AccountFor(Channels ch); + bool IsUnaccounted(Channels ch) const; + + // Helper methods for checking if `ch` exists in either `input_layout_` or + // `output_layout_` respectively. + bool HasInputChannel(Channels ch) const; + bool HasOutputChannel(Channels ch) const; + + // Helper methods for updating `matrix_` with the proper value for + // mixing `input_ch` into `output_ch`. 
MixWithoutAccounting() does not + // remove the channel from `unaccounted_inputs_`. + void Mix(Channels input_ch, Channels output_ch, float scale); + void MixWithoutAccounting(Channels input_ch, Channels output_ch, float scale); + + // Delete the copy constructor and assignment operator. + ChannelMixingMatrix(const ChannelMixingMatrix& other) = delete; + ChannelMixingMatrix& operator=(const ChannelMixingMatrix& other) = delete; +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc b/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc new file mode 100644 index 0000000000..a4efb4fd38 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixing_matrix.h" + +#include <stddef.h> + +#include "audio/utility/channel_mixer.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "test/field_trial.h" +#include "test/gtest.h" + +namespace webrtc { + +// Test all possible layout conversions can be constructed and mixed. +// Also ensure that the channel matrix fulfill certain conditions when remapping +// is supported. 
+TEST(ChannelMixingMatrixTest, ConstructAllPossibleLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast<ChannelLayout>(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast<ChannelLayout>(output_layout + 1)) { + // DISCRETE, BITSTREAM can't be tested here based on the current approach. + // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable. + // Stereo down mix should never be the output layout. + if (input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_BITSTREAM || + output_layout == CHANNEL_LAYOUT_DISCRETE || + output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + + rtc::StringBuilder ss; + ss << "Input Layout: " << input_layout + << ", Output Layout: " << output_layout; + SCOPED_TRACE(ss.str()); + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), + output_layout, ChannelLayoutToChannelCount(output_layout)); + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + if (remapping) { + // Also ensure that (when remapping can take place), a maximum of one + // input channel is included per output. This knowledge will simplify + // the channel mixing algorithm since it allows us to find the only + // scale factor which equals 1.0 and copy that input to its + // corresponding output. If no such factor can be found, the + // corresponding output can be set to zero. 
+ for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size()); + int num_input_channels_accounted_for_per_output = 0; + for (int j = 0; j < input_channels; j++) { + float scale = matrix[i][j]; + if (scale > 0) { + EXPECT_EQ(scale, 1.0f); + num_input_channels_accounted_for_per_output++; + } + } + // Each output channel shall contain contribution from one or less + // input channels. + EXPECT_LE(num_input_channels_accounted_for_per_output, 1); + } + } + } + } +} + +// Verify channels are mixed and scaled correctly. +TEST(ChannelMixingMatrixTest, StereoToMono) { + ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO; + ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: stereo + // LEFT RIGHT + // Output: mono CENTER 0.5 0.5 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(1u, matrix.size()); + EXPECT_EQ(2u, matrix[0].size()); + EXPECT_EQ(0.5f, matrix[0][0]); + EXPECT_EQ(0.5f, matrix[0][1]); +} + +TEST(ChannelMixingMatrixTest, MonoToStereo) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: mono + // CENTER + // Output: stereo LEFT 1 + // RIGHT 1 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(2u, matrix.size()); + EXPECT_EQ(1u, matrix[0].size()); + EXPECT_EQ(1.0f, matrix[0][0]); + EXPECT_EQ(1u, matrix[1].size()); + EXPECT_EQ(1.0f, matrix[1][0]); +} + +TEST(ChannelMixingMatrixTest, 
MonoToTwoOneWithoutVoIPAdjustments) { + test::ScopedFieldTrials field_trials( + "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/"); + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_2_1; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: mono + // CENTER + // Output: 2.1 FRONT_LEFT 1 + // FRONT_RIGHT 1 + // BACK_CENTER 0 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(3u, matrix.size()); + EXPECT_EQ(1u, matrix[0].size()); + EXPECT_EQ(1.0f, matrix[0][0]); + EXPECT_EQ(1.0f, matrix[1][0]); + EXPECT_EQ(0.0f, matrix[2][0]); +} + +TEST(ChannelMixingMatrixTest, MonoToTwoOneWithVoIPAdjustments) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_2_1; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: mono + // CENTER + // Output: 2.1 FRONT_LEFT 1 + // FRONT_RIGHT 1 + // BACK_CENTER 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(3u, matrix.size()); + EXPECT_EQ(1u, matrix[0].size()); + EXPECT_EQ(1.0f, matrix[0][0]); + EXPECT_EQ(1.0f, matrix[1][0]); + EXPECT_EQ(0.0f, matrix[2][0]); +} + +TEST(ChannelMixingMatrixTest, MonoToFiveOneWithoutVoIPAdjustments) { + test::ScopedFieldTrials field_trials( + "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/"); + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_5_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + 
ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + // Input: mono + // CENTER + // Output: 5.1 LEFT 0 + // RIGHT 0 + // CENTER 1 + // LFE 0 + // SIDE_LEFT 0 + // SIDE_RIGHT 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size()); + if (n == CENTER) { + EXPECT_EQ(1.0f, matrix[CENTER][0]); + } else { + EXPECT_EQ(0.0f, matrix[n][0]); + } + } +} + +TEST(ChannelMixingMatrixTest, MonoToFiveOneWithVoIPAdjustments) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_5_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + // Input: mono + // CENTER + // Output: 5.1 LEFT 1 + // RIGHT 1 + // CENTER 0 + // LFE 0 + // SIDE_LEFT 0 + // SIDE_RIGHT 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size()); + if (n == LEFT || n == RIGHT) { + EXPECT_EQ(1.0f, matrix[n][0]); + } else { + EXPECT_EQ(0.0f, matrix[n][0]); + } + } +} + +TEST(ChannelMixingMatrixTest, MonoToSevenOneWithoutVoIPAdjustments) { + test::ScopedFieldTrials field_trials( + "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/"); + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_7_1; + const int input_channels = 
ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + // Input: mono + // CENTER + // Output: 7.1 LEFT 0 + // RIGHT 0 + // CENTER 1 + // LFE 0 + // SIDE_LEFT 0 + // SIDE_RIGHT 0 + // BACK_LEFT 0 + // BACK_RIGHT 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size()); + if (n == CENTER) { + EXPECT_EQ(1.0f, matrix[CENTER][0]); + } else { + EXPECT_EQ(0.0f, matrix[n][0]); + } + } +} + +TEST(ChannelMixingMatrixTest, MonoToSevenOneWithVoIPAdjustments) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_7_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + // Input: mono + // CENTER + // Output: 7.1 LEFT 1 + // RIGHT 1 + // CENTER 0 + // LFE 0 + // SIDE_LEFT 0 + // SIDE_RIGHT 0 + // BACK_LEFT 0 + // BACK_RIGHT 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size()); + if (n == LEFT || n == RIGHT) { + EXPECT_EQ(1.0f, matrix[n][0]); + } else { + EXPECT_EQ(0.0f, matrix[n][0]); + } + } +} + +TEST(ChannelMixingMatrixTest, FiveOneToMono) { + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1; + ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; 
+ ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Note: 1/sqrt(2) is shown as 0.707. + // + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(1u, matrix.size()); + EXPECT_EQ(6u, matrix[0].size()); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][0]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][1]); + // The center channel will be mixed at scale 1. + EXPECT_EQ(1.0f, matrix[0][2]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][3]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][4]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][5]); +} + +TEST(ChannelMixingMatrixTest, FiveOneBackToStereo) { + // Front L, Front R, Front C, LFE, Back L, Back R + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1_BACK; + ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Note: 1/sqrt(2) is shown as 0.707. + // Note: The Channels enumerator is given by {LEFT = 0, RIGHT, CENTER, LFE, + // BACK_LEFT, BACK_RIGHT,...}, hence we can use the enumerator values as + // indexes in the matrix when verifying the scaling factors. 
+ // + // Input: 5.1 + // LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT + // Output: stereo LEFT 1 0 0.707 0.707 0.707 0 + // RIGHT 0 1 0.707 0.707 0 0.707 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[LEFT].size()); + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[RIGHT].size()); + EXPECT_EQ(1.0f, matrix[LEFT][LEFT]); + EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]); + EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]); + EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]); + EXPECT_EQ(0.0f, matrix[LEFT][BACK_RIGHT]); + EXPECT_EQ(0.0f, matrix[RIGHT][BACK_LEFT]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][CENTER]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][LFE]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][BACK_LEFT]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][CENTER]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][LFE]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][BACK_RIGHT]); +} + +TEST(ChannelMixingMatrixTest, FiveOneToSevenOne) { + // Front L, Front R, Front C, LFE, Side L, Side R + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1; + // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R + ChannelLayout output_layout = CHANNEL_LAYOUT_7_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: 7.1 LEFT 1 0 0 0 0 0 + // RIGHT 0 1 0 0 0 0 + // CENTER 0 0 1 0 0 0 + // LFE 0 0 0 1 0 0 + // SIDE_LEFT 0 0 0 0 1 0 + // SIDE_RIGHT 0 0 0 0 0 1 + // BACK_LEFT 0 0 0 0 0 0 + // BACK_RIGHT 0 0 0 0 0 0 + // + EXPECT_TRUE(remapping); 
+ EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size()); + for (int j = 0; j < input_channels; j++) { + if (i == j) { + EXPECT_EQ(1.0f, matrix[i][j]); + } else { + EXPECT_EQ(0.0f, matrix[i][j]); + } + } + } +} + +TEST(ChannelMixingMatrixTest, StereoToFiveOne) { + ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO; + ChannelLayout output_layout = CHANNEL_LAYOUT_5_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: Stereo + // LEFT RIGHT + // Output: 5.1 LEFT 1 0 + // RIGHT 0 1 + // CENTER 0 0 + // LFE 0 0 + // SIDE_LEFT 0 0 + // SIDE_RIGHT 0 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size()); + if (n == LEFT) { + EXPECT_EQ(1.0f, matrix[LEFT][LEFT]); + EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]); + } else if (n == RIGHT) { + EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]); + EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]); + } else { + EXPECT_EQ(0.0f, matrix[n][LEFT]); + EXPECT_EQ(0.0f, matrix[n][RIGHT]); + } + } +} + +TEST(ChannelMixingMatrixTest, DiscreteToDiscrete) { + const struct { + int input_channels; + int output_channels; + } test_case[] = { + {2, 2}, + {2, 5}, + {5, 2}, + }; + + for (size_t n = 0; n < arraysize(test_case); n++) { + int input_channels = test_case[n].input_channels; + int output_channels = test_case[n].output_channels; + ChannelMixingMatrix matrix_builder(CHANNEL_LAYOUT_DISCRETE, input_channels, + CHANNEL_LAYOUT_DISCRETE, + output_channels); + 
std::vector<std::vector<float>> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size()); + for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size()); + for (int j = 0; j < input_channels; j++) { + if (i == j) { + EXPECT_EQ(1.0f, matrix[i][j]); + } else { + EXPECT_EQ(0.0f, matrix[i][j]); + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/BUILD.gn b/third_party/libwebrtc/audio/voip/BUILD.gn new file mode 100644 index 0000000000..e807e2276b --- /dev/null +++ b/third_party/libwebrtc/audio/voip/BUILD.gn @@ -0,0 +1,103 @@ +# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import("../../webrtc.gni") + +rtc_library("voip_core") { + sources = [ + "voip_core.cc", + "voip_core.h", + ] + deps = [ + ":audio_channel", + "..:audio", + "../../api:scoped_refptr", + "../../api/audio_codecs:audio_codecs_api", + "../../api/task_queue", + "../../api/voip:voip_api", + "../../modules/audio_device:audio_device_api", + "../../modules/audio_mixer:audio_mixer_impl", + "../../modules/audio_processing:api", + "../../rtc_base:criticalsection", + "../../rtc_base:logging", + "../../rtc_base/synchronization:mutex", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_channel") { + sources = [ + "audio_channel.cc", + "audio_channel.h", + ] + deps = [ + ":audio_egress", + ":audio_ingress", + "../../api:transport_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/task_queue", + "../../api/voip:voip_api", + "../../modules/audio_device:audio_device_api", + "../../modules/rtp_rtcp", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../rtc_base:criticalsection", + "../../rtc_base:logging", + "../../rtc_base:refcount", + ] +} + +rtc_library("audio_ingress") { + sources = [ + "audio_ingress.cc", + "audio_ingress.h", + ] + deps = [ + "..:audio", + "../../api:array_view", + "../../api:rtp_headers", + "../../api:scoped_refptr", + "../../api:transport_api", + "../../api/audio:audio_mixer_api", + "../../api/audio_codecs:audio_codecs_api", + "../../api/voip:voip_api", + "../../modules/audio_coding", + "../../modules/rtp_rtcp", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../rtc_base:criticalsection", + "../../rtc_base:logging", + "../../rtc_base:rtc_numerics", + "../../rtc_base:safe_minmax", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../utility:audio_frame_operations", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_egress") { + sources = [ + "audio_egress.cc", + "audio_egress.h", + ] + deps = [ + "..:audio", + 
"../../api:sequence_checker", + "../../api/audio_codecs:audio_codecs_api", + "../../api/task_queue", + "../../call:audio_sender_interface", + "../../modules/audio_coding", + "../../modules/rtp_rtcp", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../rtc_base:logging", + "../../rtc_base:rtc_task_queue", + "../../rtc_base:timeutils", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:no_unique_address", + "../utility:audio_frame_operations", + ] +} diff --git a/third_party/libwebrtc/audio/voip/audio_channel.cc b/third_party/libwebrtc/audio/voip/audio_channel.cc new file mode 100644 index 0000000000..a70e33ec38 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_channel.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/voip/audio_channel.h" + +#include <utility> +#include <vector> + +#include "api/audio_codecs/audio_format.h" +#include "api/task_queue/task_queue_factory.h" +#include "modules/rtp_rtcp/include/receive_statistics.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { + +constexpr int kRtcpReportIntervalMs = 5000; + +} // namespace + +AudioChannel::AudioChannel( + Transport* transport, + uint32_t local_ssrc, + TaskQueueFactory* task_queue_factory, + AudioMixer* audio_mixer, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) + : audio_mixer_(audio_mixer) { + RTC_DCHECK(task_queue_factory); + RTC_DCHECK(audio_mixer); + + Clock* clock = Clock::GetRealTimeClock(); + receive_statistics_ = ReceiveStatistics::Create(clock); + + RtpRtcpInterface::Configuration rtp_config; + rtp_config.clock = clock; + rtp_config.audio = true; + rtp_config.receive_statistics = receive_statistics_.get(); + rtp_config.rtcp_report_interval_ms = kRtcpReportIntervalMs; + rtp_config.outgoing_transport = transport; + rtp_config.local_media_ssrc = local_ssrc; + + rtp_rtcp_ = ModuleRtpRtcpImpl2::Create(rtp_config); + + rtp_rtcp_->SetSendingMediaStatus(false); + rtp_rtcp_->SetRTCPStatus(RtcpMode::kCompound); + + ingress_ = std::make_unique<AudioIngress>(rtp_rtcp_.get(), clock, + receive_statistics_.get(), + std::move(decoder_factory)); + egress_ = + std::make_unique<AudioEgress>(rtp_rtcp_.get(), clock, task_queue_factory); + + // Set the instance of audio ingress to be part of audio mixer for ADM to + // fetch audio samples to play. + audio_mixer_->AddSource(ingress_.get()); +} + +AudioChannel::~AudioChannel() { + if (egress_->IsSending()) { + StopSend(); + } + if (ingress_->IsPlaying()) { + StopPlay(); + } + + audio_mixer_->RemoveSource(ingress_.get()); + + // TODO(bugs.webrtc.org/11581): unclear if we still need to clear `egress_` + // here. 
+ egress_.reset(); + ingress_.reset(); +} + +bool AudioChannel::StartSend() { + // If encoder has not been set, return false. + if (!egress_->StartSend()) { + return false; + } + + // Start sending with RTP stack if it has not been sending yet. + if (!rtp_rtcp_->Sending()) { + rtp_rtcp_->SetSendingStatus(true); + } + return true; +} + +void AudioChannel::StopSend() { + egress_->StopSend(); + + // Deactivate RTP stack when both sending and receiving are stopped. + // SetSendingStatus(false) triggers the transmission of RTCP BYE + // message to remote endpoint. + if (!ingress_->IsPlaying() && rtp_rtcp_->Sending()) { + rtp_rtcp_->SetSendingStatus(false); + } +} + +bool AudioChannel::StartPlay() { + // If decoders have not been set, return false. + if (!ingress_->StartPlay()) { + return false; + } + + // If RTP stack is not sending then start sending as in recv-only mode, RTCP + // receiver report is expected. + if (!rtp_rtcp_->Sending()) { + rtp_rtcp_->SetSendingStatus(true); + } + return true; +} + +void AudioChannel::StopPlay() { + ingress_->StopPlay(); + + // Deactivate RTP stack only when both sending and receiving are stopped. 
+ if (!rtp_rtcp_->SendingMedia() && rtp_rtcp_->Sending()) { + rtp_rtcp_->SetSendingStatus(false); + } +} + +IngressStatistics AudioChannel::GetIngressStatistics() { + IngressStatistics ingress_stats; + NetworkStatistics stats = ingress_->GetNetworkStatistics(); + ingress_stats.neteq_stats.total_samples_received = stats.totalSamplesReceived; + ingress_stats.neteq_stats.concealed_samples = stats.concealedSamples; + ingress_stats.neteq_stats.concealment_events = stats.concealmentEvents; + ingress_stats.neteq_stats.jitter_buffer_delay_ms = stats.jitterBufferDelayMs; + ingress_stats.neteq_stats.jitter_buffer_emitted_count = + stats.jitterBufferEmittedCount; + ingress_stats.neteq_stats.jitter_buffer_target_delay_ms = + stats.jitterBufferTargetDelayMs; + ingress_stats.neteq_stats.inserted_samples_for_deceleration = + stats.insertedSamplesForDeceleration; + ingress_stats.neteq_stats.removed_samples_for_acceleration = + stats.removedSamplesForAcceleration; + ingress_stats.neteq_stats.silent_concealed_samples = + stats.silentConcealedSamples; + ingress_stats.neteq_stats.fec_packets_received = stats.fecPacketsReceived; + ingress_stats.neteq_stats.fec_packets_discarded = stats.fecPacketsDiscarded; + ingress_stats.neteq_stats.delayed_packet_outage_samples = + stats.delayedPacketOutageSamples; + ingress_stats.neteq_stats.relative_packet_arrival_delay_ms = + stats.relativePacketArrivalDelayMs; + ingress_stats.neteq_stats.interruption_count = stats.interruptionCount; + ingress_stats.neteq_stats.total_interruption_duration_ms = + stats.totalInterruptionDurationMs; + ingress_stats.total_duration = ingress_->GetOutputTotalDuration(); + return ingress_stats; +} + +ChannelStatistics AudioChannel::GetChannelStatistics() { + ChannelStatistics channel_stat = ingress_->GetChannelStatistics(); + + StreamDataCounters rtp_stats, rtx_stats; + rtp_rtcp_->GetSendStreamDataCounters(&rtp_stats, &rtx_stats); + channel_stat.bytes_sent = + rtp_stats.transmitted.payload_bytes + 
rtx_stats.transmitted.payload_bytes; + channel_stat.packets_sent = + rtp_stats.transmitted.packets + rtx_stats.transmitted.packets; + + return channel_stat; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/audio_channel.h b/third_party/libwebrtc/audio/voip/audio_channel.h new file mode 100644 index 0000000000..7338d9faab --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_channel.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_VOIP_AUDIO_CHANNEL_H_ +#define AUDIO_VOIP_AUDIO_CHANNEL_H_ + +#include <map> +#include <memory> +#include <queue> +#include <utility> + +#include "api/task_queue/task_queue_factory.h" +#include "api/voip/voip_base.h" +#include "api/voip/voip_statistics.h" +#include "audio/voip/audio_egress.h" +#include "audio/voip/audio_ingress.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/ref_count.h" + +namespace webrtc { + +// AudioChannel represents a single media session and provides APIs over +// AudioIngress and AudioEgress. Note that a single RTP stack is shared with +// these two classes as it has both sending and receiving capabilities. +class AudioChannel : public rtc::RefCountInterface { + public: + AudioChannel(Transport* transport, + uint32_t local_ssrc, + TaskQueueFactory* task_queue_factory, + AudioMixer* audio_mixer, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory); + ~AudioChannel() override; + + // Set and get the ChannelId that this audio channel belongs to, for debugging + // and logging purposes. 
+ void SetId(ChannelId id) { id_ = id; } + ChannelId GetId() const { return id_; } + + // APIs to start/stop audio channel on each direction. + // StartSend/StartPlay returns false if encoder/decoders + // have not been set, respectively. + bool StartSend(); + void StopSend(); + bool StartPlay(); + void StopPlay(); + + // APIs relayed to AudioEgress. + bool IsSendingMedia() const { return egress_->IsSending(); } + AudioSender* GetAudioSender() { return egress_.get(); } + void SetEncoder(int payload_type, + const SdpAudioFormat& encoder_format, + std::unique_ptr<AudioEncoder> encoder) { + egress_->SetEncoder(payload_type, encoder_format, std::move(encoder)); + } + absl::optional<SdpAudioFormat> GetEncoderFormat() const { + return egress_->GetEncoderFormat(); + } + void RegisterTelephoneEventType(int rtp_payload_type, int sample_rate_hz) { + egress_->RegisterTelephoneEventType(rtp_payload_type, sample_rate_hz); + } + bool SendTelephoneEvent(int dtmf_event, int duration_ms) { + return egress_->SendTelephoneEvent(dtmf_event, duration_ms); + } + void SetMute(bool enable) { egress_->SetMute(enable); } + + // APIs relayed to AudioIngress. + bool IsPlaying() const { return ingress_->IsPlaying(); } + void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet) { + ingress_->ReceivedRTPPacket(rtp_packet); + } + void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet) { + ingress_->ReceivedRTCPPacket(rtcp_packet); + } + void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) { + ingress_->SetReceiveCodecs(codecs); + } + IngressStatistics GetIngressStatistics(); + ChannelStatistics GetChannelStatistics(); + + // See comments on the methods used from AudioEgress and AudioIngress. + // Conversion to double is following what is done in + // DoubleAudioLevelFromIntAudioLevel method in rtc_stats_collector.cc to be + // consistent. 
+ double GetInputAudioLevel() const { + return egress_->GetInputAudioLevel() / 32767.0; + } + double GetInputTotalEnergy() const { return egress_->GetInputTotalEnergy(); } + double GetInputTotalDuration() const { + return egress_->GetInputTotalDuration(); + } + double GetOutputAudioLevel() const { + return ingress_->GetOutputAudioLevel() / 32767.0; + } + double GetOutputTotalEnergy() const { + return ingress_->GetOutputTotalEnergy(); + } + double GetOutputTotalDuration() const { + return ingress_->GetOutputTotalDuration(); + } + + // Internal API for testing purpose. + void SendRTCPReportForTesting(RTCPPacketType type) { + int32_t result = rtp_rtcp_->SendRTCP(type); + RTC_DCHECK(result == 0); + } + + private: + // ChannelId that this audio channel belongs for logging purpose. + ChannelId id_; + + // Synchronization is handled internally by AudioMixer. + AudioMixer* audio_mixer_; + + // Listed in order for safe destruction of AudioChannel object. + // Synchronization for these are handled internally. + std::unique_ptr<ReceiveStatistics> receive_statistics_; + std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; + std::unique_ptr<AudioIngress> ingress_; + std::unique_ptr<AudioEgress> egress_; +}; + +} // namespace webrtc + +#endif // AUDIO_VOIP_AUDIO_CHANNEL_H_ diff --git a/third_party/libwebrtc/audio/voip/audio_egress.cc b/third_party/libwebrtc/audio/voip/audio_egress.cc new file mode 100644 index 0000000000..95a1a3351e --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_egress.cc @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/voip/audio_egress.h" + +#include <utility> +#include <vector> + +#include "rtc_base/logging.h" + +namespace webrtc { + +AudioEgress::AudioEgress(RtpRtcpInterface* rtp_rtcp, + Clock* clock, + TaskQueueFactory* task_queue_factory) + : rtp_rtcp_(rtp_rtcp), + rtp_sender_audio_(clock, rtp_rtcp_->RtpSender()), + audio_coding_(AudioCodingModule::Create()), + encoder_queue_(task_queue_factory->CreateTaskQueue( + "AudioEncoder", + TaskQueueFactory::Priority::NORMAL)) { + audio_coding_->RegisterTransportCallback(this); +} + +AudioEgress::~AudioEgress() { + audio_coding_->RegisterTransportCallback(nullptr); +} + +bool AudioEgress::IsSending() const { + return rtp_rtcp_->SendingMedia(); +} + +void AudioEgress::SetEncoder(int payload_type, + const SdpAudioFormat& encoder_format, + std::unique_ptr<AudioEncoder> encoder) { + RTC_DCHECK_GE(payload_type, 0); + RTC_DCHECK_LE(payload_type, 127); + + SetEncoderFormat(encoder_format); + + // The RTP/RTCP module needs to know the RTP timestamp rate (i.e. clockrate) + // as well as some other things, so we collect this info and send it along. 
+ rtp_rtcp_->RegisterSendPayloadFrequency(payload_type, + encoder->RtpTimestampRateHz()); + rtp_sender_audio_.RegisterAudioPayload("audio", payload_type, + encoder->RtpTimestampRateHz(), + encoder->NumChannels(), 0); + + audio_coding_->SetEncoder(std::move(encoder)); +} + +bool AudioEgress::StartSend() { + if (!GetEncoderFormat()) { + RTC_DLOG(LS_WARNING) << "Send codec has not been set yet"; + return false; + } + rtp_rtcp_->SetSendingMediaStatus(true); + return true; +} + +void AudioEgress::StopSend() { + rtp_rtcp_->SetSendingMediaStatus(false); +} + +void AudioEgress::SendAudioData(std::unique_ptr<AudioFrame> audio_frame) { + RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); + RTC_DCHECK_LE(audio_frame->num_channels_, 8); + + encoder_queue_.PostTask( + [this, audio_frame = std::move(audio_frame)]() mutable { + RTC_DCHECK_RUN_ON(&encoder_queue_); + if (!rtp_rtcp_->SendingMedia()) { + return; + } + + double duration_seconds = + static_cast<double>(audio_frame->samples_per_channel_) / + audio_frame->sample_rate_hz_; + + input_audio_level_.ComputeLevel(*audio_frame, duration_seconds); + + AudioFrameOperations::Mute(audio_frame.get(), + encoder_context_.previously_muted_, + encoder_context_.mute_); + encoder_context_.previously_muted_ = encoder_context_.mute_; + + audio_frame->timestamp_ = encoder_context_.frame_rtp_timestamp_; + + // This call will trigger AudioPacketizationCallback::SendData if + // encoding is done and payload is ready for packetization and + // transmission. Otherwise, it will return without invoking the + // callback. 
+ if (audio_coding_->Add10MsData(*audio_frame) < 0) { + RTC_DLOG(LS_ERROR) << "ACM::Add10MsData() failed."; + return; + } + + encoder_context_.frame_rtp_timestamp_ += + rtc::dchecked_cast<uint32_t>(audio_frame->samples_per_channel_); + }); +} + +// AudioPacketizationCallback hook, run on `encoder_queue_`: passes one encoded +// payload to the RTP/RTCP module and then to the audio RTP sender. Returns 0 +// on success and -1 if either of those calls reports failure. +int32_t AudioEgress::SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_size) { + RTC_DCHECK_RUN_ON(&encoder_queue_); + + rtc::ArrayView<const uint8_t> payload(payload_data, payload_size); + + // Currently we don't get a capture time from downstream modules (ADM, + // AudioTransportImpl), so pass -1. The sentinel must be signed and 64 bits + // wide: held in a uint32_t, -1 wraps to 4294967295 and stays a large positive + // value when widened, i.e. it would look like a real capture time. + // NOTE(review): assumes OnSendingRtpFrame() takes the capture time as + // int64_t; confirm against RtpRtcpInterface. + // TODO(natim@webrtc.org): Integrate once it's ready. + constexpr int64_t kUndefinedCaptureTime = -1; + + // Push data from ACM to RTP/RTCP-module to deliver audio frame for + // packetization. + if (!rtp_rtcp_->OnSendingRtpFrame(timestamp, kUndefinedCaptureTime, + payload_type, + /*force_sender_report=*/false)) { + return -1; + } + + const uint32_t rtp_timestamp = timestamp + rtp_rtcp_->StartTimestamp(); + + // This call will trigger Transport::SendPacket() from the RTP/RTCP module. 
+ if (!rtp_sender_audio_.SendAudio({.type = frame_type, + .payload = payload, + .payload_id = payload_type, + .rtp_timestamp = rtp_timestamp})) { + RTC_DLOG(LS_ERROR) + << "AudioEgress::SendData() failed to send data to RTP/RTCP module"; + return -1; + } + + return 0; +} + +void AudioEgress::RegisterTelephoneEventType(int rtp_payload_type, + int sample_rate_hz) { + RTC_DCHECK_GE(rtp_payload_type, 0); + RTC_DCHECK_LE(rtp_payload_type, 127); + + rtp_rtcp_->RegisterSendPayloadFrequency(rtp_payload_type, sample_rate_hz); + rtp_sender_audio_.RegisterAudioPayload("telephone-event", rtp_payload_type, + sample_rate_hz, 0, 0); +} + +bool AudioEgress::SendTelephoneEvent(int dtmf_event, int duration_ms) { + RTC_DCHECK_GE(dtmf_event, 0); + RTC_DCHECK_LE(dtmf_event, 255); + RTC_DCHECK_GE(duration_ms, 0); + RTC_DCHECK_LE(duration_ms, 65535); + + if (!IsSending()) { + return false; + } + + constexpr int kTelephoneEventAttenuationdB = 10; + + if (rtp_sender_audio_.SendTelephoneEvent(dtmf_event, duration_ms, + kTelephoneEventAttenuationdB) != 0) { + RTC_DLOG(LS_ERROR) << "SendTelephoneEvent() failed to send event"; + return false; + } + return true; +} + +void AudioEgress::SetMute(bool mute) { + encoder_queue_.PostTask([this, mute] { + RTC_DCHECK_RUN_ON(&encoder_queue_); + encoder_context_.mute_ = mute; + }); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/audio_egress.h b/third_party/libwebrtc/audio/voip/audio_egress.h new file mode 100644 index 0000000000..989e5bda59 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_egress.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef AUDIO_VOIP_AUDIO_EGRESS_H_ +#define AUDIO_VOIP_AUDIO_EGRESS_H_ + +#include <memory> +#include <string> + +#include "api/audio_codecs/audio_format.h" +#include "api/sequence_checker.h" +#include "api/task_queue/task_queue_factory.h" +#include "audio/audio_level.h" +#include "audio/utility/audio_frame_operations.h" +#include "call/audio_sender.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/rtp_rtcp/include/report_block_data.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_interface.h" +#include "modules/rtp_rtcp/source/rtp_sender_audio.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +// AudioEgress receives input samples from AudioDeviceModule via +// AudioTransportImpl through AudioSender interface. Once it encodes the sample +// via selected encoder through AudioPacketizationCallback interface, the +// encoded payload will be packetized by the RTP stack, resulting in ready to +// send RTP packet to remote endpoint. +// +// TaskQueue is used to encode and send RTP asynchronously as some OS platforms +// use the same thread for both audio input and output sample deliveries which +// can affect audio quality. +// +// Note that this class is originally based on ChannelSend in +// audio/channel_send.cc with non-audio related logic trimmed as aimed for +// smaller footprint. +class AudioEgress : public AudioSender, public AudioPacketizationCallback { + public: + AudioEgress(RtpRtcpInterface* rtp_rtcp, + Clock* clock, + TaskQueueFactory* task_queue_factory); + ~AudioEgress() override; + + // Set the encoder format and payload type for AudioCodingModule. + // It's possible to change the encoder type during its active usage. + // `payload_type` must be the type that is negotiated with peer through + // offer/answer. 
+ void SetEncoder(int payload_type, + const SdpAudioFormat& encoder_format, + std::unique_ptr<AudioEncoder> encoder); + + // Start or stop sending operation of AudioEgress. This will start/stop + // the RTP stack also causes encoder queue thread to start/stop + // processing input audio samples. StartSend will return false if + // a send codec has not been set. + bool StartSend(); + void StopSend(); + + // Query the state of the RTP stack. This returns true if StartSend() + // called and false if StopSend() is called. + bool IsSending() const; + + // Enable or disable Mute state. + void SetMute(bool mute); + + // Retrieve current encoder format info. This returns encoder format set + // by SetEncoder() and if encoder is not set, this will return nullopt. + absl::optional<SdpAudioFormat> GetEncoderFormat() const { + MutexLock lock(&lock_); + return encoder_format_; + } + + // Register the payload type and sample rate for DTMF (RFC 4733) payload. + void RegisterTelephoneEventType(int rtp_payload_type, int sample_rate_hz); + + // Send DTMF named event as specified by + // https://tools.ietf.org/html/rfc4733#section-3.2 + // `duration_ms` specifies the duration of DTMF packets that will be emitted + // in place of real RTP packets instead. + // This will return true when requested dtmf event is successfully scheduled + // otherwise false when the dtmf queue reached maximum of 20 events. + bool SendTelephoneEvent(int dtmf_event, int duration_ms); + + // See comments on LevelFullRange, TotalEnergy, TotalDuration from + // audio/audio_level.h. + int GetInputAudioLevel() const { return input_audio_level_.LevelFullRange(); } + double GetInputTotalEnergy() const { + return input_audio_level_.TotalEnergy(); + } + double GetInputTotalDuration() const { + return input_audio_level_.TotalDuration(); + } + + // Implementation of AudioSender interface. 
+ void SendAudioData(std::unique_ptr<AudioFrame> audio_frame) override; + + // Implementation of AudioPacketizationCallback interface. + int32_t SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_size) override; + + private: + void SetEncoderFormat(const SdpAudioFormat& encoder_format) { + MutexLock lock(&lock_); + encoder_format_ = encoder_format; + } + + mutable Mutex lock_; + + // Current encoder format selected by caller. + absl::optional<SdpAudioFormat> encoder_format_ RTC_GUARDED_BY(lock_); + + // Synchronization is handled internally by RtpRtcp. + RtpRtcpInterface* const rtp_rtcp_; + + // Synchronization is handled internally by RTPSenderAudio. + RTPSenderAudio rtp_sender_audio_; + + // Synchronization is handled internally by AudioCodingModule. + const std::unique_ptr<AudioCodingModule> audio_coding_; + + // Synchronization is handled internally by voe::AudioLevel. + voe::AudioLevel input_audio_level_; + + // Struct that holds all variables used by encoder task queue. + struct EncoderContext { + // Offset used to mark rtp timestamp in sample rate unit in + // newly received audio frame from AudioTransport. + uint32_t frame_rtp_timestamp_ = 0; + + // Flag to track mute state from caller. `previously_muted_` is used to + // track previous state as part of input to AudioFrameOperations::Mute + // to implement fading effect when (un)mute is invoked. + bool mute_ = false; + bool previously_muted_ = false; + }; + + EncoderContext encoder_context_ RTC_GUARDED_BY(encoder_queue_); + + // Defined last to ensure that there are no running tasks when the other + // members are destroyed. 
+ rtc::TaskQueue encoder_queue_; +}; + +} // namespace webrtc + +#endif // AUDIO_VOIP_AUDIO_EGRESS_H_ diff --git a/third_party/libwebrtc/audio/voip/audio_ingress.cc b/third_party/libwebrtc/audio/voip/audio_ingress.cc new file mode 100644 index 0000000000..80f21152c0 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_ingress.cc @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/voip/audio_ingress.h" + +#include <algorithm> +#include <utility> +#include <vector> + +#include "api/audio_codecs/audio_format.h" +#include "audio/utility/audio_frame_operations.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "modules/rtp_rtcp/source/rtcp_packet/common_header.h" +#include "modules/rtp_rtcp/source/rtcp_packet/receiver_report.h" +#include "modules/rtp_rtcp/source/rtcp_packet/sender_report.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { + +namespace { + +acm2::AcmReceiver::Config CreateAcmConfig( + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) { + acm2::AcmReceiver::Config acm_config; + acm_config.neteq_config.enable_muted_state = true; + acm_config.decoder_factory = decoder_factory; + return acm_config; +} + +} // namespace + +AudioIngress::AudioIngress( + RtpRtcpInterface* rtp_rtcp, + Clock* clock, + ReceiveStatistics* receive_statistics, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) + : playing_(false), + remote_ssrc_(0), + first_rtp_timestamp_(-1), + 
rtp_receive_statistics_(receive_statistics), + rtp_rtcp_(rtp_rtcp), + acm_receiver_(CreateAcmConfig(decoder_factory)), + ntp_estimator_(clock) {} + +AudioIngress::~AudioIngress() = default; + +AudioMixer::Source::AudioFrameInfo AudioIngress::GetAudioFrameWithInfo( + int sampling_rate, + AudioFrame* audio_frame) { + audio_frame->sample_rate_hz_ = sampling_rate; + + // Get 10ms raw PCM data from the ACM. + bool muted = false; + if (acm_receiver_.GetAudio(sampling_rate, audio_frame, &muted) == -1) { + RTC_DLOG(LS_ERROR) << "GetAudio() failed!"; + // In all likelihood, the audio in this frame is garbage. We return an + // error so that the audio mixer module doesn't add it to the mix. As + // a result, it won't be played out and the actions skipped here are + // irrelevant. + return AudioMixer::Source::AudioFrameInfo::kError; + } + + if (muted) { + AudioFrameOperations::Mute(audio_frame); + } + + // Measure audio level. + constexpr double kAudioSampleDurationSeconds = 0.01; + output_audio_level_.ComputeLevel(*audio_frame, kAudioSampleDurationSeconds); + + // If caller invoked StopPlay(), then mute the frame. + if (!playing_) { + AudioFrameOperations::Mute(audio_frame); + muted = true; + } + + // Set first rtp timestamp with first audio frame with valid timestamp. + if (first_rtp_timestamp_ < 0 && audio_frame->timestamp_ != 0) { + first_rtp_timestamp_ = audio_frame->timestamp_; + } + + if (first_rtp_timestamp_ >= 0) { + // Compute elapsed and NTP times. + int64_t unwrap_timestamp; + { + MutexLock lock(&lock_); + unwrap_timestamp = + timestamp_wrap_handler_.Unwrap(audio_frame->timestamp_); + audio_frame->ntp_time_ms_ = + ntp_estimator_.Estimate(audio_frame->timestamp_); + } + // For clock rate, default to the playout sampling rate if we haven't + // received any packets yet. + absl::optional<std::pair<int, SdpAudioFormat>> decoder = + acm_receiver_.LastDecoder(); + int clock_rate = decoder ? 
decoder->second.clockrate_hz + : acm_receiver_.last_output_sample_rate_hz(); + RTC_DCHECK_GT(clock_rate, 0); + audio_frame->elapsed_time_ms_ = + (unwrap_timestamp - first_rtp_timestamp_) / (clock_rate / 1000); + } + + return muted ? AudioMixer::Source::AudioFrameInfo::kMuted + : AudioMixer::Source::AudioFrameInfo::kNormal; +} + +bool AudioIngress::StartPlay() { + { + MutexLock lock(&lock_); + if (receive_codec_info_.empty()) { + RTC_DLOG(LS_WARNING) << "Receive codecs have not been set yet"; + return false; + } + } + playing_ = true; + return true; +} + +void AudioIngress::SetReceiveCodecs( + const std::map<int, SdpAudioFormat>& codecs) { + { + MutexLock lock(&lock_); + for (const auto& kv : codecs) { + receive_codec_info_[kv.first] = kv.second.clockrate_hz; + } + } + acm_receiver_.SetCodecs(codecs); +} + +void AudioIngress::ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet) { + RtpPacketReceived rtp_packet_received; + rtp_packet_received.Parse(rtp_packet.data(), rtp_packet.size()); + + // Set payload type's sampling rate before we feed it into ReceiveStatistics. + { + MutexLock lock(&lock_); + const auto& it = + receive_codec_info_.find(rtp_packet_received.PayloadType()); + // If sampling rate info is not available in our received codec set, it + // would mean that remote media endpoint is sending incorrect payload id + // which can't be processed correctly especially on payload type id in + // dynamic range. + if (it == receive_codec_info_.end()) { + RTC_DLOG(LS_WARNING) << "Unexpected payload id received: " + << rtp_packet_received.PayloadType(); + return; + } + rtp_packet_received.set_payload_type_frequency(it->second); + } + + // Track current remote SSRC. 
+ if (rtp_packet_received.Ssrc() != remote_ssrc_) { + rtp_rtcp_->SetRemoteSSRC(rtp_packet_received.Ssrc()); + remote_ssrc_.store(rtp_packet_received.Ssrc()); + } + + rtp_receive_statistics_->OnRtpPacket(rtp_packet_received); + + RTPHeader header; + rtp_packet_received.GetHeader(&header); + + size_t packet_length = rtp_packet_received.size(); + if (packet_length < header.headerLength || + (packet_length - header.headerLength) < header.paddingLength) { + RTC_DLOG(LS_ERROR) << "Packet length(" << packet_length << ") header(" + << header.headerLength << ") padding(" + << header.paddingLength << ")"; + return; + } + + const uint8_t* payload = rtp_packet_received.data() + header.headerLength; + size_t payload_length = packet_length - header.headerLength; + size_t payload_data_length = payload_length - header.paddingLength; + auto data_view = rtc::ArrayView<const uint8_t>(payload, payload_data_length); + + // Push the incoming payload (parsed and ready for decoding) into the ACM. + if (acm_receiver_.InsertPacket(header, data_view) != 0) { + RTC_DLOG(LS_ERROR) << "AudioIngress::ReceivedRTPPacket() unable to " + "push data to the ACM"; + } +} + +void AudioIngress::ReceivedRTCPPacket( + rtc::ArrayView<const uint8_t> rtcp_packet) { + rtcp::CommonHeader rtcp_header; + if (rtcp_header.Parse(rtcp_packet.data(), rtcp_packet.size()) && + (rtcp_header.type() == rtcp::SenderReport::kPacketType || + rtcp_header.type() == rtcp::ReceiverReport::kPacketType)) { + RTC_DCHECK_GE(rtcp_packet.size(), 8); + + uint32_t sender_ssrc = + ByteReader<uint32_t>::ReadBigEndian(rtcp_packet.data() + 4); + + // If we don't have remote ssrc at this point, it's likely that remote + // endpoint is receive-only or it could have restarted the media. + if (sender_ssrc != remote_ssrc_) { + rtp_rtcp_->SetRemoteSSRC(sender_ssrc); + remote_ssrc_.store(sender_ssrc); + } + } + + // Deliver RTCP packet to RTP/RTCP module for parsing and processing. 
+ rtp_rtcp_->IncomingRtcpPacket(rtcp_packet); + + absl::optional<TimeDelta> rtt = rtp_rtcp_->LastRtt(); + if (!rtt.has_value()) { + // Waiting for valid RTT. + return; + } + + absl::optional<RtpRtcpInterface::SenderReportStats> last_sr = + rtp_rtcp_->GetSenderReportStats(); + if (!last_sr.has_value()) { + // Waiting for RTCP. + return; + } + + { + MutexLock lock(&lock_); + ntp_estimator_.UpdateRtcpTimestamp(*rtt, last_sr->last_remote_timestamp, + last_sr->last_remote_rtp_timestamp); + } +} + +ChannelStatistics AudioIngress::GetChannelStatistics() { + ChannelStatistics channel_stats; + + // Get clockrate for current decoder ahead of jitter calculation. + uint32_t clockrate_hz = 0; + absl::optional<std::pair<int, SdpAudioFormat>> decoder = + acm_receiver_.LastDecoder(); + if (decoder) { + clockrate_hz = decoder->second.clockrate_hz; + } + + StreamStatistician* statistician = + rtp_receive_statistics_->GetStatistician(remote_ssrc_); + if (statistician) { + RtpReceiveStats stats = statistician->GetStats(); + channel_stats.packets_lost = stats.packets_lost; + channel_stats.packets_received = stats.packet_counter.packets; + channel_stats.bytes_received = stats.packet_counter.payload_bytes; + channel_stats.remote_ssrc = remote_ssrc_; + if (clockrate_hz > 0) { + channel_stats.jitter = static_cast<double>(stats.jitter) / clockrate_hz; + } + } + + // Get RTCP report using remote SSRC. 
+ const std::vector<ReportBlockData>& report_data = + rtp_rtcp_->GetLatestReportBlockData(); + for (const ReportBlockData& rtcp_report : report_data) { + if (rtp_rtcp_->SSRC() != rtcp_report.source_ssrc() || + remote_ssrc_ != rtcp_report.sender_ssrc()) { + continue; + } + RemoteRtcpStatistics remote_stat; + remote_stat.packets_lost = rtcp_report.cumulative_lost(); + remote_stat.fraction_lost = rtcp_report.fraction_lost(); + if (clockrate_hz > 0) { + remote_stat.jitter = rtcp_report.jitter(clockrate_hz).seconds<double>(); + } + if (rtcp_report.has_rtt()) { + remote_stat.round_trip_time = rtcp_report.last_rtt().seconds<double>(); + } + remote_stat.last_report_received_timestamp_ms = + rtcp_report.report_block_timestamp_utc().ms(); + channel_stats.remote_rtcp = remote_stat; + + // Receive only channel won't send any RTP packets. + if (!channel_stats.remote_ssrc.has_value()) { + channel_stats.remote_ssrc = remote_ssrc_; + } + break; + } + + return channel_stats; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/audio_ingress.h b/third_party/libwebrtc/audio/voip/audio_ingress.h new file mode 100644 index 0000000000..11bde7ce28 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/audio_ingress.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef AUDIO_VOIP_AUDIO_INGRESS_H_
+#define AUDIO_VOIP_AUDIO_INGRESS_H_
+
+#include <algorithm>
+#include <atomic>
+#include <map>
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/audio_mixer.h"
+#include "api/rtp_headers.h"
+#include "api/scoped_refptr.h"
+#include "api/voip/voip_statistics.h"
+#include "audio/audio_level.h"
+#include "modules/audio_coding/acm2/acm_receiver.h"
+#include "modules/audio_coding/include/audio_coding_module.h"
+#include "modules/rtp_rtcp/include/receive_statistics.h"
+#include "modules/rtp_rtcp/include/remote_ntp_time_estimator.h"
+#include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "modules/rtp_rtcp/source/rtp_rtcp_interface.h"
+#include "rtc_base/numerics/sequence_number_unwrapper.h"
+#include "rtc_base/synchronization/mutex.h"
+
+namespace webrtc {
+
+// AudioIngress handles incoming RTP/RTCP packets from the remote
+// media endpoint. Received RTP packets are injected into AcmReceiver and
+// when the audio output thread requests audio samples to play through a
+// system output such as a speaker device, AudioIngress provides the samples
+// via its implementation of the AudioMixer::Source interface.
+//
+// Note that this class is originally based on ChannelReceive in
+// audio/channel_receive.cc with non-audio related logic trimmed as aimed for
+// smaller footprint.
+class AudioIngress : public AudioMixer::Source {
+ public:
+  // `rtp_rtcp` and `receive_statistics` are stored as raw pointers and are
+  // not owned by this class.
+  AudioIngress(RtpRtcpInterface* rtp_rtcp,
+               Clock* clock,
+               ReceiveStatistics* receive_statistics,
+               rtc::scoped_refptr<AudioDecoderFactory> decoder_factory);
+  ~AudioIngress() override;
+
+  // Start or stop receiving operation of AudioIngress. StartPlay() returns
+  // false when no receive codecs have been set. StopPlay() also resets the
+  // measured output level.
+  bool StartPlay();
+  void StopPlay() {
+    playing_ = false;
+    output_audio_level_.ResetLevelFullRange();
+  }
+
+  // Query the state of the AudioIngress.
+  bool IsPlaying() const { return playing_; }
+
+  // Set the decoder formats and payload type for AcmReceiver where the
+  // key type (int) of the map is the payload type of SdpAudioFormat.
+  void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs);
+
+  // APIs to handle received RTP/RTCP packets from caller.
+  void ReceivedRTPPacket(rtc::ArrayView<const uint8_t> rtp_packet);
+  void ReceivedRTCPPacket(rtc::ArrayView<const uint8_t> rtcp_packet);
+
+  // See comments on LevelFullRange, TotalEnergy, TotalDuration from
+  // audio/audio_level.h.
+  int GetOutputAudioLevel() const {
+    return output_audio_level_.LevelFullRange();
+  }
+  double GetOutputTotalEnergy() { return output_audio_level_.TotalEnergy(); }
+  double GetOutputTotalDuration() {
+    return output_audio_level_.TotalDuration();
+  }
+
+  // Returns the network statistics reported by AcmReceiver without clearing
+  // its legacy statistics.
+  NetworkStatistics GetNetworkStatistics() const {
+    NetworkStatistics stats;
+    acm_receiver_.GetNetworkStatistics(&stats,
+                                       /*get_and_clear_legacy_stats=*/false);
+    return stats;
+  }
+
+  // Returns RTP receive statistics for the current remote SSRC and, when
+  // available, the matching statistics reported by the remote via RTCP.
+  ChannelStatistics GetChannelStatistics();
+
+  // Implementation of AudioMixer::Source interface.
+  AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
+      int sampling_rate,
+      AudioFrame* audio_frame) override;
+  int Ssrc() const override {
+    return rtc::dchecked_cast<int>(remote_ssrc_.load());
+  }
+  int PreferredSampleRate() const override {
+    // If we haven't received any RTP packet from remote and thus
+    // last_packet_sampling_rate is not available then use NetEq's sampling
+    // rate as that would be what would be used for audio output sample.
+    return std::max(acm_receiver_.last_packet_sample_rate_hz().value_or(0),
+                    acm_receiver_.last_output_sample_rate_hz());
+  }
+
+ private:
+  // Indicates AudioIngress status as caller invokes StartPlay/StopPlay.
+  // While not playing, GetAudioFrameWithInfo() mutes the frame it produces
+  // and reports it as muted, so silence is delivered to the output device.
+  std::atomic<bool> playing_;
+
+  // Currently active remote ssrc from remote media endpoint.
+  std::atomic<uint32_t> remote_ssrc_;
+
+  // The first rtp timestamp of the output audio frame that is used to
+  // calculate elapsed time for subsequent audio frames. Negative until the
+  // first output frame with a non-zero timestamp has been seen.
+  std::atomic<int64_t> first_rtp_timestamp_;
+
+  // Synchronization is handled internally by ReceiveStatistics.
+  ReceiveStatistics* const rtp_receive_statistics_;
+
+  // Synchronization is handled internally by RtpRtcpInterface.
+  RtpRtcpInterface* const rtp_rtcp_;
+
+  // Synchronization is handled internally by acm2::AcmReceiver.
+  acm2::AcmReceiver acm_receiver_;
+
+  // Synchronization is handled internally by voe::AudioLevel.
+  voe::AudioLevel output_audio_level_;
+
+  // Guards the members below that are annotated with RTC_GUARDED_BY(lock_).
+  Mutex lock_;
+
+  RemoteNtpTimeEstimator ntp_estimator_ RTC_GUARDED_BY(lock_);
+
+  // For receiving RTP statistics, this tracks the sampling rate value
+  // per payload type set when caller set via SetReceiveCodecs.
+  std::map<int, int> receive_codec_info_ RTC_GUARDED_BY(lock_);
+
+  RtpTimestampUnwrapper timestamp_wrap_handler_ RTC_GUARDED_BY(lock_);
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_VOIP_AUDIO_INGRESS_H_
diff --git a/third_party/libwebrtc/audio/voip/test/BUILD.gn b/third_party/libwebrtc/audio/voip/test/BUILD.gn
new file mode 100644
index 0000000000..00e9bee622
--- /dev/null
+++ b/third_party/libwebrtc/audio/voip/test/BUILD.gn
@@ -0,0 +1,107 @@
+# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+# All targets in this file are test-only and are defined only when tests are
+# included in the build.
+if (rtc_include_tests) {
+  # Header-only mock task queue; used by audio_channel_unittests below.
+  rtc_source_set("mock_task_queue") {
+    testonly = true
+    visibility = [ "*" ]
+    sources = [ "mock_task_queue.h" ]
+    deps = [
+      "../../../api/task_queue:task_queue",
+      "../../../api/task_queue/test:mock_task_queue_base",
+      "../../../test:test_support",
+    ]
+  }
+
+  # The VoipCore unit tests are not built when building with Chromium.
+  if (!build_with_chromium) {
+    rtc_library("voip_core_unittests") {
+      testonly = true
+      sources = [ "voip_core_unittest.cc" ]
+      deps = [
+        "..:voip_core",
+        "../../../api/audio_codecs:builtin_audio_decoder_factory",
+        "../../../api/audio_codecs:builtin_audio_encoder_factory",
+        "../../../api/task_queue:default_task_queue_factory",
+        "../../../modules/audio_device:mock_audio_device",
+        "../../../modules/audio_processing:mocks",
+        "../../../test:audio_codec_mocks",
+        "../../../test:mock_transport",
+        "../../../test:run_loop",
+        "../../../test:test_support",
+      ]
+    }
+  }
+
+  # Unit tests for AudioChannel (audio_channel_unittest.cc).
+  rtc_library("audio_channel_unittests") {
+    testonly = true
+    sources = [ "audio_channel_unittest.cc" ]
+    deps = [
+      ":mock_task_queue",
+      "..:audio_channel",
+      "../../../api:transport_api",
+      "../../../api/audio_codecs:builtin_audio_decoder_factory",
+      "../../../api/audio_codecs:builtin_audio_encoder_factory",
+      "../../../api/task_queue:task_queue",
+      "../../../modules/audio_mixer:audio_mixer_impl",
+      "../../../modules/audio_mixer:audio_mixer_test_utils",
+      "../../../modules/rtp_rtcp:rtp_rtcp",
+      "../../../modules/rtp_rtcp:rtp_rtcp_format",
+      "../../../rtc_base:logging",
+      "../../../test:mock_transport",
+      "../../../test:test_support",
+    ]
+    absl_deps = [ "//third_party/abseil-cpp/absl/functional:any_invocable" ]
+  }
+
+  # Unit tests for AudioIngress (audio_ingress_unittest.cc). Also depends on
+  # the audio_egress target.
+  rtc_library("audio_ingress_unittests") {
+    testonly = true
+    sources = [ "audio_ingress_unittest.cc" ]
+    deps = [
+      "..:audio_egress",
+      "..:audio_ingress",
+      "../../../api:transport_api",
+      "../../../api/audio_codecs:builtin_audio_decoder_factory",
+      "../../../api/audio_codecs:builtin_audio_encoder_factory",
+      "../../../api/task_queue:default_task_queue_factory",
+      "../../../api/units:time_delta",
+      "../../../api/units:timestamp",
+      "../../../modules/audio_mixer:audio_mixer_test_utils",
+      "../../../modules/rtp_rtcp:rtp_rtcp",
+      "../../../rtc_base:logging",
+      "../../../rtc_base:rtc_event",
+      "../../../test:mock_transport",
+      "../../../test:run_loop",
+      "../../../test:test_support",
+      "../../../test/time_controller:time_controller",
+    ]
+  }
+
+  # Unit tests for AudioEgress (audio_egress_unittest.cc).
+  rtc_library("audio_egress_unittests") {
+    testonly = true
+    sources = [ "audio_egress_unittest.cc" ]
+    deps = [
+      "..:audio_egress",
+      "../../../api:transport_api",
+      "../../../api/audio_codecs:builtin_audio_encoder_factory",
+      "../../../api/task_queue:default_task_queue_factory",
+      "../../../api/units:time_delta",
+      "../../../api/units:timestamp",
+      "../../../modules/audio_mixer:audio_mixer_test_utils",
+      "../../../modules/rtp_rtcp:rtp_rtcp",
+      "../../../modules/rtp_rtcp:rtp_rtcp_format",
+      "../../../rtc_base:logging",
+      "../../../rtc_base:rtc_event",
+      "../../../test:mock_transport",
+      "../../../test:run_loop",
+      "../../../test:test_support",
+      "../../../test/time_controller:time_controller",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/audio/voip/test/audio_channel_unittest.cc b/third_party/libwebrtc/audio/voip/test/audio_channel_unittest.cc
new file mode 100644
index 0000000000..0c8312b738
--- /dev/null
+++ b/third_party/libwebrtc/audio/voip/test/audio_channel_unittest.cc
@@ -0,0 +1,355 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/voip/audio_channel.h"
+
+#include "absl/functional/any_invocable.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "api/call/transport.h"
+#include "api/task_queue/task_queue_base.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "audio/voip/test/mock_task_queue.h"
+#include "modules/audio_mixer/audio_mixer_impl.h"
+#include "modules/audio_mixer/sine_wave_generator.h"
+#include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/mock_transport.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Invoke;
+using ::testing::NiceMock;
+using ::testing::Return;
+using ::testing::Unused;
+using ::testing::WithArg;
+
+constexpr uint64_t kStartTime = 123456789;
+constexpr uint32_t kLocalSsrc = 0xdeadc0de;
+constexpr int16_t kAudioLevel = 3004;  // used for sine wave level
+constexpr int kPcmuPayload = 0;
+
+// Fixture that creates an AudioChannel configured with the PCMU codec on top
+// of a mock transport and a mock task queue that runs posted tasks inline.
+class AudioChannelTest : public ::testing::Test {
+ public:
+  const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1};
+
+  AudioChannelTest()
+      : fake_clock_(kStartTime), wave_generator_(1000.0, kAudioLevel) {
+    task_queue_factory_ = std::make_unique<MockTaskQueueFactory>(&task_queue_);
+    audio_mixer_ = AudioMixerImpl::Create();
+    encoder_factory_ = CreateBuiltinAudioEncoderFactory();
+    decoder_factory_ = CreateBuiltinAudioDecoderFactory();
+
+    // By default, run the queued task immediately.
+    ON_CALL(task_queue_, PostTaskImpl)
+        .WillByDefault(WithArg<0>(
+            [](absl::AnyInvocable<void() &&> task) { std::move(task)(); }));
+  }
+
+  void SetUp() override { audio_channel_ = CreateAudioChannel(kLocalSsrc); }
+
+  void TearDown() override { audio_channel_ = nullptr; }
+
+  // Creates an AudioChannel that uses `ssrc` as its local SSRC, configures
+  // PCMU for both directions and starts sending and playing.
+  rtc::scoped_refptr<AudioChannel> CreateAudioChannel(uint32_t ssrc) {
+    // Use same audio mixer here for simplicity sake as we are not checking
+    // audio activity of RTP in our testcases. If we need to do test on audio
+    // signal activity then we need to assign audio mixer for each channel.
+    // Also this uses the same transport object for different audio channel to
+    // simplify network routing logic.
+    rtc::scoped_refptr<AudioChannel> audio_channel =
+        rtc::make_ref_counted<AudioChannel>(
+            &transport_, ssrc, task_queue_factory_.get(), audio_mixer_.get(),
+            decoder_factory_);
+    audio_channel->SetEncoder(kPcmuPayload, kPcmuFormat,
+                              encoder_factory_->MakeAudioEncoder(
+                                  kPcmuPayload, kPcmuFormat, absl::nullopt));
+    audio_channel->SetReceiveCodecs({{kPcmuPayload, kPcmuFormat}});
+    audio_channel->StartSend();
+    audio_channel->StartPlay();
+    return audio_channel;
+  }
+
+  // Returns a 10 ms PCMU-rate sine wave frame. `order` is the index of the
+  // frame in the stream and determines its timestamp.
+  std::unique_ptr<AudioFrame> GetAudioFrame(int order) {
+    auto frame = std::make_unique<AudioFrame>();
+    frame->sample_rate_hz_ = kPcmuFormat.clockrate_hz;
+    frame->samples_per_channel_ = kPcmuFormat.clockrate_hz / 100;  // 10 ms.
+    frame->num_channels_ = kPcmuFormat.num_channels;
+    frame->timestamp_ = frame->samples_per_channel_ * order;
+    wave_generator_.GenerateNextFrame(frame.get());
+    return frame;
+  }
+
+  SimulatedClock fake_clock_;
+  SineWaveGenerator wave_generator_;
+  NiceMock<MockTransport> transport_;
+  NiceMock<MockTaskQueue> task_queue_;
+  std::unique_ptr<TaskQueueFactory> task_queue_factory_;
+  rtc::scoped_refptr<AudioMixer> audio_mixer_;
+  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
+  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
+  rtc::scoped_refptr<AudioChannel> audio_channel_;
+};
+
+// Validate RTP packet generation by feeding audio frames with sine wave.
+// Resulted RTP packet is looped back into AudioChannel and gets decoded into
+// audio frame to see if it has some signal to indicate its validity.
+TEST_F(AudioChannelTest, PlayRtpByLocalLoop) {
+  // Hand the single RTP packet given to the transport straight back to the
+  // channel under test.
+  EXPECT_CALL(transport_, SendRtp)
+      .WillOnce(Invoke([&](rtc::ArrayView<const uint8_t> rtp_data, Unused) {
+        audio_channel_->ReceivedRTPPacket(rtp_data);
+        return true;
+      }));
+
+  // Feed two consecutive 10 ms sine wave frames to the sender.
+  auto sender = audio_channel_->GetAudioSender();
+  for (int order = 0; order < 2; ++order) {
+    sender->SendAudioData(GetAudioFrame(order));
+  }
+
+  AudioFrame silent_frame;
+  AudioFrame mixed_frame;
+  silent_frame.Mute();
+  silent_frame.mutable_data();  // This will zero out the data.
+  mixed_frame.CopyFrom(silent_frame);
+  audio_mixer_->Mix(/*number_of_channels*/ 1, &mixed_frame);
+
+  // The mixed frame is expected to differ from silence now.
+  const int frame_diff = memcmp(silent_frame.data(), mixed_frame.data(),
+                                AudioFrame::kMaxDataSizeBytes);
+  EXPECT_NE(frame_diff, 0);
+}
+
+// Validate assigned local SSRC is resulted in RTP packet.
+TEST_F(AudioChannelTest, VerifyLocalSsrcAsAssigned) {
+  // Capture the single RTP packet handed to the transport for inspection.
+  RtpPacketReceived captured_rtp;
+  EXPECT_CALL(transport_, SendRtp)
+      .WillOnce(Invoke([&](rtc::ArrayView<const uint8_t> rtp_data, Unused) {
+        captured_rtp.Parse(rtp_data);
+        return true;
+      }));
+
+  auto sender = audio_channel_->GetAudioSender();
+  for (int order = 0; order < 2; ++order) {
+    sender->SendAudioData(GetAudioFrame(order));
+  }
+
+  EXPECT_EQ(captured_rtp.Ssrc(), kLocalSsrc);
+}
+
+// Check metrics after processing an RTP packet.
+TEST_F(AudioChannelTest, TestIngressStatistics) {
+  // Loop every RTP packet sent by the channel back into the same channel.
+  auto loop_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) {
+    audio_channel_->ReceivedRTPPacket(packet);
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(loop_rtp));
+
+  auto audio_sender = audio_channel_->GetAudioSender();
+  audio_sender->SendAudioData(GetAudioFrame(0));
+  audio_sender->SendAudioData(GetAudioFrame(1));
+
+  // Pull two 10 ms frames through the mixer.
+  AudioFrame audio_frame;
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+
+  absl::optional<IngressStatistics> ingress_stats =
+      audio_channel_->GetIngressStatistics();
+  EXPECT_TRUE(ingress_stats);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_samples_received, 160ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealed_samples, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealment_events, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.inserted_samples_for_deceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.removed_samples_for_acceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.silent_concealed_samples, 0ULL);
+  // To extract the jitter buffer length in millisecond, jitter_buffer_delay_ms
+  // needs to be divided by jitter_buffer_emitted_count (number of samples).
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_delay_ms, 1600ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_emitted_count, 160ULL);
+  EXPECT_GT(ingress_stats->neteq_stats.jitter_buffer_target_delay_ms, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.interruption_count, 0);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_interruption_duration_ms, 0);
+  EXPECT_DOUBLE_EQ(ingress_stats->total_duration, 0.02);
+
+  // Now without any RTP pending in jitter buffer pull more.
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+
+  // Send another RTP packet to intentionally break PLC.
+  audio_sender->SendAudioData(GetAudioFrame(2));
+  audio_sender->SendAudioData(GetAudioFrame(3));
+
+  ingress_stats = audio_channel_->GetIngressStatistics();
+  EXPECT_TRUE(ingress_stats);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_samples_received, 320ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealed_samples, 168ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealment_events, 1ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.inserted_samples_for_deceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.removed_samples_for_acceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.silent_concealed_samples, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_delay_ms, 1600ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_emitted_count, 160ULL);
+  EXPECT_GT(ingress_stats->neteq_stats.jitter_buffer_target_delay_ms, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.interruption_count, 0);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_interruption_duration_ms, 0);
+  EXPECT_DOUBLE_EQ(ingress_stats->total_duration, 0.04);
+
+  // Pull the last RTP packet.
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+
+  ingress_stats = audio_channel_->GetIngressStatistics();
+  EXPECT_TRUE(ingress_stats);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_samples_received, 480ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealed_samples, 168ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.concealment_events, 1ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.inserted_samples_for_deceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.removed_samples_for_acceleration, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.silent_concealed_samples, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_delay_ms, 3200ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.jitter_buffer_emitted_count, 320ULL);
+  EXPECT_GT(ingress_stats->neteq_stats.jitter_buffer_target_delay_ms, 0ULL);
+  EXPECT_EQ(ingress_stats->neteq_stats.interruption_count, 0);
+  EXPECT_EQ(ingress_stats->neteq_stats.total_interruption_duration_ms, 0);
+  EXPECT_DOUBLE_EQ(ingress_stats->total_duration, 0.06);
+}
+
+// Check ChannelStatistics metric after processing RTP and RTCP packets.
+TEST_F(AudioChannelTest, TestChannelStatistics) {
+  // Loop both RTP and RTCP sent by the channel back into the same channel,
+  // so the remote SSRC it observes is its own local SSRC.
+  auto loop_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) {
+    audio_channel_->ReceivedRTPPacket(packet);
+    return true;
+  };
+  auto loop_rtcp = [&](rtc::ArrayView<const uint8_t> packet) {
+    audio_channel_->ReceivedRTCPPacket(packet);
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(loop_rtp));
+  EXPECT_CALL(transport_, SendRtcp).WillRepeatedly(Invoke(loop_rtcp));
+
+  // Simulate microphone giving audio frame (10 ms). This will trigger transport
+  // to send RTP as handled in loop_rtp above.
+  auto audio_sender = audio_channel_->GetAudioSender();
+  audio_sender->SendAudioData(GetAudioFrame(0));
+  audio_sender->SendAudioData(GetAudioFrame(1));
+
+  // Simulate speaker requesting audio frame (10 ms). This will trigger VoIP
+  // engine to fetch audio samples from RTP packets stored in jitter buffer.
+  AudioFrame audio_frame;
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+  audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+
+  // Force sending RTCP SR report in order to have remote_rtcp field available
+  // in channel statistics. This will trigger transport to send RTCP as handled
+  // in loop_rtcp above.
+  audio_channel_->SendRTCPReportForTesting(kRtcpSr);
+
+  absl::optional<ChannelStatistics> channel_stats =
+      audio_channel_->GetChannelStatistics();
+  EXPECT_TRUE(channel_stats);
+
+  EXPECT_EQ(channel_stats->packets_sent, 1ULL);
+  EXPECT_EQ(channel_stats->bytes_sent, 160ULL);
+
+  EXPECT_EQ(channel_stats->packets_received, 1ULL);
+  EXPECT_EQ(channel_stats->bytes_received, 160ULL);
+  EXPECT_EQ(channel_stats->jitter, 0);
+  EXPECT_EQ(channel_stats->packets_lost, 0);
+  EXPECT_EQ(channel_stats->remote_ssrc.value(), kLocalSsrc);
+
+  EXPECT_TRUE(channel_stats->remote_rtcp.has_value());
+
+  EXPECT_EQ(channel_stats->remote_rtcp->jitter, 0);
+  EXPECT_EQ(channel_stats->remote_rtcp->packets_lost, 0);
+  EXPECT_EQ(channel_stats->remote_rtcp->fraction_lost, 0);
+  EXPECT_GT(channel_stats->remote_rtcp->last_report_received_timestamp_ms, 0);
+  EXPECT_FALSE(channel_stats->remote_rtcp->round_trip_time.has_value());
+}
+
+// Check ChannelStatistics RTT metric after processing RTP and RTCP packets
+// using three audio channels where each represents media endpoint.
+//
+//  1) AC1 <- RTP/RTCP -> AC2
+//  2) AC1 <- RTP/RTCP -> AC3
+//
+// During step 1), AC1 should be able to check RTT from AC2's SSRC.
+// During step 2), AC1 should be able to check RTT from AC3's SSRC.
+TEST_F(AudioChannelTest, RttIsAvailableAfterChangeOfRemoteSsrc) {
+  // Create AC2 and AC3.
+  constexpr uint32_t kAc2Ssrc = 0xdeadbeef;
+  constexpr uint32_t kAc3Ssrc = 0xdeafbeef;
+
+  auto ac_2 = CreateAudioChannel(kAc2Ssrc);
+  auto ac_3 = CreateAudioChannel(kAc3Ssrc);
+
+  // Sends two audio frames from `rtp_sender` and routes the resulting RTP to
+  // `rtp_receiver` through the shared mock transport.
+  auto send_recv_rtp = [&](rtc::scoped_refptr<AudioChannel> rtp_sender,
+                           rtc::scoped_refptr<AudioChannel> rtp_receiver) {
+    // Setup routing logic via transport_.
+    auto route_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) {
+      rtp_receiver->ReceivedRTPPacket(packet);
+      return true;
+    };
+    ON_CALL(transport_, SendRtp).WillByDefault(route_rtp);
+
+    // This will trigger route_rtp callback via transport_.
+    rtp_sender->GetAudioSender()->SendAudioData(GetAudioFrame(0));
+    rtp_sender->GetAudioSender()->SendAudioData(GetAudioFrame(1));
+
+    // Process received RTP in receiver.
+    AudioFrame audio_frame;
+    audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+    audio_mixer_->Mix(/*number_of_channels=*/1, &audio_frame);
+
+    // Revert to default to avoid using reference in route_rtp lambda.
+    ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
+  };
+
+  // Forces an RTCP sender report from `rtcp_sender` and routes it to
+  // `rtcp_receiver` through the shared mock transport.
+  auto send_recv_rtcp = [&](rtc::scoped_refptr<AudioChannel> rtcp_sender,
+                            rtc::scoped_refptr<AudioChannel> rtcp_receiver) {
+    // Setup routing logic via transport_.
+    auto route_rtcp = [&](rtc::ArrayView<const uint8_t> packet) {
+      rtcp_receiver->ReceivedRTCPPacket(packet);
+      return true;
+    };
+    ON_CALL(transport_, SendRtcp).WillByDefault(route_rtcp);
+
+    // This will trigger route_rtcp callback via transport_.
+    rtcp_sender->SendRTCPReportForTesting(kRtcpSr);
+
+    // Revert to default to avoid using reference in route_rtcp lambda.
+    ON_CALL(transport_, SendRtcp).WillByDefault(Return(true));
+  };
+
+  // AC1 <-- RTP/RTCP --> AC2
+  send_recv_rtp(audio_channel_, ac_2);
+  send_recv_rtp(ac_2, audio_channel_);
+  send_recv_rtcp(audio_channel_, ac_2);
+  send_recv_rtcp(ac_2, audio_channel_);
+
+  absl::optional<ChannelStatistics> channel_stats =
+      audio_channel_->GetChannelStatistics();
+  ASSERT_TRUE(channel_stats);
+  EXPECT_EQ(channel_stats->remote_ssrc, kAc2Ssrc);
+  ASSERT_TRUE(channel_stats->remote_rtcp);
+  EXPECT_GT(channel_stats->remote_rtcp->round_trip_time, 0.0);
+
+  // AC1 <-- RTP/RTCP --> AC3
+  send_recv_rtp(audio_channel_, ac_3);
+  send_recv_rtp(ac_3, audio_channel_);
+  send_recv_rtcp(audio_channel_, ac_3);
+  send_recv_rtcp(ac_3, audio_channel_);
+
+  channel_stats = audio_channel_->GetChannelStatistics();
+  ASSERT_TRUE(channel_stats);
+  EXPECT_EQ(channel_stats->remote_ssrc, kAc3Ssrc);
+  ASSERT_TRUE(channel_stats->remote_rtcp);
+  EXPECT_GT(channel_stats->remote_rtcp->round_trip_time, 0.0);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/voip/test/audio_egress_unittest.cc b/third_party/libwebrtc/audio/voip/test/audio_egress_unittest.cc
new file mode 100644
index 0000000000..83df26eef1
--- /dev/null
+++ b/third_party/libwebrtc/audio/voip/test/audio_egress_unittest.cc
@@ -0,0 +1,326 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "audio/voip/audio_egress.h" + +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "api/call/transport.h" +#include "api/task_queue/default_task_queue_factory.h" +#include "api/units/time_delta.h" +#include "api/units/timestamp.h" +#include "modules/audio_mixer/sine_wave_generator.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/event.h" +#include "rtc_base/logging.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_transport.h" +#include "test/run_loop.h" +#include "test/time_controller/simulated_time_controller.h" + +namespace webrtc { +namespace { + +using ::testing::Invoke; +using ::testing::NiceMock; +using ::testing::Unused; + +std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock, + Transport* transport, + uint32_t remote_ssrc) { + RtpRtcpInterface::Configuration rtp_config; + rtp_config.clock = clock; + rtp_config.audio = true; + rtp_config.rtcp_report_interval_ms = 5000; + rtp_config.outgoing_transport = transport; + rtp_config.local_media_ssrc = remote_ssrc; + auto rtp_rtcp = ModuleRtpRtcpImpl2::Create(rtp_config); + rtp_rtcp->SetSendingMediaStatus(false); + rtp_rtcp->SetRTCPStatus(RtcpMode::kCompound); + return rtp_rtcp; +} + +constexpr int16_t kAudioLevel = 3004; // Used for sine wave level. + +// AudioEgressTest configures audio egress by using Rtp Stack, fake clock, +// and task queue factory. Encoder factory is needed to create codec and +// configure the RTP stack in audio egress. 
+class AudioEgressTest : public ::testing::Test { + public: + static constexpr uint16_t kSeqNum = 12345; + static constexpr uint64_t kStartTime = 123456789; + static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF; + const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1}; + + AudioEgressTest() : wave_generator_(1000.0, kAudioLevel) { + encoder_factory_ = CreateBuiltinAudioEncoderFactory(); + } + + // Prepare test on audio egress by using PCMu codec with specific + // sequence number and its status to be running. + void SetUp() override { + rtp_rtcp_ = + CreateRtpStack(time_controller_.GetClock(), &transport_, kRemoteSsrc); + egress_ = std::make_unique<AudioEgress>( + rtp_rtcp_.get(), time_controller_.GetClock(), + time_controller_.GetTaskQueueFactory()); + constexpr int kPcmuPayload = 0; + egress_->SetEncoder(kPcmuPayload, kPcmuFormat, + encoder_factory_->MakeAudioEncoder( + kPcmuPayload, kPcmuFormat, absl::nullopt)); + egress_->StartSend(); + rtp_rtcp_->SetSequenceNumber(kSeqNum); + rtp_rtcp_->SetSendingStatus(true); + } + + // Make sure we have shut down rtp stack and reset egress for each test. + void TearDown() override { + egress_->StopSend(); + rtp_rtcp_->SetSendingStatus(false); + egress_.reset(); + rtp_rtcp_.reset(); + } + + // Create an audio frame prepared for pcmu encoding. Timestamp is + // increased per RTP specification which is the number of samples it contains. + // Wave generator writes sine wave which has expected high level set + // by kAudioLevel. + std::unique_ptr<AudioFrame> GetAudioFrame(int order) { + auto frame = std::make_unique<AudioFrame>(); + frame->sample_rate_hz_ = kPcmuFormat.clockrate_hz; + frame->samples_per_channel_ = kPcmuFormat.clockrate_hz / 100; // 10 ms. 
+ frame->num_channels_ = kPcmuFormat.num_channels; + frame->timestamp_ = frame->samples_per_channel_ * order; + wave_generator_.GenerateNextFrame(frame.get()); + return frame; + } + + GlobalSimulatedTimeController time_controller_{Timestamp::Micros(kStartTime)}; + NiceMock<MockTransport> transport_; + SineWaveGenerator wave_generator_; + std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; + rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_; + std::unique_ptr<AudioEgress> egress_; +}; + +TEST_F(AudioEgressTest, SendingStatusAfterStartAndStop) { + EXPECT_TRUE(egress_->IsSending()); + egress_->StopSend(); + EXPECT_FALSE(egress_->IsSending()); +} + +TEST_F(AudioEgressTest, ProcessAudioWithMute) { + constexpr int kExpected = 10; + rtc::Event event; + int rtp_count = 0; + RtpPacketReceived rtp; + auto rtp_sent = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + rtp.Parse(packet); + if (++rtp_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + egress_->SetMute(true); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. 
+ for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + + event.Wait(TimeDelta::Seconds(1)); + EXPECT_EQ(rtp_count, kExpected); + + // we expect on pcmu payload to result in 255 for silenced payload + RTPHeader header; + rtp.GetHeader(&header); + size_t packet_length = rtp.size(); + size_t payload_length = packet_length - header.headerLength; + size_t payload_data_length = payload_length - header.paddingLength; + const uint8_t* payload = rtp.data() + header.headerLength; + for (size_t i = 0; i < payload_data_length; ++i) { + EXPECT_EQ(*payload++, 255); + } +} + +TEST_F(AudioEgressTest, ProcessAudioWithSineWave) { + constexpr int kExpected = 10; + rtc::Event event; + int rtp_count = 0; + RtpPacketReceived rtp; + auto rtp_sent = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + rtp.Parse(packet); + if (++rtp_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. 
+ for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + + event.Wait(TimeDelta::Seconds(1)); + EXPECT_EQ(rtp_count, kExpected); + + // we expect on pcmu to result in < 255 for payload with sine wave + RTPHeader header; + rtp.GetHeader(&header); + size_t packet_length = rtp.size(); + size_t payload_length = packet_length - header.headerLength; + size_t payload_data_length = payload_length - header.paddingLength; + const uint8_t* payload = rtp.data() + header.headerLength; + for (size_t i = 0; i < payload_data_length; ++i) { + EXPECT_NE(*payload++, 255); + } +} + +TEST_F(AudioEgressTest, SkipAudioEncodingAfterStopSend) { + constexpr int kExpected = 10; + rtc::Event event; + int rtp_count = 0; + auto rtp_sent = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + if (++rtp_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. + for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + + event.Wait(TimeDelta::Seconds(1)); + EXPECT_EQ(rtp_count, kExpected); + + // Now stop send and yet feed more data. + egress_->StopSend(); + + // It should be safe to exit the test case while encoder_queue_ has + // outstanding data to process. We are making sure that this doesn't + // result in crashes or sanitizer errors due to remaining data. 
+ for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } +} + +TEST_F(AudioEgressTest, ChangeEncoderFromPcmuToOpus) { + absl::optional<SdpAudioFormat> pcmu = egress_->GetEncoderFormat(); + EXPECT_TRUE(pcmu); + EXPECT_EQ(pcmu->clockrate_hz, kPcmuFormat.clockrate_hz); + EXPECT_EQ(pcmu->num_channels, kPcmuFormat.num_channels); + + constexpr int kOpusPayload = 120; + const SdpAudioFormat kOpusFormat = {"opus", 48000, 2}; + + egress_->SetEncoder(kOpusPayload, kOpusFormat, + encoder_factory_->MakeAudioEncoder( + kOpusPayload, kOpusFormat, absl::nullopt)); + + absl::optional<SdpAudioFormat> opus = egress_->GetEncoderFormat(); + EXPECT_TRUE(opus); + EXPECT_EQ(opus->clockrate_hz, kOpusFormat.clockrate_hz); + EXPECT_EQ(opus->num_channels, kOpusFormat.num_channels); +} + +TEST_F(AudioEgressTest, SendDTMF) { + constexpr int kExpected = 7; + constexpr int kPayloadType = 100; + constexpr int kDurationMs = 100; + constexpr int kSampleRate = 8000; + constexpr int kEvent = 3; + + egress_->RegisterTelephoneEventType(kPayloadType, kSampleRate); + // 100 ms duration will produce total 7 DTMF + // 1 @ 20 ms, 2 @ 40 ms, 3 @ 60 ms, 4 @ 80 ms + // 5, 6, 7 @ 100 ms (last one sends 3 dtmf) + egress_->SendTelephoneEvent(kEvent, kDurationMs); + + rtc::Event event; + int dtmf_count = 0; + auto is_dtmf = [&](RtpPacketReceived& rtp) { + return (rtp.PayloadType() == kPayloadType && + rtp.SequenceNumber() == kSeqNum + dtmf_count && + rtp.padding_size() == 0 && rtp.Marker() == (dtmf_count == 0) && + rtp.Ssrc() == kRemoteSsrc); + }; + + // It's possible that we may have actual audio RTP packets along with + // DTMF packets. We are only interested in the exact number of DTMF + // packets rtp stack is emitting. 
+ auto rtp_sent = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + RtpPacketReceived rtp; + rtp.Parse(packet); + if (is_dtmf(rtp) && ++dtmf_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. + for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + + event.Wait(TimeDelta::Seconds(1)); + EXPECT_EQ(dtmf_count, kExpected); +} + +TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) { + // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to + // get audio level from input source. + constexpr int kExpected = 6; + rtc::Event event; + int rtp_count = 0; + auto rtp_sent = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + if (++rtp_count == kExpected) { + event.Set(); + } + return true; + }; + + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); + + // Two 10 ms audio frames will result in rtp packet with ptime 20. 
+ for (size_t i = 0; i < kExpected * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + + event.Wait(/*give_up_after=*/TimeDelta::Seconds(1)); + EXPECT_EQ(rtp_count, kExpected); + + constexpr double kExpectedEnergy = 0.00016809565587789564; + constexpr double kExpectedDuration = 0.11999999999999998; + + EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel); + EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy); + EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/test/audio_ingress_unittest.cc b/third_party/libwebrtc/audio/voip/test/audio_ingress_unittest.cc new file mode 100644 index 0000000000..c7736b247a --- /dev/null +++ b/third_party/libwebrtc/audio/voip/test/audio_ingress_unittest.cc @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/voip/audio_ingress.h" + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "api/call/transport.h" +#include "api/task_queue/default_task_queue_factory.h" +#include "api/units/time_delta.h" +#include "audio/voip/audio_egress.h" +#include "modules/audio_mixer/sine_wave_generator.h" +#include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" +#include "rtc_base/event.h" +#include "rtc_base/logging.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_transport.h" +#include "test/run_loop.h" +#include "test/time_controller/simulated_time_controller.h" + +namespace webrtc { +namespace { + +using ::testing::Invoke; +using ::testing::NiceMock; +using ::testing::Unused; + +constexpr int16_t kAudioLevel = 3004; // Used for sine wave level. + +class AudioIngressTest : public ::testing::Test { + public: + const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1}; + + AudioIngressTest() : wave_generator_(1000.0, kAudioLevel) { + receive_statistics_ = + ReceiveStatistics::Create(time_controller_.GetClock()); + + RtpRtcpInterface::Configuration rtp_config; + rtp_config.clock = time_controller_.GetClock(); + rtp_config.audio = true; + rtp_config.receive_statistics = receive_statistics_.get(); + rtp_config.rtcp_report_interval_ms = 5000; + rtp_config.outgoing_transport = &transport_; + rtp_config.local_media_ssrc = 0xdeadc0de; + rtp_rtcp_ = ModuleRtpRtcpImpl2::Create(rtp_config); + + rtp_rtcp_->SetSendingMediaStatus(false); + rtp_rtcp_->SetRTCPStatus(RtcpMode::kCompound); + + encoder_factory_ = CreateBuiltinAudioEncoderFactory(); + decoder_factory_ = CreateBuiltinAudioDecoderFactory(); + } + + void SetUp() override { + constexpr int kPcmuPayload = 0; + ingress_ = std::make_unique<AudioIngress>( + rtp_rtcp_.get(), time_controller_.GetClock(), receive_statistics_.get(), + decoder_factory_); + ingress_->SetReceiveCodecs({{kPcmuPayload, kPcmuFormat}}); + + egress_ = 
std::make_unique<AudioEgress>( + rtp_rtcp_.get(), time_controller_.GetClock(), + time_controller_.GetTaskQueueFactory()); + egress_->SetEncoder(kPcmuPayload, kPcmuFormat, + encoder_factory_->MakeAudioEncoder( + kPcmuPayload, kPcmuFormat, absl::nullopt)); + egress_->StartSend(); + ingress_->StartPlay(); + rtp_rtcp_->SetSendingStatus(true); + } + + void TearDown() override { + rtp_rtcp_->SetSendingStatus(false); + ingress_->StopPlay(); + egress_->StopSend(); + egress_.reset(); + ingress_.reset(); + } + + std::unique_ptr<AudioFrame> GetAudioFrame(int order) { + auto frame = std::make_unique<AudioFrame>(); + frame->sample_rate_hz_ = kPcmuFormat.clockrate_hz; + frame->samples_per_channel_ = kPcmuFormat.clockrate_hz / 100; // 10 ms. + frame->num_channels_ = kPcmuFormat.num_channels; + frame->timestamp_ = frame->samples_per_channel_ * order; + wave_generator_.GenerateNextFrame(frame.get()); + return frame; + } + + GlobalSimulatedTimeController time_controller_{Timestamp::Micros(123456789)}; + SineWaveGenerator wave_generator_; + NiceMock<MockTransport> transport_; + std::unique_ptr<ReceiveStatistics> receive_statistics_; + std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; + rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_; + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_; + std::unique_ptr<AudioIngress> ingress_; + std::unique_ptr<AudioEgress> egress_; +}; + +TEST_F(AudioIngressTest, PlayingAfterStartAndStop) { + EXPECT_EQ(ingress_->IsPlaying(), true); + ingress_->StopPlay(); + EXPECT_EQ(ingress_->IsPlaying(), false); +} + +TEST_F(AudioIngressTest, GetAudioFrameAfterRtpReceived) { + rtc::Event event; + auto handle_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + ingress_->ReceivedRTPPacket(packet); + event.Set(); + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(handle_rtp)); + egress_->SendAudioData(GetAudioFrame(0)); + egress_->SendAudioData(GetAudioFrame(1)); + time_controller_.AdvanceTime(TimeDelta::Zero()); + 
ASSERT_TRUE(event.Wait(TimeDelta::Seconds(1))); + + AudioFrame audio_frame; + EXPECT_EQ( + ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame), + AudioMixer::Source::AudioFrameInfo::kNormal); + EXPECT_FALSE(audio_frame.muted()); + EXPECT_EQ(audio_frame.num_channels_, 1u); + EXPECT_EQ(audio_frame.samples_per_channel_, + static_cast<size_t>(kPcmuFormat.clockrate_hz / 100)); + EXPECT_EQ(audio_frame.sample_rate_hz_, kPcmuFormat.clockrate_hz); + EXPECT_NE(audio_frame.timestamp_, 0u); + EXPECT_EQ(audio_frame.elapsed_time_ms_, 0); +} + +TEST_F(AudioIngressTest, TestSpeechOutputLevelAndEnergyDuration) { + // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to + // get audio level from output source. + constexpr int kNumRtp = 6; + int rtp_count = 0; + rtc::Event event; + auto handle_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + ingress_->ReceivedRTPPacket(packet); + if (++rtp_count == kNumRtp) { + event.Set(); + } + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(handle_rtp)); + for (int i = 0; i < kNumRtp * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + event.Wait(/*give_up_after=*/TimeDelta::Seconds(1)); + + for (int i = 0; i < kNumRtp * 2; ++i) { + AudioFrame audio_frame; + EXPECT_EQ( + ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame), + AudioMixer::Source::AudioFrameInfo::kNormal); + } + EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel); + + constexpr double kExpectedEnergy = 0.00016809565587789564; + constexpr double kExpectedDuration = 0.11999999999999998; + + EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalEnergy(), kExpectedEnergy); + EXPECT_DOUBLE_EQ(ingress_->GetOutputTotalDuration(), kExpectedDuration); +} + +TEST_F(AudioIngressTest, PreferredSampleRate) { + rtc::Event event; + auto handle_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + ingress_->ReceivedRTPPacket(packet); + 
event.Set(); + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(handle_rtp)); + egress_->SendAudioData(GetAudioFrame(0)); + egress_->SendAudioData(GetAudioFrame(1)); + time_controller_.AdvanceTime(TimeDelta::Zero()); + ASSERT_TRUE(event.Wait(TimeDelta::Seconds(1))); + + AudioFrame audio_frame; + EXPECT_EQ( + ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame), + AudioMixer::Source::AudioFrameInfo::kNormal); + EXPECT_EQ(ingress_->PreferredSampleRate(), kPcmuFormat.clockrate_hz); +} + +// This test highlights the case where caller invokes StopPlay() which then +// AudioIngress should play silence frame afterwards. +TEST_F(AudioIngressTest, GetMutedAudioFrameAfterRtpReceivedAndStopPlay) { + // StopPlay before we start sending RTP packet with sine wave. + ingress_->StopPlay(); + + // Send 6 RTP packets to generate more than 100 ms audio sample to get + // valid speech level. + constexpr int kNumRtp = 6; + int rtp_count = 0; + rtc::Event event; + auto handle_rtp = [&](rtc::ArrayView<const uint8_t> packet, Unused) { + ingress_->ReceivedRTPPacket(packet); + if (++rtp_count == kNumRtp) { + event.Set(); + } + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(handle_rtp)); + for (int i = 0; i < kNumRtp * 2; i++) { + egress_->SendAudioData(GetAudioFrame(i)); + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + } + event.Wait(/*give_up_after=*/TimeDelta::Seconds(1)); + + for (int i = 0; i < kNumRtp * 2; ++i) { + AudioFrame audio_frame; + EXPECT_EQ( + ingress_->GetAudioFrameWithInfo(kPcmuFormat.clockrate_hz, &audio_frame), + AudioMixer::Source::AudioFrameInfo::kMuted); + const int16_t* audio_data = audio_frame.data(); + size_t length = + audio_frame.samples_per_channel_ * audio_frame.num_channels_; + for (size_t j = 0; j < length; ++j) { + EXPECT_EQ(audio_data[j], 0); + } + } + + // Now we should still see valid speech output level as StopPlay won't affect + // the measurement. 
+ EXPECT_EQ(ingress_->GetOutputAudioLevel(), kAudioLevel); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/test/mock_task_queue.h b/third_party/libwebrtc/audio/voip/test/mock_task_queue.h new file mode 100644 index 0000000000..547b0d3f75 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/test/mock_task_queue.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_VOIP_TEST_MOCK_TASK_QUEUE_H_ +#define AUDIO_VOIP_TEST_MOCK_TASK_QUEUE_H_ + +#include <memory> + +#include "api/task_queue/task_queue_factory.h" +#include "api/task_queue/test/mock_task_queue_base.h" +#include "test/gmock.h" + +namespace webrtc { + +// MockTaskQueue enables immediate task run from global TaskQueueBase. +// It's necessary for some tests depending on TaskQueueBase internally. +class MockTaskQueue : public MockTaskQueueBase { + public: + MockTaskQueue() : current_(this) {} + + // Delete is deliberately defined as no-op as MockTaskQueue is expected to + // hold onto current global TaskQueueBase throughout the testing. + void Delete() override {} + + private: + CurrentTaskQueueSetter current_; +}; + +class MockTaskQueueFactory : public TaskQueueFactory { + public: + explicit MockTaskQueueFactory(MockTaskQueue* task_queue) + : task_queue_(task_queue) {} + + std::unique_ptr<TaskQueueBase, TaskQueueDeleter> CreateTaskQueue( + absl::string_view name, + Priority priority) const override { + // Default MockTaskQueue::Delete is no-op, therefore it's safe to pass the + // raw pointer. 
+ return std::unique_ptr<TaskQueueBase, TaskQueueDeleter>(task_queue_); + } + + private: + MockTaskQueue* task_queue_; +}; + +} // namespace webrtc + +#endif // AUDIO_VOIP_TEST_MOCK_TASK_QUEUE_H_ diff --git a/third_party/libwebrtc/audio/voip/test/voip_core_unittest.cc b/third_party/libwebrtc/audio/voip/test/voip_core_unittest.cc new file mode 100644 index 0000000000..b432506b12 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/test/voip_core_unittest.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/voip/voip_core.h" + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "api/task_queue/default_task_queue_factory.h" +#include "modules/audio_device/include/mock_audio_device.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "test/gtest.h" +#include "test/mock_transport.h" +#include "test/run_loop.h" + +namespace webrtc { +namespace { + +using ::testing::NiceMock; +using ::testing::Return; + +constexpr int kPcmuPayload = 0; +constexpr int kPcmuSampleRateHz = 8000; +constexpr int kDtmfEventDurationMs = 1000; +constexpr DtmfEvent kDtmfEventCode = DtmfEvent::kDigitZero; + +class VoipCoreTest : public ::testing::Test { + public: + const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1}; + + VoipCoreTest() { audio_device_ = test::MockAudioDeviceModule::CreateNice(); } + + void SetUp() override { + auto encoder_factory = CreateBuiltinAudioEncoderFactory(); + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + rtc::scoped_refptr<AudioProcessing> 
audio_processing = + rtc::make_ref_counted<NiceMock<test::MockAudioProcessing>>(); + + voip_core_ = std::make_unique<VoipCore>( + std::move(encoder_factory), std::move(decoder_factory), + CreateDefaultTaskQueueFactory(), audio_device_, + std::move(audio_processing)); + } + + test::RunLoop run_loop_; + std::unique_ptr<VoipCore> voip_core_; + NiceMock<MockTransport> transport_; + rtc::scoped_refptr<test::MockAudioDeviceModule> audio_device_; +}; + +// Validate expected API calls that involves with VoipCore. Some verification is +// involved with checking mock audio device. +TEST_F(VoipCoreTest, BasicVoipCoreOperation) { + // Program mock as non-operational and ready to start. + EXPECT_CALL(*audio_device_, Recording()).WillOnce(Return(false)); + EXPECT_CALL(*audio_device_, Playing()).WillOnce(Return(false)); + EXPECT_CALL(*audio_device_, InitRecording()).WillOnce(Return(0)); + EXPECT_CALL(*audio_device_, InitPlayout()).WillOnce(Return(0)); + EXPECT_CALL(*audio_device_, StartRecording()).WillOnce(Return(0)); + EXPECT_CALL(*audio_device_, StartPlayout()).WillOnce(Return(0)); + + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + EXPECT_EQ(voip_core_->SetSendCodec(channel, kPcmuPayload, kPcmuFormat), + VoipResult::kOk); + EXPECT_EQ( + voip_core_->SetReceiveCodecs(channel, {{kPcmuPayload, kPcmuFormat}}), + VoipResult::kOk); + + EXPECT_EQ(voip_core_->StartSend(channel), VoipResult::kOk); + EXPECT_EQ(voip_core_->StartPlayout(channel), VoipResult::kOk); + + EXPECT_EQ(voip_core_->RegisterTelephoneEventType(channel, kPcmuPayload, + kPcmuSampleRateHz), + VoipResult::kOk); + + EXPECT_EQ( + voip_core_->SendDtmfEvent(channel, kDtmfEventCode, kDtmfEventDurationMs), + VoipResult::kOk); + + // Program mock as operational that is ready to be stopped. 
+ EXPECT_CALL(*audio_device_, Recording()).WillOnce(Return(true)); + EXPECT_CALL(*audio_device_, Playing()).WillOnce(Return(true)); + EXPECT_CALL(*audio_device_, StopRecording()).WillOnce(Return(0)); + EXPECT_CALL(*audio_device_, StopPlayout()).WillOnce(Return(0)); + + EXPECT_EQ(voip_core_->StopSend(channel), VoipResult::kOk); + EXPECT_EQ(voip_core_->StopPlayout(channel), VoipResult::kOk); + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); +} + +TEST_F(VoipCoreTest, ExpectFailToUseReleasedChannelId) { + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + // Release right after creation. + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); + + // Now use released channel. + + EXPECT_EQ(voip_core_->SetSendCodec(channel, kPcmuPayload, kPcmuFormat), + VoipResult::kInvalidArgument); + EXPECT_EQ( + voip_core_->SetReceiveCodecs(channel, {{kPcmuPayload, kPcmuFormat}}), + VoipResult::kInvalidArgument); + EXPECT_EQ(voip_core_->RegisterTelephoneEventType(channel, kPcmuPayload, + kPcmuSampleRateHz), + VoipResult::kInvalidArgument); + EXPECT_EQ(voip_core_->StartSend(channel), VoipResult::kInvalidArgument); + EXPECT_EQ(voip_core_->StartPlayout(channel), VoipResult::kInvalidArgument); + EXPECT_EQ( + voip_core_->SendDtmfEvent(channel, kDtmfEventCode, kDtmfEventDurationMs), + VoipResult::kInvalidArgument); +} + +TEST_F(VoipCoreTest, SendDtmfEventWithoutRegistering) { + // Program mock as non-operational and ready to start send. 
+ EXPECT_CALL(*audio_device_, Recording()).WillOnce(Return(false)); + EXPECT_CALL(*audio_device_, InitRecording()).WillOnce(Return(0)); + EXPECT_CALL(*audio_device_, StartRecording()).WillOnce(Return(0)); + + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + EXPECT_EQ(voip_core_->SetSendCodec(channel, kPcmuPayload, kPcmuFormat), + VoipResult::kOk); + + EXPECT_EQ(voip_core_->StartSend(channel), VoipResult::kOk); + // Send Dtmf event without registering beforehand, thus payload + // type is not set and kFailedPrecondition is expected. + EXPECT_EQ( + voip_core_->SendDtmfEvent(channel, kDtmfEventCode, kDtmfEventDurationMs), + VoipResult::kFailedPrecondition); + + // Program mock as sending and is ready to be stopped. + EXPECT_CALL(*audio_device_, Recording()).WillOnce(Return(true)); + EXPECT_CALL(*audio_device_, StopRecording()).WillOnce(Return(0)); + + EXPECT_EQ(voip_core_->StopSend(channel), VoipResult::kOk); + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); +} + +TEST_F(VoipCoreTest, SendDtmfEventWithoutStartSend) { + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + EXPECT_EQ(voip_core_->RegisterTelephoneEventType(channel, kPcmuPayload, + kPcmuSampleRateHz), + VoipResult::kOk); + + // Send Dtmf event without calling StartSend beforehand, thus + // Dtmf events cannot be sent and kFailedPrecondition is expected. + EXPECT_EQ( + voip_core_->SendDtmfEvent(channel, kDtmfEventCode, kDtmfEventDurationMs), + VoipResult::kFailedPrecondition); + + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); +} + +TEST_F(VoipCoreTest, StartSendAndPlayoutWithoutSettingCodec) { + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + // Call StartSend and StartPlayout without setting send/receive + // codec. Codecs aren't set, so kFailedPrecondition is expected. 
+ EXPECT_EQ(voip_core_->StartSend(channel), VoipResult::kFailedPrecondition); + EXPECT_EQ(voip_core_->StartPlayout(channel), VoipResult::kFailedPrecondition); + + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); +} + +TEST_F(VoipCoreTest, StopSendAndPlayoutWithoutStarting) { + auto channel = voip_core_->CreateChannel(&transport_, 0xdeadc0de); + + EXPECT_EQ(voip_core_->SetSendCodec(channel, kPcmuPayload, kPcmuFormat), + VoipResult::kOk); + EXPECT_EQ( + voip_core_->SetReceiveCodecs(channel, {{kPcmuPayload, kPcmuFormat}}), + VoipResult::kOk); + + // Call StopSend and StopPlayout without starting them in + // the first place. Should see that it is already in the + // stopped state and return kOk. + EXPECT_EQ(voip_core_->StopSend(channel), VoipResult::kOk); + EXPECT_EQ(voip_core_->StopPlayout(channel), VoipResult::kOk); + + EXPECT_EQ(voip_core_->ReleaseChannel(channel), VoipResult::kOk); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/voip_core.cc b/third_party/libwebrtc/audio/voip/voip_core.cc new file mode 100644 index 0000000000..8df1c594aa --- /dev/null +++ b/third_party/libwebrtc/audio/voip/voip_core.cc @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/voip/voip_core.h" + +#include <algorithm> +#include <memory> +#include <utility> + +#include "api/audio_codecs/audio_format.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { + +// For Windows, use specific enum type to initialize default audio device as +// defined in AudioDeviceModule::WindowsDeviceType. 
+#if defined(WEBRTC_WIN) +constexpr AudioDeviceModule::WindowsDeviceType kAudioDeviceId = + AudioDeviceModule::WindowsDeviceType::kDefaultCommunicationDevice; +#else +constexpr uint16_t kAudioDeviceId = 0; +#endif // defined(WEBRTC_WIN) + +// Maximum value range limit on ChannelId. This can be increased without any +// side effect and only set at this moderate value for better readability for +// logging. +static constexpr int kMaxChannelId = 100000; + +} // namespace + +VoipCore::VoipCore(rtc::scoped_refptr<AudioEncoderFactory> encoder_factory, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + std::unique_ptr<TaskQueueFactory> task_queue_factory, + rtc::scoped_refptr<AudioDeviceModule> audio_device_module, + rtc::scoped_refptr<AudioProcessing> audio_processing) { + encoder_factory_ = std::move(encoder_factory); + decoder_factory_ = std::move(decoder_factory); + task_queue_factory_ = std::move(task_queue_factory); + audio_device_module_ = std::move(audio_device_module); + audio_processing_ = std::move(audio_processing); + audio_mixer_ = AudioMixerImpl::Create(); + + // AudioTransportImpl depends on audio mixer and audio processing instances. + audio_transport_ = std::make_unique<AudioTransportImpl>( + audio_mixer_.get(), audio_processing_.get(), nullptr); +} + +bool VoipCore::InitializeIfNeeded() { + // `audio_device_module_` internally owns a lock and the whole logic here + // needs to be executed atomically once using another lock in VoipCore. + // Further changes in this method will need to make sure that no deadlock is + // introduced in the future. + MutexLock lock(&lock_); + + if (initialized_) { + return true; + } + + // Initialize ADM. + if (audio_device_module_->Init() != 0) { + RTC_LOG(LS_ERROR) << "Failed to initialize the ADM."; + return false; + } + + // Note that failures on initializing default recording/speaker devices are + // not considered to be fatal here. 
In certain case, caller may not care about + // recording device functioning (e.g webinar where only speaker is available). + // It's also possible that there are other audio devices available that may + // work. + + // Initialize default speaker device. + if (audio_device_module_->SetPlayoutDevice(kAudioDeviceId) != 0) { + RTC_LOG(LS_WARNING) << "Unable to set playout device."; + } + if (audio_device_module_->InitSpeaker() != 0) { + RTC_LOG(LS_WARNING) << "Unable to access speaker."; + } + + // Initialize default recording device. + if (audio_device_module_->SetRecordingDevice(kAudioDeviceId) != 0) { + RTC_LOG(LS_WARNING) << "Unable to set recording device."; + } + if (audio_device_module_->InitMicrophone() != 0) { + RTC_LOG(LS_WARNING) << "Unable to access microphone."; + } + + // Set number of channels on speaker device. + bool available = false; + if (audio_device_module_->StereoPlayoutIsAvailable(&available) != 0) { + RTC_LOG(LS_WARNING) << "Unable to query stereo playout."; + } + if (audio_device_module_->SetStereoPlayout(available) != 0) { + RTC_LOG(LS_WARNING) << "Unable to set mono/stereo playout mode."; + } + + // Set number of channels on recording device. + available = false; + if (audio_device_module_->StereoRecordingIsAvailable(&available) != 0) { + RTC_LOG(LS_WARNING) << "Unable to query stereo recording."; + } + if (audio_device_module_->SetStereoRecording(available) != 0) { + RTC_LOG(LS_WARNING) << "Unable to set stereo recording mode."; + } + + if (audio_device_module_->RegisterAudioCallback(audio_transport_.get()) != + 0) { + RTC_LOG(LS_WARNING) << "Unable to register audio callback."; + } + + initialized_ = true; + + return true; +} + +ChannelId VoipCore::CreateChannel(Transport* transport, + absl::optional<uint32_t> local_ssrc) { + ChannelId channel_id; + + // Set local ssrc to random if not set by caller. 
+ if (!local_ssrc) { + Random random(rtc::TimeMicros()); + local_ssrc = random.Rand<uint32_t>(); + } + + rtc::scoped_refptr<AudioChannel> channel = + rtc::make_ref_counted<AudioChannel>(transport, local_ssrc.value(), + task_queue_factory_.get(), + audio_mixer_.get(), decoder_factory_); + + { + MutexLock lock(&lock_); + + channel_id = static_cast<ChannelId>(next_channel_id_); + channels_[channel_id] = channel; + next_channel_id_++; + if (next_channel_id_ >= kMaxChannelId) { + next_channel_id_ = 0; + } + } + + // Set ChannelId in audio channel for logging/debugging purpose. + channel->SetId(channel_id); + + return channel_id; +} + +VoipResult VoipCore::ReleaseChannel(ChannelId channel_id) { + // Destroy channel outside of the lock. + rtc::scoped_refptr<AudioChannel> channel; + + bool no_channels_after_release = false; + + { + MutexLock lock(&lock_); + + auto iter = channels_.find(channel_id); + if (iter != channels_.end()) { + channel = std::move(iter->second); + channels_.erase(iter); + } + + no_channels_after_release = channels_.empty(); + } + + VoipResult status_code = VoipResult::kOk; + if (!channel) { + RTC_LOG(LS_WARNING) << "Channel " << channel_id << " not found"; + status_code = VoipResult::kInvalidArgument; + } + + if (no_channels_after_release) { + // TODO(bugs.webrtc.org/11581): unclear if we still need to clear `channel` + // here. + channel = nullptr; + + // Make sure to stop playout on ADM if it is playing. 
+ if (audio_device_module_->Playing()) { + if (audio_device_module_->StopPlayout() != 0) { + RTC_LOG(LS_WARNING) << "StopPlayout failed"; + status_code = VoipResult::kInternal; + } + } + } + + return status_code; +} + +rtc::scoped_refptr<AudioChannel> VoipCore::GetChannel(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel; + { + MutexLock lock(&lock_); + auto iter = channels_.find(channel_id); + if (iter != channels_.end()) { + channel = iter->second; + } + } + if (!channel) { + RTC_LOG(LS_ERROR) << "Channel " << channel_id << " not found"; + } + return channel; +} + +bool VoipCore::UpdateAudioTransportWithSenders() { + std::vector<AudioSender*> audio_senders; + + // Gather a list of audio channel that are currently sending along with + // highest sampling rate and channel numbers to configure into audio + // transport. + int max_sampling_rate = 8000; + size_t max_num_channels = 1; + { + MutexLock lock(&lock_); + // Reserve to prevent run time vector re-allocation. + audio_senders.reserve(channels_.size()); + for (auto kv : channels_) { + rtc::scoped_refptr<AudioChannel>& channel = kv.second; + if (channel->IsSendingMedia()) { + auto encoder_format = channel->GetEncoderFormat(); + if (!encoder_format) { + RTC_LOG(LS_ERROR) + << "channel " << channel->GetId() << " encoder is not set"; + continue; + } + audio_senders.push_back(channel->GetAudioSender()); + max_sampling_rate = + std::max(max_sampling_rate, encoder_format->clockrate_hz); + max_num_channels = + std::max(max_num_channels, encoder_format->num_channels); + } + } + } + + audio_transport_->UpdateAudioSenders(audio_senders, max_sampling_rate, + max_num_channels); + + // Depending on availability of senders, turn on or off ADM recording. + if (!audio_senders.empty()) { + // Initialize audio device module and default device if needed. 
+ if (!InitializeIfNeeded()) { + return false; + } + + if (!audio_device_module_->Recording()) { + if (audio_device_module_->InitRecording() != 0) { + RTC_LOG(LS_ERROR) << "InitRecording failed"; + return false; + } + if (audio_device_module_->StartRecording() != 0) { + RTC_LOG(LS_ERROR) << "StartRecording failed"; + return false; + } + } + } else { + if (audio_device_module_->Recording() && + audio_device_module_->StopRecording() != 0) { + RTC_LOG(LS_ERROR) << "StopRecording failed"; + return false; + } + } + return true; +} + +VoipResult VoipCore::StartSend(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + if (!channel->StartSend()) { + return VoipResult::kFailedPrecondition; + } + + return UpdateAudioTransportWithSenders() ? VoipResult::kOk + : VoipResult::kInternal; +} + +VoipResult VoipCore::StopSend(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->StopSend(); + + return UpdateAudioTransportWithSenders() ? VoipResult::kOk + : VoipResult::kInternal; +} + +VoipResult VoipCore::StartPlayout(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + if (channel->IsPlaying()) { + return VoipResult::kOk; + } + + if (!channel->StartPlay()) { + return VoipResult::kFailedPrecondition; + } + + // Initialize audio device module and default device if needed. 
+ if (!InitializeIfNeeded()) { + return VoipResult::kInternal; + } + + if (!audio_device_module_->Playing()) { + if (audio_device_module_->InitPlayout() != 0) { + RTC_LOG(LS_ERROR) << "InitPlayout failed"; + return VoipResult::kInternal; + } + if (audio_device_module_->StartPlayout() != 0) { + RTC_LOG(LS_ERROR) << "StartPlayout failed"; + return VoipResult::kInternal; + } + } + + return VoipResult::kOk; +} + +VoipResult VoipCore::StopPlayout(ChannelId channel_id) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->StopPlay(); + + return VoipResult::kOk; +} + +VoipResult VoipCore::ReceivedRTPPacket( + ChannelId channel_id, + rtc::ArrayView<const uint8_t> rtp_packet) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->ReceivedRTPPacket(rtp_packet); + + return VoipResult::kOk; +} + +VoipResult VoipCore::ReceivedRTCPPacket( + ChannelId channel_id, + rtc::ArrayView<const uint8_t> rtcp_packet) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->ReceivedRTCPPacket(rtcp_packet); + + return VoipResult::kOk; +} + +VoipResult VoipCore::SetSendCodec(ChannelId channel_id, + int payload_type, + const SdpAudioFormat& encoder_format) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + auto encoder = encoder_factory_->MakeAudioEncoder( + payload_type, encoder_format, absl::nullopt); + channel->SetEncoder(payload_type, encoder_format, std::move(encoder)); + + return VoipResult::kOk; +} + +VoipResult VoipCore::SetReceiveCodecs( + ChannelId channel_id, + const std::map<int, SdpAudioFormat>& decoder_specs) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return 
VoipResult::kInvalidArgument; + } + + channel->SetReceiveCodecs(decoder_specs); + + return VoipResult::kOk; +} + +VoipResult VoipCore::RegisterTelephoneEventType(ChannelId channel_id, + int rtp_payload_type, + int sample_rate_hz) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->RegisterTelephoneEventType(rtp_payload_type, sample_rate_hz); + + return VoipResult::kOk; +} + +VoipResult VoipCore::SendDtmfEvent(ChannelId channel_id, + DtmfEvent dtmf_event, + int duration_ms) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + return (channel->SendTelephoneEvent(static_cast<int>(dtmf_event), duration_ms) + ? VoipResult::kOk + : VoipResult::kFailedPrecondition); +} + +VoipResult VoipCore::GetIngressStatistics(ChannelId channel_id, + IngressStatistics& ingress_stats) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + ingress_stats = channel->GetIngressStatistics(); + + return VoipResult::kOk; +} + +VoipResult VoipCore::GetChannelStatistics(ChannelId channel_id, + ChannelStatistics& channel_stats) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel_stats = channel->GetChannelStatistics(); + + return VoipResult::kOk; +} + +VoipResult VoipCore::SetInputMuted(ChannelId channel_id, bool enable) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + channel->SetMute(enable); + + return VoipResult::kOk; +} + +VoipResult VoipCore::GetInputVolumeInfo(ChannelId channel_id, + VolumeInfo& input_volume) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + 
input_volume.audio_level = channel->GetInputAudioLevel(); + input_volume.total_energy = channel->GetInputTotalEnergy(); + input_volume.total_duration = channel->GetInputTotalDuration(); + + return VoipResult::kOk; +} + +VoipResult VoipCore::GetOutputVolumeInfo(ChannelId channel_id, + VolumeInfo& output_volume) { + rtc::scoped_refptr<AudioChannel> channel = GetChannel(channel_id); + + if (!channel) { + return VoipResult::kInvalidArgument; + } + + output_volume.audio_level = channel->GetOutputAudioLevel(); + output_volume.total_energy = channel->GetOutputTotalEnergy(); + output_volume.total_duration = channel->GetOutputTotalDuration(); + + return VoipResult::kOk; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/voip/voip_core.h b/third_party/libwebrtc/audio/voip/voip_core.h new file mode 100644 index 0000000000..6c3aec6fa2 --- /dev/null +++ b/third_party/libwebrtc/audio/voip/voip_core.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef AUDIO_VOIP_VOIP_CORE_H_ +#define AUDIO_VOIP_VOIP_CORE_H_ + +#include <map> +#include <memory> +#include <queue> +#include <unordered_map> +#include <vector> + +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/audio_codecs/audio_encoder_factory.h" +#include "api/scoped_refptr.h" +#include "api/task_queue/task_queue_factory.h" +#include "api/voip/voip_base.h" +#include "api/voip/voip_codec.h" +#include "api/voip/voip_dtmf.h" +#include "api/voip/voip_engine.h" +#include "api/voip/voip_network.h" +#include "api/voip/voip_statistics.h" +#include "api/voip/voip_volume_control.h" +#include "audio/audio_transport_impl.h" +#include "audio/voip/audio_channel.h" +#include "modules/audio_device/include/audio_device.h" +#include "modules/audio_mixer/audio_mixer_impl.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/synchronization/mutex.h" + +namespace webrtc { + +// VoipCore is the implementatino of VoIP APIs listed in api/voip directory. +// It manages a vector of AudioChannel objects where each is mapped with a +// ChannelId (int) type. ChannelId is the primary key to locate a specific +// AudioChannel object to operate requested VoIP API from the caller. +// +// This class receives required audio components from caller at construction and +// owns the life cycle of them to orchestrate the proper destruction sequence. +class VoipCore : public VoipEngine, + public VoipBase, + public VoipNetwork, + public VoipCodec, + public VoipDtmf, + public VoipStatistics, + public VoipVolumeControl { + public: + // Construct VoipCore with provided arguments. 
+ VoipCore(rtc::scoped_refptr<AudioEncoderFactory> encoder_factory, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + std::unique_ptr<TaskQueueFactory> task_queue_factory, + rtc::scoped_refptr<AudioDeviceModule> audio_device_module, + rtc::scoped_refptr<AudioProcessing> audio_processing); + ~VoipCore() override = default; + + // Implements VoipEngine interfaces. + VoipBase& Base() override { return *this; } + VoipNetwork& Network() override { return *this; } + VoipCodec& Codec() override { return *this; } + VoipDtmf& Dtmf() override { return *this; } + VoipStatistics& Statistics() override { return *this; } + VoipVolumeControl& VolumeControl() override { return *this; } + + // Implements VoipBase interfaces. + ChannelId CreateChannel(Transport* transport, + absl::optional<uint32_t> local_ssrc) override; + VoipResult ReleaseChannel(ChannelId channel_id) override; + VoipResult StartSend(ChannelId channel_id) override; + VoipResult StopSend(ChannelId channel_id) override; + VoipResult StartPlayout(ChannelId channel_id) override; + VoipResult StopPlayout(ChannelId channel_id) override; + + // Implements VoipNetwork interfaces. + VoipResult ReceivedRTPPacket( + ChannelId channel_id, + rtc::ArrayView<const uint8_t> rtp_packet) override; + VoipResult ReceivedRTCPPacket( + ChannelId channel_id, + rtc::ArrayView<const uint8_t> rtcp_packet) override; + + // Implements VoipCodec interfaces. + VoipResult SetSendCodec(ChannelId channel_id, + int payload_type, + const SdpAudioFormat& encoder_format) override; + VoipResult SetReceiveCodecs( + ChannelId channel_id, + const std::map<int, SdpAudioFormat>& decoder_specs) override; + + // Implements VoipDtmf interfaces. + VoipResult RegisterTelephoneEventType(ChannelId channel_id, + int rtp_payload_type, + int sample_rate_hz) override; + VoipResult SendDtmfEvent(ChannelId channel_id, + DtmfEvent dtmf_event, + int duration_ms) override; + + // Implements VoipStatistics interfaces. 
+ VoipResult GetIngressStatistics(ChannelId channel_id, + IngressStatistics& ingress_stats) override; + VoipResult GetChannelStatistics(ChannelId channe_id, + ChannelStatistics& channel_stats) override; + + // Implements VoipVolumeControl interfaces. + VoipResult SetInputMuted(ChannelId channel_id, bool enable) override; + VoipResult GetInputVolumeInfo(ChannelId channel_id, + VolumeInfo& volume_info) override; + VoipResult GetOutputVolumeInfo(ChannelId channel_id, + VolumeInfo& volume_info) override; + + private: + // Initialize ADM and default audio device if needed. + // Returns true if ADM is successfully initialized or already in such state + // (e.g called more than once). Returns false when ADM fails to initialize + // which would presumably render further processing useless. Note that such + // failure won't necessarily succeed in next initialization attempt as it + // would mean changing the ADM implementation. From Android N and onwards, the + // mobile app may not be able to gain microphone access when in background + // mode. Therefore it would be better to delay the logic as late as possible. + bool InitializeIfNeeded(); + + // Fetches the corresponding AudioChannel assigned with given `channel`. + // Returns nullptr if not found. + rtc::scoped_refptr<AudioChannel> GetChannel(ChannelId channel_id); + + // Updates AudioTransportImpl with a new set of actively sending AudioSender + // (AudioEgress). This needs to be invoked whenever StartSend/StopSend is + // involved by caller. Returns false when the selected audio device fails to + // initialize where it can't expect to deliver any audio input sample. + bool UpdateAudioTransportWithSenders(); + + // Synchronization for these are handled internally. + rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_; + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_; + std::unique_ptr<TaskQueueFactory> task_queue_factory_; + + // Synchronization is handled internally by AudioProcessing. 
+ // Must be placed before `audio_device_module_` for proper destruction. + rtc::scoped_refptr<AudioProcessing> audio_processing_; + + // Synchronization is handled internally by AudioMixer. + // Must be placed before `audio_device_module_` for proper destruction. + rtc::scoped_refptr<AudioMixer> audio_mixer_; + + // Synchronization is handled internally by AudioTransportImpl. + // Must be placed before `audio_device_module_` for proper destruction. + std::unique_ptr<AudioTransportImpl> audio_transport_; + + // Synchronization is handled internally by AudioDeviceModule. + rtc::scoped_refptr<AudioDeviceModule> audio_device_module_; + + Mutex lock_; + + // Member to track a next ChannelId for new AudioChannel. + int next_channel_id_ RTC_GUARDED_BY(lock_) = 0; + + // Container to track currently active AudioChannel objects mapped by + // ChannelId. + std::unordered_map<ChannelId, rtc::scoped_refptr<AudioChannel>> channels_ + RTC_GUARDED_BY(lock_); + + // Boolean flag to ensure initialization only occurs once. + bool initialized_ RTC_GUARDED_BY(lock_) = false; +}; + +} // namespace webrtc + +#endif // AUDIO_VOIP_VOIP_CORE_H_ |