/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_mixer/frame_combiner.h"

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>

#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/rtp_packet_info.h"
#include "api/rtp_packet_infos.h"
#include "api/units/timestamp.h"
#include "audio/utility/audio_frame_operations.h"
#include "modules/audio_mixer/gain_change_calculator.h"
#include "modules/audio_mixer/sine_wave_generator.h"
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gmock.h"
#include "test/gtest.h"

namespace webrtc {

namespace {

using ::testing::ElementsAreArray;
using ::testing::IsEmpty;
using ::testing::UnorderedElementsAreArray;

struct FrameCombinerConfig {
  bool use_limiter;
  int sample_rate_hz;
  int number_of_channels;
  float wave_frequency;
};

std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  rtc::StringBuilder ss;
  ss << "Sample rate: " << sample_rate_hz << " ,";
  ss << "number of channels: " << number_of_channels << " ,";
  ss << "number of sources: " << number_of_sources;
  return ss.Release();
}

std::string ProduceDebugText(const FrameCombinerConfig& config) {
  rtc::StringBuilder ss;
  ss << "Sample rate: " << config.sample_rate_hz << " ,";
  ss << "number of channels: " << config.number_of_channels << " ,";
  ss << "limiter active: " << (config.use_limiter ? "on" : "off") << " ,";
  ss << "wave frequency: " << config.wave_frequency << " ,";
  return ss.Release();
}

AudioFrame frame1;
AudioFrame frame2;

void SetUpFrames(int sample_rate_hz, int number_of_channels) {
  RtpPacketInfo packet_info1(/*ssrc=*/1001, /*csrcs=*/{},
                             /*rtp_timestamp=*/1000,
                             /*receive_time=*/Timestamp::Millis(1));
  RtpPacketInfo packet_info2(/*ssrc=*/4004, /*csrcs=*/{},
                             /*rtp_timestamp=*/1234,
                             /*receive_time=*/Timestamp::Millis(2));
  RtpPacketInfo packet_info3(/*ssrc=*/7007, /*csrcs=*/{},
                             /*rtp_timestamp=*/1333,
                             /*receive_time=*/Timestamp::Millis(2));

  frame1.packet_infos_ = RtpPacketInfos({packet_info1});
  frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});

  for (auto* frame : {&frame1, &frame2}) {
    frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
                       sample_rate_hz, AudioFrame::kNormalSpeech,
                       AudioFrame::kVadActive, number_of_channels);
  }
}
}  // namespace

// The limiter requires a sample rate divisible by 2000.
TEST(FrameCombiner, BasicApiCallsLimiter) {
  FrameCombiner combiner(true);
  for (const int rate : {8000, 18000, 34000, 48000}) {
    for (const int number_of_channels : {1, 2, 4, 8}) {
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      for (const int number_of_frames : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(rate, number_of_channels, number_of_frames));
        const std::vector<AudioFrame*> frames_to_combine(
            all_frames.begin(), all_frames.begin() + number_of_frames);

        AudioFrame audio_frame_for_mixing;
        combiner.Combine(frames_to_combine, number_of_channels, rate,
                         frames_to_combine.size(), &audio_frame_for_mixing);
      }
    }
  }
}

// The RtpPacketInfos field of the mixed packet should contain the union of the
// RtpPacketInfos from the frames that were actually mixed.
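// For example, mixing frame1 (SSRC 1001) and frame2 (SSRCs 4004 and 7007), as
// set up in SetUpFrames() above, should yield a mixed frame whose
// packet_infos_ covers all three SSRCs, in any order.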
TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
  static constexpr int kSampleRateHz = 48000;
  static constexpr int kNumChannels = 1;
  FrameCombiner combiner(true);
  const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
  SetUpFrames(kSampleRateHz, kNumChannels);

  for (const int number_of_frames : {0, 1, 2}) {
    SCOPED_TRACE(
        ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
    const std::vector<AudioFrame*> frames_to_combine(
        all_frames.begin(), all_frames.begin() + number_of_frames);

    std::vector<RtpPacketInfo> packet_infos;
    for (const auto& frame : frames_to_combine) {
      packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
                          frame->packet_infos_.end());
    }

    AudioFrame audio_frame_for_mixing;
    combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
                     frames_to_combine.size(), &audio_frame_for_mixing);
    EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
                UnorderedElementsAreArray(packet_infos));
  }
}

// There are DCHECKs in place to check for invalid parameters.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
  FrameCombiner combiner(true);
  for (const int rate : {8000, 18000, 34000, 48000}) {
    for (const int number_of_channels : {10, 20, 21}) {
      if (static_cast<size_t>(rate / 100 * number_of_channels) >
          AudioFrame::kMaxDataSizeSamples) {
        continue;
      }
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      const int number_of_frames = 2;
      SCOPED_TRACE(
          ProduceDebugText(rate, number_of_channels, number_of_frames));
      const std::vector<AudioFrame*> frames_to_combine(
          all_frames.begin(), all_frames.begin() + number_of_frames);
      AudioFrame audio_frame_for_mixing;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
          "");
#elif !RTC_DCHECK_IS_ON
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);
#endif
    }
  }
}

TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
  FrameCombiner combiner(true);
  for (const int rate : {50000, 96000, 128000, 196000}) {
    for (const int number_of_channels : {1, 2, 3}) {
      if (static_cast<size_t>(rate / 100 * number_of_channels) >
          AudioFrame::kMaxDataSizeSamples) {
        continue;
      }
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      const int number_of_frames = 2;
      SCOPED_TRACE(
          ProduceDebugText(rate, number_of_channels, number_of_frames));
      const std::vector<AudioFrame*> frames_to_combine(
          all_frames.begin(), all_frames.begin() + number_of_frames);
      AudioFrame audio_frame_for_mixing;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
          "");
#elif !RTC_DCHECK_IS_ON
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);
#endif
    }
  }
}

// With no limiter, the rate has to be divisible by 100 since we use
// 10 ms frames.
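// For example, at 44100 Hz a 10 ms frame holds 44100 / 100 = 441 samples per
// channel, so 44100 can be used below; the limiter tests above stick to rates
// such as 18000 and 48000, which are also divisible by 2000.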
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2, 4, 8}) {
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      for (const int number_of_frames : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(rate, number_of_channels, number_of_frames));
        const std::vector<AudioFrame*> frames_to_combine(
            all_frames.begin(), all_frames.begin() + number_of_frames);

        AudioFrame audio_frame_for_mixing;
        combiner.Combine(frames_to_combine, number_of_channels, rate,
                         frames_to_combine.size(), &audio_frame_for_mixing);
      }
    }
  }
}

TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));

      AudioFrame audio_frame_for_mixing;

      const std::vector<AudioFrame*> frames_to_combine;
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);
      const int16_t* audio_frame_for_mixing_data =
          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing_data,
          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
    }
  }
}

TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2, 4, 8, 10}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));

      AudioFrame audio_frame_for_mixing;

      SetUpFrames(rate, number_of_channels);
      int16_t* frame1_data = frame1.mutable_data();
      std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

      const int16_t* audio_frame_for_mixing_data =
          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing_data,
          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      std::vector<int16_t> expected(number_of_channels * rate / 100);
      std::iota(expected.begin(), expected.end(), 0);
      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
                  ElementsAreArray(frame1.packet_infos_));
    }
  }
}

// Send a sine wave through the FrameCombiner, and check that the
// difference between input and output varies smoothly. Also check
// that it is inside reasonable bounds. This is to catch issues like
// chromium:695993 and chromium:816875.
TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
  // Rates are divisible by 2000 when the limiter is active.
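  // The two configs below with use_limiter == false only need rates divisible
  // by 100 (30100 and 16500); the limiter configs use 8000, 16000, 18000 and
  // 10000, which are all divisible by 2000.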
  std::vector<FrameCombinerConfig> configs = {
      {false, 30100, 2, 50.f},  {false, 16500, 1, 3200.f},
      {true, 8000, 1, 3200.f},  {true, 16000, 1, 50.f},
      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
  };

  for (const auto& config : configs) {
    SCOPED_TRACE(ProduceDebugText(config));

    FrameCombiner combiner(config.use_limiter);
    constexpr int16_t wave_amplitude = 30000;
    SineWaveGenerator wave_generator(config.wave_frequency, wave_amplitude);

    GainChangeCalculator change_calculator;
    float cumulative_change = 0.f;

    constexpr size_t iterations = 100;

    for (size_t i = 0; i < iterations; ++i) {
      SetUpFrames(config.sample_rate_hz, config.number_of_channels);
      wave_generator.GenerateNextFrame(&frame1);
      AudioFrameOperations::Mute(&frame2);

      std::vector<AudioFrame*> frames_to_combine = {&frame1};
      if (i % 2 == 0) {
        frames_to_combine.push_back(&frame2);
      }
      const size_t number_of_samples =
          frame1.samples_per_channel_ * config.number_of_channels;

      // Ensures the limiter is on if 'use_limiter' is set.
      constexpr size_t number_of_streams = 2;
      AudioFrame audio_frame_for_mixing;
      combiner.Combine(frames_to_combine, config.number_of_channels,
                       config.sample_rate_hz, number_of_streams,
                       &audio_frame_for_mixing);
      cumulative_change += change_calculator.CalculateGainChange(
          rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
          rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
                                        number_of_samples));
    }

    // Check that the gain doesn't vary too much.
    EXPECT_LT(cumulative_change, 10);

    // Check that the latest gain is within reasonable bounds. It
    // should be slightly less than 1.
    EXPECT_LT(0.9f, change_calculator.LatestGain());
    EXPECT_LT(change_calculator.LatestGain(), 1.01f);
  }
}
}  // namespace webrtc