diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/libwebrtc/modules/audio_coding/acm2 | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_coding/acm2')
17 files changed, 4105 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc new file mode 100644 index 0000000000..8bc76cd2af --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/acm_receive_test.h" + +#include <stdio.h> + +#include <memory> + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +namespace { +AudioCodingModule::Config MakeAcmConfig( + Clock* clock, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) { + AudioCodingModule::Config config; + config.clock = clock; + config.decoder_factory = std::move(decoder_factory); + return config; +} +} // namespace + +AcmReceiveTestOldApi::AcmReceiveTestOldApi( + PacketSource* packet_source, + AudioSink* audio_sink, + int output_freq_hz, + NumOutputChannels exptected_output_channels, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) + : clock_(0), + acm_(webrtc::AudioCodingModule::Create( + MakeAcmConfig(&clock_, std::move(decoder_factory)))), + packet_source_(packet_source), + audio_sink_(audio_sink), + output_freq_hz_(output_freq_hz), + exptected_output_channels_(exptected_output_channels) {} + 
+AcmReceiveTestOldApi::~AcmReceiveTestOldApi() = default; + +void AcmReceiveTestOldApi::RegisterDefaultCodecs() { + acm_->SetReceiveCodecs({{103, {"ISAC", 16000, 1}}, + {104, {"ISAC", 32000, 1}}, + {107, {"L16", 8000, 1}}, + {108, {"L16", 16000, 1}}, + {109, {"L16", 32000, 1}}, + {111, {"L16", 8000, 2}}, + {112, {"L16", 16000, 2}}, + {113, {"L16", 32000, 2}}, + {0, {"PCMU", 8000, 1}}, + {110, {"PCMU", 8000, 2}}, + {8, {"PCMA", 8000, 1}}, + {118, {"PCMA", 8000, 2}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {119, {"G722", 8000, 2}}, + {120, {"OPUS", 48000, 2, {{"stereo", "1"}}}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); +} + +// Remaps payload types from ACM's default to those used in the resource file +// neteq_universal_new.rtp. +void AcmReceiveTestOldApi::RegisterNetEqTestCodecs() { + acm_->SetReceiveCodecs({{103, {"ISAC", 16000, 1}}, + {104, {"ISAC", 32000, 1}}, + {93, {"L16", 8000, 1}}, + {94, {"L16", 16000, 1}}, + {95, {"L16", 32000, 1}}, + {0, {"PCMU", 8000, 1}}, + {8, {"PCMA", 8000, 1}}, + {102, {"ILBC", 8000, 1}}, + {9, {"G722", 8000, 1}}, + {120, {"OPUS", 48000, 2}}, + {13, {"CN", 8000, 1}}, + {98, {"CN", 16000, 1}}, + {99, {"CN", 32000, 1}}}); +} + +void AcmReceiveTestOldApi::Run() { + for (std::unique_ptr<Packet> packet(packet_source_->NextPacket()); packet; + packet = packet_source_->NextPacket()) { + // Pull audio until time to insert packet. 
+ while (clock_.TimeInMilliseconds() < packet->time_ms()) { + AudioFrame output_frame; + bool muted; + EXPECT_EQ(0, + acm_->PlayoutData10Ms(output_freq_hz_, &output_frame, &muted)); + ASSERT_EQ(output_freq_hz_, output_frame.sample_rate_hz_); + ASSERT_FALSE(muted); + const size_t samples_per_block = + static_cast<size_t>(output_freq_hz_ * 10 / 1000); + EXPECT_EQ(samples_per_block, output_frame.samples_per_channel_); + if (exptected_output_channels_ != kArbitraryChannels) { + if (output_frame.speech_type_ == webrtc::AudioFrame::kPLC) { + // Don't check number of channels for PLC output, since each test run + // usually starts with a short period of mono PLC before decoding the + // first packet. + } else { + EXPECT_EQ(exptected_output_channels_, output_frame.num_channels_); + } + } + ASSERT_TRUE(audio_sink_->WriteAudioFrame(output_frame)); + clock_.AdvanceTimeMilliseconds(10); + AfterGetAudio(); + } + + EXPECT_EQ(0, acm_->IncomingPacket( + packet->payload(), + static_cast<int32_t>(packet->payload_length_bytes()), + packet->header())) + << "Failure when inserting packet:" << std::endl + << " PT = " << static_cast<int>(packet->header().payloadType) + << std::endl + << " TS = " << packet->header().timestamp << std::endl + << " SN = " << packet->header().sequenceNumber; + } +} + +AcmReceiveTestToggleOutputFreqOldApi::AcmReceiveTestToggleOutputFreqOldApi( + PacketSource* packet_source, + AudioSink* audio_sink, + int output_freq_hz_1, + int output_freq_hz_2, + int toggle_period_ms, + NumOutputChannels exptected_output_channels) + : AcmReceiveTestOldApi(packet_source, + audio_sink, + output_freq_hz_1, + exptected_output_channels, + CreateBuiltinAudioDecoderFactory()), + output_freq_hz_1_(output_freq_hz_1), + output_freq_hz_2_(output_freq_hz_2), + toggle_period_ms_(toggle_period_ms), + last_toggle_time_ms_(clock_.TimeInMilliseconds()) {} + +void AcmReceiveTestToggleOutputFreqOldApi::AfterGetAudio() { + if (clock_.TimeInMilliseconds() >= last_toggle_time_ms_ + 
toggle_period_ms_) { + output_freq_hz_ = (output_freq_hz_ == output_freq_hz_1_) + ? output_freq_hz_2_ + : output_freq_hz_1_; + last_toggle_time_ms_ = clock_.TimeInMilliseconds(); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h new file mode 100644 index 0000000000..2095ef9025 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receive_test.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_ +#define MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_ + +#include <stddef.h> // for size_t + +#include <memory> +#include <string> + +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/scoped_refptr.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +class AudioCodingModule; +class AudioDecoder; + +namespace test { +class AudioSink; +class PacketSource; + +class AcmReceiveTestOldApi { + public: + enum NumOutputChannels : size_t { + kArbitraryChannels = 0, + kMonoOutput = 1, + kStereoOutput = 2, + kQuadOutput = 4 + }; + + AcmReceiveTestOldApi(PacketSource* packet_source, + AudioSink* audio_sink, + int output_freq_hz, + NumOutputChannels exptected_output_channels, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory); + virtual ~AcmReceiveTestOldApi(); + + AcmReceiveTestOldApi(const AcmReceiveTestOldApi&) = delete; + AcmReceiveTestOldApi& operator=(const AcmReceiveTestOldApi&) = delete; + + // Registers the codecs with default parameters 
from ACM. + void RegisterDefaultCodecs(); + + // Registers codecs with payload types matching the pre-encoded NetEq test + // files. + void RegisterNetEqTestCodecs(); + + // Runs the test; failures are reported through gtest expectations. + void Run(); + + AudioCodingModule* get_acm() { return acm_.get(); } + + protected: + // Method is called after each block of output audio is received from ACM. + virtual void AfterGetAudio() {} + + SimulatedClock clock_; + std::unique_ptr<AudioCodingModule> acm_; + PacketSource* packet_source_; + AudioSink* audio_sink_; + int output_freq_hz_; + NumOutputChannels exptected_output_channels_; +}; + +// This test toggles the output frequency every `toggle_period_ms`. The test +// starts with `output_freq_hz_1`. Except for the toggling, it does the same +// thing as AcmReceiveTestOldApi. +class AcmReceiveTestToggleOutputFreqOldApi : public AcmReceiveTestOldApi { + public: + AcmReceiveTestToggleOutputFreqOldApi( + PacketSource* packet_source, + AudioSink* audio_sink, + int output_freq_hz_1, + int output_freq_hz_2, + int toggle_period_ms, + NumOutputChannels exptected_output_channels); + + protected: + void AfterGetAudio() override; + + const int output_freq_hz_1_; + const int output_freq_hz_2_; + const int toggle_period_ms_; + int64_t last_toggle_time_ms_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_ACM2_ACM_RECEIVE_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc new file mode 100644 index 0000000000..b078af1d2d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/acm_receiver.h" + +#include <stdlib.h> +#include <string.h> + +#include <cstdint> +#include <vector> + +#include "absl/strings/match.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/acm2/acm_resampler.h" +#include "modules/audio_coding/acm2/call_statistics.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/strings/audio_format_to_string.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { + +namespace acm2 { + +namespace { + +std::unique_ptr<NetEq> CreateNetEq( + NetEqFactory* neteq_factory, + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) { + if (neteq_factory) { + return neteq_factory->CreateNetEq(config, decoder_factory, clock); + } + return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); +} + +} // namespace + +AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) + : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), + neteq_(CreateNetEq(config.neteq_factory, + config.neteq_config, + config.clock, + config.decoder_factory)), + clock_(config.clock), + resampled_last_output_frame_(true) { + RTC_DCHECK(clock_); + memset(last_audio_buffer_.get(), 0, + sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples); +} + +AcmReceiver::~AcmReceiver() = default; + +int AcmReceiver::SetMinimumDelay(int delay_ms) { + if (neteq_->SetMinimumDelay(delay_ms)) + return 0; + RTC_LOG(LS_ERROR) << "AcmReceiver::SetMinimumDelay " << delay_ms; + return -1; +} + +int AcmReceiver::SetMaximumDelay(int delay_ms) { + if (neteq_->SetMaximumDelay(delay_ms)) + return 0; + RTC_LOG(LS_ERROR) << 
"AcmReceiver::SetMaximumDelay " << delay_ms; + return -1; +} + +bool AcmReceiver::SetBaseMinimumDelayMs(int delay_ms) { + return neteq_->SetBaseMinimumDelayMs(delay_ms); +} + +int AcmReceiver::GetBaseMinimumDelayMs() const { + return neteq_->GetBaseMinimumDelayMs(); +} + +absl::optional<int> AcmReceiver::last_packet_sample_rate_hz() const { + MutexLock lock(&mutex_); + if (!last_decoder_) { + return absl::nullopt; + } + return last_decoder_->sample_rate_hz; +} + +int AcmReceiver::last_output_sample_rate_hz() const { + return neteq_->last_output_sample_rate_hz(); +} + +int AcmReceiver::InsertPacket(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> incoming_payload) { + if (incoming_payload.empty()) { + neteq_->InsertEmptyPacket(rtp_header); + return 0; + } + + int payload_type = rtp_header.payloadType; + auto format = neteq_->GetDecoderFormat(payload_type); + if (format && absl::EqualsIgnoreCase(format->sdp_format.name, "red")) { + // This is a RED packet. Get the format of the audio codec. + payload_type = incoming_payload[0] & 0x7f; + format = neteq_->GetDecoderFormat(payload_type); + } + if (!format) { + RTC_LOG_F(LS_ERROR) << "Payload-type " << payload_type + << " is not registered."; + return -1; + } + + { + MutexLock lock(&mutex_); + if (absl::EqualsIgnoreCase(format->sdp_format.name, "cn")) { + if (last_decoder_ && last_decoder_->num_channels > 1) { + // This is a CNG and the audio codec is not mono, so skip pushing in + // packets into NetEq. + return 0; + } + } else { + last_decoder_ = DecoderInfo{/*payload_type=*/payload_type, + /*sample_rate_hz=*/format->sample_rate_hz, + /*num_channels=*/format->num_channels, + /*sdp_format=*/std::move(format->sdp_format)}; + } + } // `mutex_` is released. 
+ + if (neteq_->InsertPacket(rtp_header, incoming_payload) < 0) { + RTC_LOG(LS_ERROR) << "AcmReceiver::InsertPacket " + << static_cast<int>(rtp_header.payloadType) + << " Failed to insert packet"; + return -1; + } + return 0; +} + +int AcmReceiver::GetAudio(int desired_freq_hz, + AudioFrame* audio_frame, + bool* muted) { + RTC_DCHECK(muted); + + int current_sample_rate_hz = 0; + if (neteq_->GetAudio(audio_frame, muted, &current_sample_rate_hz) != + NetEq::kOK) { + RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - NetEq Failed."; + return -1; + } + + RTC_DCHECK_NE(current_sample_rate_hz, 0); + + // Update if resampling is required. + const bool need_resampling = + (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz); + + // Accessing members, take the lock. + MutexLock lock(&mutex_); + if (need_resampling && !resampled_last_output_frame_) { + // Prime the resampler with the last frame. + int16_t temp_output[AudioFrame::kMaxDataSizeSamples]; + int samples_per_channel_int = resampler_.Resample10Msec( + last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz, + audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples, + temp_output); + if (samples_per_channel_int < 0) { + RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - " + "Resampling last_audio_buffer_ failed."; + return -1; + } + } + + // TODO(bugs.webrtc.org/3923) Glitches in the output may appear if the output + // rate from NetEq changes. + if (need_resampling) { + // TODO(yujo): handle this more efficiently for muted frames. 
+ int samples_per_channel_int = resampler_.Resample10Msec( + audio_frame->data(), current_sample_rate_hz, desired_freq_hz, + audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples, + audio_frame->mutable_data()); + if (samples_per_channel_int < 0) { + RTC_LOG(LS_ERROR) + << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed."; + return -1; + } + audio_frame->samples_per_channel_ = + static_cast<size_t>(samples_per_channel_int); + audio_frame->sample_rate_hz_ = desired_freq_hz; + RTC_DCHECK_EQ( + audio_frame->sample_rate_hz_, + rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100)); + resampled_last_output_frame_ = true; + } else { + resampled_last_output_frame_ = false; + // We might end up here ONLY if codec is changed. + } + + // Store current audio in `last_audio_buffer_` for next time. + memcpy(last_audio_buffer_.get(), audio_frame->data(), + sizeof(int16_t) * audio_frame->samples_per_channel_ * + audio_frame->num_channels_); + + call_stats_.DecodedByNetEq(audio_frame->speech_type_, *muted); + return 0; +} + +void AcmReceiver::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) { + neteq_->SetCodecs(codecs); +} + +void AcmReceiver::FlushBuffers() { + neteq_->FlushBuffers(); +} + +void AcmReceiver::RemoveAllCodecs() { + MutexLock lock(&mutex_); + neteq_->RemoveAllPayloadTypes(); + last_decoder_ = absl::nullopt; +} + +absl::optional<uint32_t> AcmReceiver::GetPlayoutTimestamp() { + return neteq_->GetPlayoutTimestamp(); +} + +int AcmReceiver::FilteredCurrentDelayMs() const { + return neteq_->FilteredCurrentDelayMs(); +} + +int AcmReceiver::TargetDelayMs() const { + return neteq_->TargetDelayMs(); +} + +absl::optional<std::pair<int, SdpAudioFormat>> AcmReceiver::LastDecoder() + const { + MutexLock lock(&mutex_); + if (!last_decoder_) { + return absl::nullopt; + } + RTC_DCHECK_NE(-1, last_decoder_->payload_type); + return std::make_pair(last_decoder_->payload_type, last_decoder_->sdp_format); +} + +void AcmReceiver::GetNetworkStatistics( + 
NetworkStatistics* acm_stat, + bool get_and_clear_legacy_stats /* = true */) const { + NetEqNetworkStatistics neteq_stat; + if (get_and_clear_legacy_stats) { + // NetEq function always returns zero, so we don't check the return value. + neteq_->NetworkStatistics(&neteq_stat); + + acm_stat->currentExpandRate = neteq_stat.expand_rate; + acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate; + acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate; + acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate; + acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate; + acm_stat->currentSecondaryDiscardedRate = + neteq_stat.secondary_discarded_rate; + acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms; + acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms; + } else { + neteq_stat = neteq_->CurrentNetworkStatistics(); + acm_stat->currentExpandRate = 0; + acm_stat->currentSpeechExpandRate = 0; + acm_stat->currentPreemptiveRate = 0; + acm_stat->currentAccelerateRate = 0; + acm_stat->currentSecondaryDecodedRate = 0; + acm_stat->currentSecondaryDiscardedRate = 0; + acm_stat->meanWaitingTimeMs = -1; + acm_stat->maxWaitingTimeMs = 1; + } + acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms; + acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms; + acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? 
true : false; + + NetEqLifetimeStatistics neteq_lifetime_stat = neteq_->GetLifetimeStatistics(); + acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received; + acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples; + acm_stat->silentConcealedSamples = + neteq_lifetime_stat.silent_concealed_samples; + acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events; + acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms; + acm_stat->jitterBufferTargetDelayMs = + neteq_lifetime_stat.jitter_buffer_target_delay_ms; + acm_stat->jitterBufferMinimumDelayMs = + neteq_lifetime_stat.jitter_buffer_minimum_delay_ms; + acm_stat->jitterBufferEmittedCount = + neteq_lifetime_stat.jitter_buffer_emitted_count; + acm_stat->delayedPacketOutageSamples = + neteq_lifetime_stat.delayed_packet_outage_samples; + acm_stat->relativePacketArrivalDelayMs = + neteq_lifetime_stat.relative_packet_arrival_delay_ms; + acm_stat->interruptionCount = neteq_lifetime_stat.interruption_count; + acm_stat->totalInterruptionDurationMs = + neteq_lifetime_stat.total_interruption_duration_ms; + acm_stat->insertedSamplesForDeceleration = + neteq_lifetime_stat.inserted_samples_for_deceleration; + acm_stat->removedSamplesForAcceleration = + neteq_lifetime_stat.removed_samples_for_acceleration; + acm_stat->fecPacketsReceived = neteq_lifetime_stat.fec_packets_received; + acm_stat->fecPacketsDiscarded = neteq_lifetime_stat.fec_packets_discarded; + acm_stat->packetsDiscarded = neteq_lifetime_stat.packets_discarded; + + NetEqOperationsAndState neteq_operations_and_state = + neteq_->GetOperationsAndState(); + acm_stat->packetBufferFlushes = + neteq_operations_and_state.packet_buffer_flushes; +} + +int AcmReceiver::EnableNack(size_t max_nack_list_size) { + neteq_->EnableNack(max_nack_list_size); + return 0; +} + +void AcmReceiver::DisableNack() { + neteq_->DisableNack(); +} + +std::vector<uint16_t> AcmReceiver::GetNackList( + int64_t round_trip_time_ms) const { + 
return neteq_->GetNackList(round_trip_time_ms); +} + +void AcmReceiver::ResetInitialDelay() { + neteq_->SetMinimumDelay(0); + // TODO(turajs): Should NetEq Buffer be flushed? +} + +uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const { + // Down-cast the time to (32-6)-bit since we only care about + // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms. + // We masked 6 most significant bits of 32-bit so there is no overflow in + // the conversion from milliseconds to timestamp. + const uint32_t now_in_ms = + static_cast<uint32_t>(clock_->TimeInMilliseconds() & 0x03ffffff); + return static_cast<uint32_t>((decoder_sampling_rate / 1000) * now_in_ms); +} + +void AcmReceiver::GetDecodingCallStatistics( + AudioDecodingCallStats* stats) const { + MutexLock lock(&mutex_); + *stats = call_stats_.GetDecodingStatistics(); +} + +} // namespace acm2 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h new file mode 100644 index 0000000000..a61247627f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.h @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_ +#define MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_ + +#include <stdint.h> + +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> +#include <atomic> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/audio_format.h" +#include "modules/audio_coding/acm2/acm_resampler.h" +#include "modules/audio_coding/acm2/call_statistics.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class Clock; +class NetEq; +struct RTPHeader; + +namespace acm2 { + +class AcmReceiver { + public: + // Constructor of the class + explicit AcmReceiver(const AudioCodingModule::Config& config); + + // Destructor of the class. + ~AcmReceiver(); + + // + // Inserts a payload with its associated RTP-header into NetEq. + // + // Input: + // - rtp_header : RTP header for the incoming payload containing + // information about payload type, sequence number, + // timestamp, SSRC and marker bit. + // - incoming_payload : Incoming audio payload; the view also carries + // its length in bytes. + // + // Return value : 0 if OK. + // <0 if the payload type is unregistered or NetEq failed. + // + int InsertPacket(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> incoming_payload); + + // + // Asks NetEq for 10 milliseconds of decoded audio. + // + // Input: + // -desired_freq_hz : specifies the sampling rate [Hz] of the output + // audio. -1 indicates that no resampling is + // required and the audio is returned at the + // sampling rate of the decoder. + // + // Output: + // -audio_frame : an audio frame where output data and + // associated parameters are written to. + // -muted : if true, the sample data in audio_frame is not + // populated, and must be interpreted as all zero. 
+ // + // Return value : 0 if OK. + // -1 if NetEq returned an error or resampling failed. + // + int GetAudio(int desired_freq_hz, AudioFrame* audio_frame, bool* muted); + + // Replace the current set of decoders with the specified set. + void SetCodecs(const std::map<int, SdpAudioFormat>& codecs); + + // + // Sets a minimum delay for packet buffer. The given delay is maintained, + // unless channel condition dictates a higher delay. + // + // Input: + // - delay_ms : minimum delay in milliseconds. + // + // Return value : 0 if OK. + // <0 if NetEq returned an error. + // + int SetMinimumDelay(int delay_ms); + + // + // Sets a maximum delay [ms] for the packet buffer. The target delay does not + // exceed the given value, even if channel condition requires so. + // + // Input: + // - delay_ms : maximum delay in milliseconds. + // + // Return value : 0 if OK. + // <0 if NetEq returned an error. + // + int SetMaximumDelay(int delay_ms); + + // Sets a base minimum delay in milliseconds for the packet buffer. + // Base minimum delay sets lower bound minimum delay value which + // is set via SetMinimumDelay. + // + // Returns true if value was successfully set, false otherwise. + bool SetBaseMinimumDelayMs(int delay_ms); + + // Returns current value of base minimum delay in milliseconds. + int GetBaseMinimumDelayMs() const; + + // + // Resets the initial delay to zero. + // + void ResetInitialDelay(); + + // Returns the sample rate of the decoder associated with the last incoming + // packet. If no packet of a registered non-CNG codec has been received, the + // return value is empty. Also, if the decoder was unregistered since the last + // packet was inserted, the return value is empty. + absl::optional<int> last_packet_sample_rate_hz() const; + + // Returns last_output_sample_rate_hz from the NetEq instance. + int last_output_sample_rate_hz() const; + + // + // Get the current network statistics from NetEq. + // + // Output: + // - statistics : The current network statistics. 
+ // + void GetNetworkStatistics(NetworkStatistics* statistics, + bool get_and_clear_legacy_stats = true) const; + + // + // Flushes the NetEq packet and speech buffers. + // + void FlushBuffers(); + + // + // Remove all registered codecs. + // + void RemoveAllCodecs(); + + // Returns the RTP timestamp for the last sample delivered by GetAudio(). + // The return value will be empty if no valid timestamp is available. + absl::optional<uint32_t> GetPlayoutTimestamp(); + + // Returns the current total delay from NetEq (packet buffer and sync buffer) + // in ms, with smoothing applied to even out short-time fluctuations due to + // jitter. The packet buffer part of the delay is not updated during DTX/CNG + // periods. + // + int FilteredCurrentDelayMs() const; + + // Returns the current target delay for NetEq in ms. + // + int TargetDelayMs() const; + + // + // Get payload type and format of the last non-CNG/non-DTMF received payload. + // If no non-CNG/non-DTMF packet is received absl::nullopt is returned. + // + absl::optional<std::pair<int, SdpAudioFormat>> LastDecoder() const; + + // + // Enable NACK and set the maximum size of the NACK list. If NACK is already + // enabled then the maximum NACK list size is modified accordingly. + // + // If the sequence number of last received packet is N, the sequence numbers + // of NACK list are in the range of [N - `max_nack_list_size`, N). + // + // `max_nack_list_size` should be positive (non-zero) and less than or + // equal to `Nack::kNackListSizeLimit`. This wrapper forwards the value to + // NetEq without validating it and always returns 0. + // + int EnableNack(size_t max_nack_list_size); + + // Disable NACK. + void DisableNack(); + + // + // Get a list of packets to be retransmitted. `round_trip_time_ms` is an + // estimate of the round-trip-time (in milliseconds). 
Missing packets which + // will be played out in a shorter time than the round-trip-time (with respect + // to the time this API is called) will not be included in the list. + // + // Negative `round_trip_time_ms` results in an error message and an empty list + // is returned. + // + std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const; + + // + // Get statistics of calls to GetAudio(). + void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const; + + private: + struct DecoderInfo { + int payload_type; + int sample_rate_hz; + int num_channels; + SdpAudioFormat sdp_format; + }; + + uint32_t NowInTimestamp(int decoder_sampling_rate) const; + + mutable Mutex mutex_; + absl::optional<DecoderInfo> last_decoder_ RTC_GUARDED_BY(mutex_); + ACMResampler resampler_; + + // After construction, this is only ever touched on the thread that calls + // AcmReceiver::GetAudio, and only modified in this method. + std::unique_ptr<int16_t[]> last_audio_buffer_; + CallStatistics call_stats_; + const std::unique_ptr<NetEq> neteq_; // NetEq is thread-safe; no lock needed. + Clock* const clock_; + std::atomic<bool> resampled_last_output_frame_; +}; + +} // namespace acm2 + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc new file mode 100644 index 0000000000..6dd44b696e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc @@ -0,0 +1,455 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/acm2/acm_receiver.h" + +#include <algorithm> // std::min +#include <memory> + +#include "absl/types/optional.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace acm2 { + +class AcmReceiverTestOldApi : public AudioPacketizationCallback, + public ::testing::Test { + protected: + AcmReceiverTestOldApi() + : timestamp_(0), + packet_sent_(false), + last_packet_send_timestamp_(timestamp_), + last_frame_type_(AudioFrameType::kEmptyFrame) { + config_.decoder_factory = decoder_factory_; + } + + ~AcmReceiverTestOldApi() {} + + void SetUp() override { + acm_.reset(AudioCodingModule::Create(config_)); + receiver_.reset(new AcmReceiver(config_)); + ASSERT_TRUE(receiver_.get() != NULL); + ASSERT_TRUE(acm_.get() != NULL); + acm_->InitializeReceiver(); + acm_->RegisterTransportCallback(this); + + rtp_header_.sequenceNumber = 0; + rtp_header_.timestamp = 0; + rtp_header_.markerBit = false; + rtp_header_.ssrc = 0x12345678; // Arbitrary. + rtp_header_.numCSRCs = 0; + rtp_header_.payloadType = 0; + } + + void TearDown() override {} + + AudioCodecInfo SetEncoder(int payload_type, + const SdpAudioFormat& format, + const std::map<int, int> cng_payload_types = {}) { + // Create the speech encoder. 
+ absl::optional<AudioCodecInfo> info = + encoder_factory_->QueryAudioEncoder(format); + RTC_CHECK(info.has_value()); + std::unique_ptr<AudioEncoder> enc = + encoder_factory_->MakeAudioEncoder(payload_type, format, absl::nullopt); + + // If we have a compatible CN specification, stack a CNG on top. + auto it = cng_payload_types.find(info->sample_rate_hz); + if (it != cng_payload_types.end()) { + AudioEncoderCngConfig config; + config.speech_encoder = std::move(enc); + config.num_channels = 1; + config.payload_type = it->second; + config.vad_mode = Vad::kVadNormal; + enc = CreateComfortNoiseEncoder(std::move(config)); + } + + // Actually start using the new encoder. + acm_->SetEncoder(std::move(enc)); + return *info; + } + + int InsertOnePacketOfSilence(const AudioCodecInfo& info) { + // Frame setup according to the codec. + AudioFrame frame; + frame.sample_rate_hz_ = info.sample_rate_hz; + frame.samples_per_channel_ = info.sample_rate_hz / 100; // 10 ms. + frame.num_channels_ = info.num_channels; + frame.Mute(); + packet_sent_ = false; + last_packet_send_timestamp_ = timestamp_; + int num_10ms_frames = 0; + while (!packet_sent_) { + frame.timestamp_ = timestamp_; + timestamp_ += rtc::checked_cast<uint32_t>(frame.samples_per_channel_); + EXPECT_GE(acm_->Add10MsData(frame), 0); + ++num_10ms_frames; + } + return num_10ms_frames; + } + + int SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_len_bytes, + int64_t absolute_capture_timestamp_ms) override { + if (frame_type == AudioFrameType::kEmptyFrame) + return 0; + + rtp_header_.payloadType = payload_type; + rtp_header_.timestamp = timestamp; + + int ret_val = receiver_->InsertPacket( + rtp_header_, + rtc::ArrayView<const uint8_t>(payload_data, payload_len_bytes)); + if (ret_val < 0) { + RTC_DCHECK_NOTREACHED(); + return -1; + } + rtp_header_.sequenceNumber++; + packet_sent_ = true; + last_frame_type_ = frame_type; + return 0; + } + + const 
rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_ = + CreateBuiltinAudioEncoderFactory(); + const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_ = + CreateBuiltinAudioDecoderFactory(); + AudioCodingModule::Config config_; + std::unique_ptr<AcmReceiver> receiver_; + std::unique_ptr<AudioCodingModule> acm_; + RTPHeader rtp_header_; + uint32_t timestamp_; + bool packet_sent_; // Set when SendData is called reset when inserting audio. + uint32_t last_packet_send_timestamp_; + AudioFrameType last_frame_type_; +}; + +#if defined(WEBRTC_ANDROID) +#define MAYBE_SampleRate DISABLED_SampleRate +#else +#define MAYBE_SampleRate SampleRate +#endif +TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) { + const std::map<int, SdpAudioFormat> codecs = {{0, {"OPUS", 48000, 2}}}; + receiver_->SetCodecs(codecs); + + constexpr int kOutSampleRateHz = 8000; // Different than codec sample rate. + for (size_t i = 0; i < codecs.size(); ++i) { + const int payload_type = rtc::checked_cast<int>(i); + const int num_10ms_frames = + InsertOnePacketOfSilence(SetEncoder(payload_type, codecs.at(i))); + for (int k = 0; k < num_10ms_frames; ++k) { + AudioFrame frame; + bool muted; + EXPECT_EQ(0, receiver_->GetAudio(kOutSampleRateHz, &frame, &muted)); + } + EXPECT_EQ(encoder_factory_->QueryAudioEncoder(codecs.at(i))->sample_rate_hz, + receiver_->last_output_sample_rate_hz()); + } +} + +class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi { + protected: + AcmReceiverTestFaxModeOldApi() { + config_.neteq_config.for_test_no_time_stretching = true; + } + + void RunVerifyAudioFrame(const SdpAudioFormat& codec) { + // Make sure "fax mode" is enabled. This will avoid delay changes unless the + // packet-loss concealment is made. We do this in order to make the + // timestamp increments predictable; in normal mode, NetEq may decide to do + // accelerate or pre-emptive expand operations after some time, offsetting + // the timestamp. 
+ EXPECT_TRUE(config_.neteq_config.for_test_no_time_stretching); + + constexpr int payload_type = 17; + receiver_->SetCodecs({{payload_type, codec}}); + + const AudioCodecInfo info = SetEncoder(payload_type, codec); + const int output_sample_rate_hz = info.sample_rate_hz; + const size_t output_channels = info.num_channels; + const size_t samples_per_ms = rtc::checked_cast<size_t>( + rtc::CheckedDivExact(output_sample_rate_hz, 1000)); + const AudioFrame::VADActivity expected_vad_activity = + output_sample_rate_hz > 16000 ? AudioFrame::kVadActive + : AudioFrame::kVadPassive; + + // Expect the first output timestamp to be 5*fs/8000 samples before the + // first inserted timestamp (because of NetEq's look-ahead). (This value is + // defined in Expand::overlap_length_.) + uint32_t expected_output_ts = + last_packet_send_timestamp_ - + rtc::CheckedDivExact(5 * output_sample_rate_hz, 8000); + + AudioFrame frame; + bool muted; + EXPECT_EQ(0, receiver_->GetAudio(output_sample_rate_hz, &frame, &muted)); + // Expect timestamp = 0 before first packet is inserted. 
+ EXPECT_EQ(0u, frame.timestamp_); + for (int i = 0; i < 5; ++i) { + const int num_10ms_frames = InsertOnePacketOfSilence(info); + for (int k = 0; k < num_10ms_frames; ++k) { + EXPECT_EQ(0, + receiver_->GetAudio(output_sample_rate_hz, &frame, &muted)); + EXPECT_EQ(expected_output_ts, frame.timestamp_); + expected_output_ts += rtc::checked_cast<uint32_t>(10 * samples_per_ms); + EXPECT_EQ(10 * samples_per_ms, frame.samples_per_channel_); + EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_); + EXPECT_EQ(output_channels, frame.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_); + EXPECT_EQ(expected_vad_activity, frame.vad_activity_); + EXPECT_FALSE(muted); + } + } + } +}; + +#if defined(WEBRTC_ANDROID) +#define MAYBE_VerifyAudioFramePCMU DISABLED_VerifyAudioFramePCMU +#else +#define MAYBE_VerifyAudioFramePCMU VerifyAudioFramePCMU +#endif +TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFramePCMU) { + RunVerifyAudioFrame({"PCMU", 8000, 1}); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_VerifyAudioFrameOpus DISABLED_VerifyAudioFrameOpus +#else +#define MAYBE_VerifyAudioFrameOpus VerifyAudioFrameOpus +#endif +TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) { + RunVerifyAudioFrame({"opus", 48000, 2}); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad +#else +#define MAYBE_PostdecodingVad PostdecodingVad +#endif +TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { + EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad); + constexpr int payload_type = 34; + const SdpAudioFormat codec = {"L16", 16000, 1}; + const AudioCodecInfo info = SetEncoder(payload_type, codec); + receiver_->SetCodecs({{payload_type, codec}}); + constexpr int kNumPackets = 5; + AudioFrame frame; + for (int n = 0; n < kNumPackets; ++n) { + const int num_10ms_frames = InsertOnePacketOfSilence(info); + for (int k = 0; k < num_10ms_frames; ++k) { + bool muted; + ASSERT_EQ(0, 
receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); + } + } + EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_); +} + +class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi { + protected: + AcmReceiverTestPostDecodeVadPassiveOldApi() { + config_.neteq_config.enable_post_decode_vad = false; + } +}; + +#if defined(WEBRTC_ANDROID) +#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad +#else +#define MAYBE_PostdecodingVad PostdecodingVad +#endif +TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) { + EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad); + constexpr int payload_type = 34; + const SdpAudioFormat codec = {"L16", 16000, 1}; + const AudioCodecInfo info = SetEncoder(payload_type, codec); + auto const value = encoder_factory_->QueryAudioEncoder(codec); + ASSERT_TRUE(value.has_value()); + receiver_->SetCodecs({{payload_type, codec}}); + const int kNumPackets = 5; + AudioFrame frame; + for (int n = 0; n < kNumPackets; ++n) { + const int num_10ms_frames = InsertOnePacketOfSilence(info); + for (int k = 0; k < num_10ms_frames; ++k) { + bool muted; + ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); + } + } + EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_); +} + +#if defined(WEBRTC_ANDROID) +#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec +#else +#define MAYBE_LastAudioCodec LastAudioCodec +#endif +#if defined(WEBRTC_CODEC_OPUS) +TEST_F(AcmReceiverTestOldApi, MAYBE_LastAudioCodec) { + const std::map<int, SdpAudioFormat> codecs = { + {0, {"PCMU", 8000, 1}}, {1, {"PCMA", 8000, 1}}, {2, {"L16", 32000, 1}}}; + const std::map<int, int> cng_payload_types = { + {8000, 100}, {16000, 101}, {32000, 102}}; + { + std::map<int, SdpAudioFormat> receive_codecs = codecs; + for (const auto& cng_type : cng_payload_types) { + receive_codecs.emplace(std::make_pair( + cng_type.second, SdpAudioFormat("CN", cng_type.first, 1))); + } + receiver_->SetCodecs(receive_codecs); + } + + // No 
audio payload is received. + EXPECT_EQ(absl::nullopt, receiver_->LastDecoder()); + + // Start with sending DTX. + packet_sent_ = false; + InsertOnePacketOfSilence( + SetEncoder(0, codecs.at(0), cng_payload_types)); // Enough to test + // with one codec. + ASSERT_TRUE(packet_sent_); + EXPECT_EQ(AudioFrameType::kAudioFrameCN, last_frame_type_); + + // Has received, only, DTX. Last Audio codec is undefined. + EXPECT_EQ(absl::nullopt, receiver_->LastDecoder()); + EXPECT_EQ(absl::nullopt, receiver_->last_packet_sample_rate_hz()); + + for (size_t i = 0; i < codecs.size(); ++i) { + // Set DTX off to send audio payload. + packet_sent_ = false; + const int payload_type = rtc::checked_cast<int>(i); + const AudioCodecInfo info_without_cng = + SetEncoder(payload_type, codecs.at(i)); + InsertOnePacketOfSilence(info_without_cng); + + // Sanity check if Actually an audio payload received, and it should be + // of type "speech." + ASSERT_TRUE(packet_sent_); + ASSERT_EQ(AudioFrameType::kAudioFrameSpeech, last_frame_type_); + EXPECT_EQ(info_without_cng.sample_rate_hz, + receiver_->last_packet_sample_rate_hz()); + + // Set VAD on to send DTX. Then check if the "Last Audio codec" returns + // the expected codec. Encode repeatedly until a DTX is sent. + const AudioCodecInfo info_with_cng = + SetEncoder(payload_type, codecs.at(i), cng_payload_types); + while (last_frame_type_ != AudioFrameType::kAudioFrameCN) { + packet_sent_ = false; + InsertOnePacketOfSilence(info_with_cng); + ASSERT_TRUE(packet_sent_); + } + EXPECT_EQ(info_with_cng.sample_rate_hz, + receiver_->last_packet_sample_rate_hz()); + EXPECT_EQ(codecs.at(i), receiver_->LastDecoder()->second); + } +} +#endif + +// Check if the statistics are initialized correctly. Before any call to ACM +// all fields have to be zero. 
+#if defined(WEBRTC_ANDROID) +#define MAYBE_InitializedToZero DISABLED_InitializedToZero +#else +#define MAYBE_InitializedToZero InitializedToZero +#endif +TEST_F(AcmReceiverTestOldApi, MAYBE_InitializedToZero) { + AudioDecodingCallStats stats; + receiver_->GetDecodingCallStatistics(&stats); + EXPECT_EQ(0, stats.calls_to_neteq); + EXPECT_EQ(0, stats.calls_to_silence_generator); + EXPECT_EQ(0, stats.decoded_normal); + EXPECT_EQ(0, stats.decoded_cng); + EXPECT_EQ(0, stats.decoded_neteq_plc); + EXPECT_EQ(0, stats.decoded_plc_cng); + EXPECT_EQ(0, stats.decoded_muted_output); +} + +// Insert some packets and pull audio. Check statistics are valid. Then, +// simulate packet loss and check if PLC and PLC-to-CNG statistics are +// correctly updated. +#if defined(WEBRTC_ANDROID) +#define MAYBE_NetEqCalls DISABLED_NetEqCalls +#else +#define MAYBE_NetEqCalls NetEqCalls +#endif +TEST_F(AcmReceiverTestOldApi, MAYBE_NetEqCalls) { + AudioDecodingCallStats stats; + const int kNumNormalCalls = 10; + const int kSampleRateHz = 16000; + const int kNumSamples10ms = kSampleRateHz / 100; + const int kFrameSizeMs = 10; // Multiple of 10. 
+ const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms; + const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t); + const uint8_t kPayloadType = 111; + RTPHeader rtp_header; + AudioFrame audio_frame; + bool muted; + + receiver_->SetCodecs( + {{kPayloadType, SdpAudioFormat("L16", kSampleRateHz, 1)}}); + rtp_header.sequenceNumber = 0xABCD; + rtp_header.timestamp = 0xABCDEF01; + rtp_header.payloadType = kPayloadType; + rtp_header.markerBit = false; + rtp_header.ssrc = 0x1234; + rtp_header.numCSRCs = 0; + rtp_header.payload_type_frequency = kSampleRateHz; + + for (int num_calls = 0; num_calls < kNumNormalCalls; ++num_calls) { + const uint8_t kPayload[kPayloadSizeBytes] = {0}; + ASSERT_EQ(0, receiver_->InsertPacket(rtp_header, kPayload)); + ++rtp_header.sequenceNumber; + rtp_header.timestamp += kFrameSizeSamples; + ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted)); + EXPECT_FALSE(muted); + } + receiver_->GetDecodingCallStatistics(&stats); + EXPECT_EQ(kNumNormalCalls, stats.calls_to_neteq); + EXPECT_EQ(0, stats.calls_to_silence_generator); + EXPECT_EQ(kNumNormalCalls, stats.decoded_normal); + EXPECT_EQ(0, stats.decoded_cng); + EXPECT_EQ(0, stats.decoded_neteq_plc); + EXPECT_EQ(0, stats.decoded_plc_cng); + EXPECT_EQ(0, stats.decoded_muted_output); + + const int kNumPlc = 3; + const int kNumPlcCng = 5; + + // Simulate packet-loss. NetEq first performs PLC then PLC fades to CNG. 
+ for (int n = 0; n < kNumPlc + kNumPlcCng; ++n) { + ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted)); + EXPECT_FALSE(muted); + } + receiver_->GetDecodingCallStatistics(&stats); + EXPECT_EQ(kNumNormalCalls + kNumPlc + kNumPlcCng, stats.calls_to_neteq); + EXPECT_EQ(0, stats.calls_to_silence_generator); + EXPECT_EQ(kNumNormalCalls, stats.decoded_normal); + EXPECT_EQ(0, stats.decoded_cng); + EXPECT_EQ(kNumPlc, stats.decoded_neteq_plc); + EXPECT_EQ(kNumPlcCng, stats.decoded_plc_cng); + EXPECT_EQ(0, stats.decoded_muted_output); + // TODO(henrik.lundin) Add a test with muted state enabled. +} + +} // namespace acm2 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc new file mode 100644 index 0000000000..13709dbbee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/acm2/acm_remixing.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output) { + RTC_DCHECK_EQ(input.num_channels_, 2); + RTC_DCHECK_EQ(output.size(), input.samples_per_channel_); + + if (input.muted()) { + std::fill(output.begin(), output.begin() + input.samples_per_channel_, 0); + } else { + const int16_t* const input_data = input.data(); + for (size_t n = 0; n < input.samples_per_channel_; ++n) { + output[n] = rtc::dchecked_cast<int16_t>( + (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1); + } + } +} + +void ReMixFrame(const AudioFrame& input, + size_t num_output_channels, + std::vector<int16_t>* output) { + const size_t output_size = num_output_channels * input.samples_per_channel_; + RTC_DCHECK(!(input.num_channels_ == 0 && num_output_channels > 0 && + input.samples_per_channel_ > 0)); + + if (output->size() != output_size) { + output->resize(output_size); + } + + // For muted frames, fill the frame with zeros. + if (input.muted()) { + std::fill(output->begin(), output->end(), 0); + return; + } + + // Ensure that the special case of zero input channels is handled correctly + // (zero samples per channel is already handled correctly in the code below). + if (input.num_channels_ == 0) { + return; + } + + const int16_t* const input_data = input.data(); + size_t out_index = 0; + + // When upmixing is needed and the input is mono copy the left channel + // into the left and right channels, and set any remaining channels to zero. 
+ if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) { + for (size_t k = 0; k < input.samples_per_channel_; ++k) { + (*output)[out_index++] = input_data[k]; + (*output)[out_index++] = input_data[k]; + for (size_t j = 2; j < num_output_channels; ++j) { + (*output)[out_index++] = 0; + } + RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels); + } + RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels); + return; + } + + size_t in_index = 0; + + // When upmixing is needed and the output is surround, copy the available + // channels directly, and set the remaining channels to zero. + if (input.num_channels_ < num_output_channels) { + for (size_t k = 0; k < input.samples_per_channel_; ++k) { + for (size_t j = 0; j < input.num_channels_; ++j) { + (*output)[out_index++] = input_data[in_index++]; + } + for (size_t j = input.num_channels_; j < num_output_channels; ++j) { + (*output)[out_index++] = 0; + } + RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_); + RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels); + } + RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_); + RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels); + + return; + } + + // When downmixing is needed, and the input is stereo, average the channels. + if (input.num_channels_ == 2) { + for (size_t n = 0; n < input.samples_per_channel_; ++n) { + (*output)[n] = rtc::dchecked_cast<int16_t>( + (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1); + } + return; + } + + // When downmixing is needed, and the input is multichannel, drop the surplus + // channels. 
+ const size_t num_channels_to_drop = input.num_channels_ - num_output_channels; + for (size_t k = 0; k < input.samples_per_channel_; ++k) { + for (size_t j = 0; j < num_output_channels; ++j) { + (*output)[out_index++] = input_data[in_index++]; + } + in_index += num_channels_to_drop; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h new file mode 100644 index 0000000000..661569b033 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_ +#define MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_ + +#include <vector> + +#include "api/audio/audio_frame.h" + +namespace webrtc { + +// Stereo-to-mono downmixing. The length of the output must equal to the number +// of samples per channel in the input. +void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output); + +// Remixes the interleaved input frame to an interleaved output data vector. The +// remixed data replaces the data in the output vector which is resized if +// needed. The remixing supports any combination of input and output channels, +// as well as any number of samples per channel. 
+void ReMixFrame(const AudioFrame& input, + size_t num_output_channels, + std::vector<int16_t>* output); + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc new file mode 100644 index 0000000000..a1a816f727 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_remixing_unittest.cc @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/acm_remixing.h" + +#include <vector> + +#include "api/audio/audio_frame.h" +#include "system_wrappers/include/clock.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +using ::testing::AllOf; +using ::testing::Each; +using ::testing::ElementsAreArray; +using ::testing::SizeIs; + +namespace webrtc { + +TEST(AcmRemixing, DownMixFrame) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 2; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[2 * k] = 2; + in_data[2 * k + 1] = 0; + } + + DownMixFrame(in, out); + + EXPECT_THAT(out, AllOf(SizeIs(480), Each(1))); +} + +TEST(AcmRemixing, DownMixMutedFrame) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 2; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[2 * k] = 2; + in_data[2 * k + 1] = 0; + 
} + + in.Mute(); + + DownMixFrame(in, out); + + EXPECT_THAT(out, AllOf(SizeIs(480), Each(0))); +} + +TEST(AcmRemixing, RemixMutedStereoFrameTo6Channels) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 2; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[2 * k] = 1; + in_data[2 * k + 1] = 2; + } + in.Mute(); + + ReMixFrame(in, 6, &out); + EXPECT_EQ(6 * 480u, out.size()); + + EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_ * 6), Each(0))); +} + +TEST(AcmRemixing, RemixStereoFrameTo6Channels) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 2; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[2 * k] = 1; + in_data[2 * k + 1] = 2; + } + + ReMixFrame(in, 6, &out); + EXPECT_EQ(6 * 480u, out.size()); + + std::vector<int16_t> expected_output(in.samples_per_channel_ * 6); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + expected_output[6 * k] = 1; + expected_output[6 * k + 1] = 2; + } + + EXPECT_THAT(out, ElementsAreArray(expected_output)); +} + +TEST(AcmRemixing, RemixMonoFrameTo6Channels) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 1; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[k] = 1; + } + + ReMixFrame(in, 6, &out); + EXPECT_EQ(6 * 480u, out.size()); + + std::vector<int16_t> expected_output(in.samples_per_channel_ * 6, 0); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + expected_output[6 * k] = 1; + expected_output[6 * k + 1] = 1; + } + + EXPECT_THAT(out, ElementsAreArray(expected_output)); +} + +TEST(AcmRemixing, RemixStereoFrameToMono) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 2; + in.samples_per_channel_ = 480; + + int16_t* 
const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[2 * k] = 2; + in_data[2 * k + 1] = 0; + } + + ReMixFrame(in, 1, &out); + EXPECT_EQ(480u, out.size()); + + EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_), Each(1))); +} + +TEST(AcmRemixing, RemixMonoFrameToStereo) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 1; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + in_data[k] = 1; + } + + ReMixFrame(in, 2, &out); + EXPECT_EQ(960u, out.size()); + + EXPECT_THAT(out, AllOf(SizeIs(2 * in.samples_per_channel_), Each(1))); +} + +TEST(AcmRemixing, Remix3ChannelFrameToStereo) { + std::vector<int16_t> out(480, 0); + AudioFrame in; + in.num_channels_ = 3; + in.samples_per_channel_ = 480; + + int16_t* const in_data = in.mutable_data(); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + for (size_t j = 0; j < 3; ++j) { + in_data[3 * k + j] = j; + } + } + + ReMixFrame(in, 2, &out); + EXPECT_EQ(2 * 480u, out.size()); + + std::vector<int16_t> expected_output(in.samples_per_channel_ * 2); + for (size_t k = 0; k < in.samples_per_channel_; ++k) { + for (size_t j = 0; j < 2; ++j) { + expected_output[2 * k + j] = static_cast<int>(j); + } + } + + EXPECT_THAT(out, ElementsAreArray(expected_output)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc new file mode 100644 index 0000000000..e307c6ca57 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/acm_resampler.h" + +#include <string.h> + +#include "rtc_base/logging.h" + +namespace webrtc { +namespace acm2 { + +ACMResampler::ACMResampler() {} + +ACMResampler::~ACMResampler() {} + +int ACMResampler::Resample10Msec(const int16_t* in_audio, + int in_freq_hz, + int out_freq_hz, + size_t num_audio_channels, + size_t out_capacity_samples, + int16_t* out_audio) { + size_t in_length = in_freq_hz * num_audio_channels / 100; + if (in_freq_hz == out_freq_hz) { + if (out_capacity_samples < in_length) { + RTC_DCHECK_NOTREACHED(); + return -1; + } + memcpy(out_audio, in_audio, in_length * sizeof(int16_t)); + return static_cast<int>(in_length / num_audio_channels); + } + + if (resampler_.InitializeIfNeeded(in_freq_hz, out_freq_hz, + num_audio_channels) != 0) { + RTC_LOG(LS_ERROR) << "InitializeIfNeeded(" << in_freq_hz << ", " + << out_freq_hz << ", " << num_audio_channels + << ") failed."; + return -1; + } + + int out_length = + resampler_.Resample(in_audio, in_length, out_audio, out_capacity_samples); + if (out_length == -1) { + RTC_LOG(LS_ERROR) << "Resample(" << in_audio << ", " << in_length << ", " + << out_audio << ", " << out_capacity_samples + << ") failed."; + return -1; + } + + return static_cast<int>(out_length / num_audio_channels); +} + +} // namespace acm2 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h new file mode 100644 index 0000000000..96ba93a762 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_resampler.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_ +#define MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "common_audio/resampler/include/push_resampler.h" + +namespace webrtc { +namespace acm2 { + +class ACMResampler { + public: + ACMResampler(); + ~ACMResampler(); + + int Resample10Msec(const int16_t* in_audio, + int in_freq_hz, + int out_freq_hz, + size_t num_audio_channels, + size_t out_capacity_samples, + int16_t* out_audio); + + private: + PushResampler<int16_t> resampler_; +}; + +} // namespace acm2 +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_ACM2_ACM_RESAMPLER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc new file mode 100644 index 0000000000..3e65f94b0d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.cc @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/acm2/acm_send_test.h" + +#include <stdio.h> +#include <string.h> + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/audio_codecs/builtin_audio_encoder_factory.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "rtc_base/checks.h" +#include "rtc_base/string_encode.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +AcmSendTestOldApi::AcmSendTestOldApi(InputAudioFile* audio_source, + int source_rate_hz, + int test_duration_ms) + : clock_(0), + acm_(webrtc::AudioCodingModule::Create([this] { + AudioCodingModule::Config config; + config.clock = &clock_; + config.decoder_factory = CreateBuiltinAudioDecoderFactory(); + return config; + }())), + audio_source_(audio_source), + source_rate_hz_(source_rate_hz), + input_block_size_samples_( + static_cast<size_t>(source_rate_hz_ * kBlockSizeMs / 1000)), + codec_registered_(false), + test_duration_ms_(test_duration_ms), + frame_type_(AudioFrameType::kAudioFrameSpeech), + payload_type_(0), + timestamp_(0), + sequence_number_(0) { + input_frame_.sample_rate_hz_ = source_rate_hz_; + input_frame_.num_channels_ = 1; + input_frame_.samples_per_channel_ = input_block_size_samples_; + RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_, + AudioFrame::kMaxDataSizeSamples); + acm_->RegisterTransportCallback(this); +} + +AcmSendTestOldApi::~AcmSendTestOldApi() = default; + +bool AcmSendTestOldApi::RegisterCodec(absl::string_view payload_name, + int clockrate_hz, + int num_channels, + int payload_type, + int frame_size_samples) { + SdpAudioFormat format(payload_name, clockrate_hz, num_channels); + if (absl::EqualsIgnoreCase(payload_name, "g722")) { + RTC_CHECK_EQ(16000, clockrate_hz); 
+ format.clockrate_hz = 8000; + } else if (absl::EqualsIgnoreCase(payload_name, "opus")) { + RTC_CHECK(num_channels == 1 || num_channels == 2); + if (num_channels == 2) { + format.parameters["stereo"] = "1"; + } + format.num_channels = 2; + } + format.parameters["ptime"] = rtc::ToString(rtc::CheckedDivExact( + frame_size_samples, rtc::CheckedDivExact(clockrate_hz, 1000))); + auto factory = CreateBuiltinAudioEncoderFactory(); + acm_->SetEncoder( + factory->MakeAudioEncoder(payload_type, format, absl::nullopt)); + codec_registered_ = true; + input_frame_.num_channels_ = num_channels; + RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_, + AudioFrame::kMaxDataSizeSamples); + return codec_registered_; +} + +void AcmSendTestOldApi::RegisterExternalCodec( + std::unique_ptr<AudioEncoder> external_speech_encoder) { + input_frame_.num_channels_ = external_speech_encoder->NumChannels(); + acm_->SetEncoder(std::move(external_speech_encoder)); + RTC_DCHECK_LE(input_block_size_samples_ * input_frame_.num_channels_, + AudioFrame::kMaxDataSizeSamples); + codec_registered_ = true; +} + +std::unique_ptr<Packet> AcmSendTestOldApi::NextPacket() { + RTC_DCHECK(codec_registered_); + if (filter_.test(static_cast<size_t>(payload_type_))) { + // This payload type should be filtered out. Since the payload type is the + // same throughout the whole test run, no packet at all will be delivered. + // We can just as well signal that the test is over by returning NULL. + return nullptr; + } + // Insert audio and process until one packet is produced. 
+ while (clock_.TimeInMilliseconds() < test_duration_ms_) { + clock_.AdvanceTimeMilliseconds(kBlockSizeMs); + RTC_CHECK(audio_source_->Read( + input_block_size_samples_ * input_frame_.num_channels_, + input_frame_.mutable_data())); + data_to_send_ = false; + RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0); + input_frame_.timestamp_ += static_cast<uint32_t>(input_block_size_samples_); + if (data_to_send_) { + // Encoded packet received. + return CreatePacket(); + } + } + // Test ended. + return nullptr; +} + +// This method receives the callback from ACM when a new packet is produced. +int32_t AcmSendTestOldApi::SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_len_bytes, + int64_t absolute_capture_timestamp_ms) { + // Store the packet locally. + frame_type_ = frame_type; + payload_type_ = payload_type; + timestamp_ = timestamp; + last_payload_vec_.assign(payload_data, payload_data + payload_len_bytes); + RTC_DCHECK_EQ(last_payload_vec_.size(), payload_len_bytes); + data_to_send_ = true; + return 0; +} + +std::unique_ptr<Packet> AcmSendTestOldApi::CreatePacket() { + const size_t kRtpHeaderSize = 12; + rtc::CopyOnWriteBuffer packet_buffer(last_payload_vec_.size() + + kRtpHeaderSize); + uint8_t* packet_memory = packet_buffer.MutableData(); + // Populate the header bytes. + packet_memory[0] = 0x80; + packet_memory[1] = static_cast<uint8_t>(payload_type_); + packet_memory[2] = (sequence_number_ >> 8) & 0xFF; + packet_memory[3] = (sequence_number_)&0xFF; + packet_memory[4] = (timestamp_ >> 24) & 0xFF; + packet_memory[5] = (timestamp_ >> 16) & 0xFF; + packet_memory[6] = (timestamp_ >> 8) & 0xFF; + packet_memory[7] = timestamp_ & 0xFF; + // Set SSRC to 0x12345678. + packet_memory[8] = 0x12; + packet_memory[9] = 0x34; + packet_memory[10] = 0x56; + packet_memory[11] = 0x78; + + ++sequence_number_; + + // Copy the payload data. 
+ memcpy(packet_memory + kRtpHeaderSize, &last_payload_vec_[0], + last_payload_vec_.size()); + auto packet = std::make_unique<Packet>(std::move(packet_buffer), + clock_.TimeInMilliseconds()); + RTC_DCHECK(packet); + RTC_DCHECK(packet->valid_header()); + return packet; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h new file mode 100644 index 0000000000..0bd24705fd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_send_test.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_ +#define MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_ + +#include <memory> +#include <vector> + +#include "absl/strings/string_view.h" +#include "api/audio/audio_frame.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +class AudioEncoder; + +namespace test { +class InputAudioFile; +class Packet; + +class AcmSendTestOldApi : public AudioPacketizationCallback, + public PacketSource { + public: + AcmSendTestOldApi(InputAudioFile* audio_source, + int source_rate_hz, + int test_duration_ms); + ~AcmSendTestOldApi() override; + + AcmSendTestOldApi(const AcmSendTestOldApi&) = delete; + AcmSendTestOldApi& operator=(const AcmSendTestOldApi&) = delete; + + // Registers the send codec. Returns true on success, false otherwise. 
+ bool RegisterCodec(absl::string_view payload_name, + int sampling_freq_hz, + int channels, + int payload_type, + int frame_size_samples); + + // Registers an external send codec. + void RegisterExternalCodec( + std::unique_ptr<AudioEncoder> external_speech_encoder); + + // Inherited from PacketSource. + std::unique_ptr<Packet> NextPacket() override; + + // Inherited from AudioPacketizationCallback. + int32_t SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_len_bytes, + int64_t absolute_capture_timestamp_ms) override; + + AudioCodingModule* acm() { return acm_.get(); } + + private: + static const int kBlockSizeMs = 10; + + // Creates a Packet object from the last packet produced by ACM (and received + // through the SendData method as a callback). + std::unique_ptr<Packet> CreatePacket(); + + SimulatedClock clock_; + std::unique_ptr<AudioCodingModule> acm_; + InputAudioFile* audio_source_; + int source_rate_hz_; + const size_t input_block_size_samples_; + AudioFrame input_frame_; + bool codec_registered_; + int test_duration_ms_; + // The following member variables are set whenever SendData() is called. + AudioFrameType frame_type_; + int payload_type_; + uint32_t timestamp_; + uint16_t sequence_number_; + std::vector<uint8_t> last_payload_vec_; + bool data_to_send_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_ACM2_ACM_SEND_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc new file mode 100644 index 0000000000..2c186273b6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module.cc @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/include/audio_coding_module.h" + +#include <algorithm> +#include <cstdint> + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_coding/acm2/acm_receiver.h" +#include "modules/audio_coding/acm2/acm_remixing.h" +#include "modules/audio_coding/acm2/acm_resampler.h" +#include "modules/include/module_common_types.h" +#include "modules/include/module_common_types_public.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +// Initial size for the buffer in InputBuffer. This matches 6 channels of 10 ms +// 48 kHz data. +constexpr size_t kInitialInputDataBufferSize = 6 * 480; + +constexpr int32_t kMaxInputSampleRateHz = 192000; + +class AudioCodingModuleImpl final : public AudioCodingModule { + public: + explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config); + ~AudioCodingModuleImpl() override; + + ///////////////////////////////////////// + // Sender + // + + void ModifyEncoder(rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> + modifier) override; + + // Register a transport callback which will be + // called to deliver the encoded buffers. + int RegisterTransportCallback(AudioPacketizationCallback* transport) override; + + // Add 10 ms of raw (PCM) audio data to the encoder. 
+ int Add10MsData(const AudioFrame& audio_frame) override; + + ///////////////////////////////////////// + // (FEC) Forward Error Correction (codec internal) + // + + // Set target packet loss rate + int SetPacketLossRate(int loss_rate) override; + + ///////////////////////////////////////// + // Receiver + // + + // Initialize receiver, resets codec database etc. + int InitializeReceiver() override; + + void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) override; + + // Incoming packet from network parsed and ready for decode. + int IncomingPacket(const uint8_t* incoming_payload, + const size_t payload_length, + const RTPHeader& rtp_info) override; + + // Get 10 milliseconds of raw audio data to play out, and + // automatic resample to the requested frequency if > 0. + int PlayoutData10Ms(int desired_freq_hz, + AudioFrame* audio_frame, + bool* muted) override; + + ///////////////////////////////////////// + // Statistics + // + + int GetNetworkStatistics(NetworkStatistics* statistics) override; + + ANAStats GetANAStats() const override; + + int GetTargetBitrate() const override; + + private: + struct InputData { + InputData() : buffer(kInitialInputDataBufferSize) {} + uint32_t input_timestamp; + const int16_t* audio; + size_t length_per_channel; + size_t audio_channel; + // If a re-mix is required (up or down), this buffer will store a re-mixed + // version of the input. + std::vector<int16_t> buffer; + }; + + InputData input_data_ RTC_GUARDED_BY(acm_mutex_); + + // This member class writes values to the named UMA histogram, but only if + // the value has changed since the last time (and always for the first call). + class ChangeLogger { + public: + explicit ChangeLogger(absl::string_view histogram_name) + : histogram_name_(histogram_name) {} + // Logs the new value if it is different from the last logged value, or if + // this is the first call. 
+ void MaybeLog(int value); + + private: + int last_value_ = 0; + int first_time_ = true; + const std::string histogram_name_; + }; + + int Add10MsDataInternal(const AudioFrame& audio_frame, InputData* input_data) + RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_); + + // TODO(bugs.webrtc.org/10739): change `absolute_capture_timestamp_ms` to + // int64_t when it always receives a valid value. + int Encode(const InputData& input_data, + absl::optional<int64_t> absolute_capture_timestamp_ms) + RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_); + + int InitializeReceiverSafe() RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_); + + bool HaveValidEncoder(absl::string_view caller_name) const + RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_); + + // Preprocessing of input audio, including resampling and down-mixing if + // required, before pushing audio into encoder's buffer. + // + // in_frame: input audio-frame + // ptr_out: pointer to output audio_frame. If no preprocessing is required + // `ptr_out` will be pointing to `in_frame`, otherwise pointing to + // `preprocess_frame_`. + // + // Return value: + // -1: if encountering an error. + // 0: otherwise. + int PreprocessToAddData(const AudioFrame& in_frame, + const AudioFrame** ptr_out) + RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_); + + // Change required states after starting to receive the codec corresponding + // to `index`. + int UpdateUponReceivingCodec(int index); + + mutable Mutex acm_mutex_; + rtc::Buffer encode_buffer_ RTC_GUARDED_BY(acm_mutex_); + uint32_t expected_codec_ts_ RTC_GUARDED_BY(acm_mutex_); + uint32_t expected_in_ts_ RTC_GUARDED_BY(acm_mutex_); + acm2::ACMResampler resampler_ RTC_GUARDED_BY(acm_mutex_); + acm2::AcmReceiver receiver_; // AcmReceiver has it's own internal lock. + ChangeLogger bitrate_logger_ RTC_GUARDED_BY(acm_mutex_); + + // Current encoder stack, provided by a call to RegisterEncoder. 
+ std::unique_ptr<AudioEncoder> encoder_stack_ RTC_GUARDED_BY(acm_mutex_); + + // This is to keep track of CN instances where we can send DTMFs. + uint8_t previous_pltype_ RTC_GUARDED_BY(acm_mutex_); + + bool receiver_initialized_ RTC_GUARDED_BY(acm_mutex_); + + AudioFrame preprocess_frame_ RTC_GUARDED_BY(acm_mutex_); + bool first_10ms_data_ RTC_GUARDED_BY(acm_mutex_); + + bool first_frame_ RTC_GUARDED_BY(acm_mutex_); + uint32_t last_timestamp_ RTC_GUARDED_BY(acm_mutex_); + uint32_t last_rtp_timestamp_ RTC_GUARDED_BY(acm_mutex_); + + Mutex callback_mutex_; + AudioPacketizationCallback* packetization_callback_ + RTC_GUARDED_BY(callback_mutex_); + + int codec_histogram_bins_log_[static_cast<size_t>( + AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes)]; + int number_of_consecutive_empty_packets_; +}; + +// Adds a codec usage sample to the histogram. +void UpdateCodecTypeHistogram(size_t codec_type) { + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.Encoder.CodecType", static_cast<int>(codec_type), + static_cast<int>( + webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes)); +} + +void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) { + if (value != last_value_ || first_time_) { + first_time_ = false; + last_value_ = value; + RTC_HISTOGRAM_COUNTS_SPARSE_100(histogram_name_, value); + } +} + +AudioCodingModuleImpl::AudioCodingModuleImpl( + const AudioCodingModule::Config& config) + : expected_codec_ts_(0xD87F3F9F), + expected_in_ts_(0xD87F3F9F), + receiver_(config), + bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"), + encoder_stack_(nullptr), + previous_pltype_(255), + receiver_initialized_(false), + first_10ms_data_(false), + first_frame_(true), + packetization_callback_(NULL), + codec_histogram_bins_log_(), + number_of_consecutive_empty_packets_(0) { + if (InitializeReceiverSafe() < 0) { + RTC_LOG(LS_ERROR) << "Cannot initialize receiver"; + } + RTC_LOG(LS_INFO) << "Created"; +} + +AudioCodingModuleImpl::~AudioCodingModuleImpl() = default; + +int32_t 
AudioCodingModuleImpl::Encode( + const InputData& input_data, + absl::optional<int64_t> absolute_capture_timestamp_ms) { + // TODO(bugs.webrtc.org/10739): add dcheck that + // `audio_frame.absolute_capture_timestamp_ms()` always has a value. + AudioEncoder::EncodedInfo encoded_info; + uint8_t previous_pltype; + + // Check if there is an encoder before. + if (!HaveValidEncoder("Process")) + return -1; + + if (!first_frame_) { + RTC_DCHECK(IsNewerTimestamp(input_data.input_timestamp, last_timestamp_)) + << "Time should not move backwards"; + } + + // Scale the timestamp to the codec's RTP timestamp rate. + uint32_t rtp_timestamp = + first_frame_ + ? input_data.input_timestamp + : last_rtp_timestamp_ + + rtc::dchecked_cast<uint32_t>(rtc::CheckedDivExact( + int64_t{input_data.input_timestamp - last_timestamp_} * + encoder_stack_->RtpTimestampRateHz(), + int64_t{encoder_stack_->SampleRateHz()})); + + last_timestamp_ = input_data.input_timestamp; + last_rtp_timestamp_ = rtp_timestamp; + first_frame_ = false; + + // Clear the buffer before reuse - encoded data will get appended. + encode_buffer_.Clear(); + encoded_info = encoder_stack_->Encode( + rtp_timestamp, + rtc::ArrayView<const int16_t>( + input_data.audio, + input_data.audio_channel * input_data.length_per_channel), + &encode_buffer_); + + bitrate_logger_.MaybeLog(encoder_stack_->GetTargetBitrate() / 1000); + if (encode_buffer_.size() == 0 && !encoded_info.send_even_if_empty) { + // Not enough data. + return 0; + } + previous_pltype = previous_pltype_; // Read it while we have the critsect. + + // Log codec type to histogram once every 500 packets. 
+ if (encoded_info.encoded_bytes == 0) { + ++number_of_consecutive_empty_packets_; + } else { + size_t codec_type = static_cast<size_t>(encoded_info.encoder_type); + codec_histogram_bins_log_[codec_type] += + number_of_consecutive_empty_packets_ + 1; + number_of_consecutive_empty_packets_ = 0; + if (codec_histogram_bins_log_[codec_type] >= 500) { + codec_histogram_bins_log_[codec_type] -= 500; + UpdateCodecTypeHistogram(codec_type); + } + } + + AudioFrameType frame_type; + if (encode_buffer_.size() == 0 && encoded_info.send_even_if_empty) { + frame_type = AudioFrameType::kEmptyFrame; + encoded_info.payload_type = previous_pltype; + } else { + RTC_DCHECK_GT(encode_buffer_.size(), 0); + frame_type = encoded_info.speech ? AudioFrameType::kAudioFrameSpeech + : AudioFrameType::kAudioFrameCN; + } + + { + MutexLock lock(&callback_mutex_); + if (packetization_callback_) { + packetization_callback_->SendData( + frame_type, encoded_info.payload_type, encoded_info.encoded_timestamp, + encode_buffer_.data(), encode_buffer_.size(), + absolute_capture_timestamp_ms.value_or(-1)); + } + } + previous_pltype_ = encoded_info.payload_type; + return static_cast<int32_t>(encode_buffer_.size()); +} + +///////////////////////////////////////// +// Sender +// + +void AudioCodingModuleImpl::ModifyEncoder( + rtc::FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) { + MutexLock lock(&acm_mutex_); + modifier(&encoder_stack_); +} + +// Register a transport callback which will be called to deliver +// the encoded buffers. +int AudioCodingModuleImpl::RegisterTransportCallback( + AudioPacketizationCallback* transport) { + MutexLock lock(&callback_mutex_); + packetization_callback_ = transport; + return 0; +} + +// Add 10MS of raw (PCM) audio data to the encoder. 
+int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
+  MutexLock lock(&acm_mutex_);
+  // Validate and preprocess the frame first; only encode if that succeeded.
+  int r = Add10MsDataInternal(audio_frame, &input_data_);
+  // TODO(bugs.webrtc.org/10739): add dcheck that
+  // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
+  return r < 0
+             ? r
+             : Encode(input_data_, audio_frame.absolute_capture_timestamp_ms());
+}
+
+// Validates `audio_frame`, preprocesses it (resampling / down-mixing) and
+// fills `input_data` with the timestamp, per-channel length, channel count and
+// audio pointer that Encode() should use.
+//
+// Return value:
+//   -1: the frame is empty, its rate is above kMaxInputSampleRateHz, its
+//       length is not 10 ms at its sample rate, its channel count is not one
+//       of 1/2/4/6/8, no encoder is installed, or preprocessing failed.
+//    0: otherwise.
+int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
+                                               InputData* input_data) {
+  if (audio_frame.samples_per_channel_ == 0) {
+    RTC_DCHECK_NOTREACHED();
+    RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, payload length is zero";
+    return -1;
+  }
+
+  if (audio_frame.sample_rate_hz_ > kMaxInputSampleRateHz) {
+    RTC_DCHECK_NOTREACHED();
+    RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, input frequency not valid";
+    return -1;
+  }
+
+  // Check that the length and frequency match, i.e. that the frame holds
+  // exactly 10 ms of audio. We currently just support raw PCM.
+  if (static_cast<size_t>(audio_frame.sample_rate_hz_ / 100) !=
+      audio_frame.samples_per_channel_) {
+    RTC_LOG(LS_ERROR)
+        << "Cannot Add 10 ms audio, input frequency and length doesn't match";
+    return -1;
+  }
+
+  if (audio_frame.num_channels_ != 1 && audio_frame.num_channels_ != 2 &&
+      audio_frame.num_channels_ != 4 && audio_frame.num_channels_ != 6 &&
+      audio_frame.num_channels_ != 8) {
+    RTC_LOG(LS_ERROR) << "Cannot Add 10 ms audio, invalid number of channels.";
+    return -1;
+  }
+
+  // Do we have a codec registered?
+  if (!HaveValidEncoder("Add10MsData")) {
+    return -1;
+  }
+
+  const AudioFrame* ptr_frame;
+  // Perform a resampling, also down-mix if it is required and can be
+  // performed before resampling (a down mix prior to resampling will take
+  // place if both primary and secondary encoders are mono and input is in
+  // stereo).
+  if (PreprocessToAddData(audio_frame, &ptr_frame) < 0) {
+    return -1;
+  }
+
+  // Check whether we need an up-mix or down-mix?
+  const size_t current_num_channels = encoder_stack_->NumChannels();
+  const bool same_num_channels =
+      ptr_frame->num_channels_ == current_num_channels;
+
+  // TODO(yujo): Skip encode of muted frames.
+  input_data->input_timestamp = ptr_frame->timestamp_;
+  input_data->length_per_channel = ptr_frame->samples_per_channel_;
+  input_data->audio_channel = current_num_channels;
+
+  if (!same_num_channels) {
+    // Remixes the input frame to the output data and in the process resize the
+    // output data if needed.
+    ReMixFrame(*ptr_frame, current_num_channels, &input_data->buffer);
+
+    // For pushing data to primary, point the `ptr_audio` to correct buffer.
+    input_data->audio = input_data->buffer.data();
+    RTC_DCHECK_GE(input_data->buffer.size(),
+                  input_data->length_per_channel * input_data->audio_channel);
+  } else {
+    // When adding data to encoders this pointer is pointing to an audio buffer
+    // with correct number of channels.
+    input_data->audio = ptr_frame->data();
+  }
+
+  return 0;
+}
+
+// Perform a resampling and down-mix if required. We down-mix only if
+// encoder is mono and input is stereo. In case of dual-streaming, both
+// encoders have to be mono for down-mix to take place.
+// |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
+// is required, |*ptr_out| points to `in_frame`.
+// TODO(yujo): Make this more efficient for muted frames.
+int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
+                                               const AudioFrame** ptr_out) {
+  const bool resample =
+      in_frame.sample_rate_hz_ != encoder_stack_->SampleRateHz();
+
+  // This variable is true if primary codec and secondary codec (if exists)
+  // are both mono and input is stereo.
+  // TODO(henrik.lundin): This condition should probably be
+  //   in_frame.num_channels_ > encoder_stack_->NumChannels()
+  const bool down_mix =
+      in_frame.num_channels_ == 2 && encoder_stack_->NumChannels() == 1;
+
+  if (!first_10ms_data_) {
+    // First frame ever: start both timestamp series at the input timestamp.
+    expected_in_ts_ = in_frame.timestamp_;
+    expected_codec_ts_ = in_frame.timestamp_;
+    first_10ms_data_ = true;
+  } else if (in_frame.timestamp_ != expected_in_ts_) {
+    RTC_LOG(LS_WARNING) << "Unexpected input timestamp: " << in_frame.timestamp_
+                        << ", expected: " << expected_in_ts_;
+    // NOTE(review): the encoder/input rate ratio is converted to uint32_t
+    // before the multiplication, so it is truncated to an integer (0 whenever
+    // the encoder rate is lower than the input rate). Confirm whether that is
+    // intended before relying on the codec timestamp across input gaps.
+    expected_codec_ts_ +=
+        (in_frame.timestamp_ - expected_in_ts_) *
+        static_cast<uint32_t>(
+            static_cast<double>(encoder_stack_->SampleRateHz()) /
+            static_cast<double>(in_frame.sample_rate_hz_));
+    expected_in_ts_ = in_frame.timestamp_;
+  }
+
+  if (!down_mix && !resample) {
+    // No pre-processing is required.
+    if (expected_in_ts_ == expected_codec_ts_) {
+      // If we've never resampled, we can use the input frame as-is
+      *ptr_out = &in_frame;
+    } else {
+      // Otherwise we'll need to alter the timestamp. Since in_frame is const,
+      // we'll have to make a copy of it.
+      preprocess_frame_.CopyFrom(in_frame);
+      preprocess_frame_.timestamp_ = expected_codec_ts_;
+      *ptr_out = &preprocess_frame_;
+    }
+
+    expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+    expected_codec_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+    return 0;
+  }
+
+  *ptr_out = &preprocess_frame_;
+  preprocess_frame_.num_channels_ = in_frame.num_channels_;
+  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
+  // Scratch buffer holding the down-mixed signal when it still has to go
+  // through the resampler.
+  std::array<int16_t, AudioFrame::kMaxDataSizeSamples> audio;
+  const int16_t* src_ptr_audio;
+  if (down_mix) {
+    // If a resampling is required, the output of a down-mix is written into a
+    // local buffer, otherwise, it will be written to the output frame.
+    int16_t* dest_ptr_audio =
+        resample ? audio.data() : preprocess_frame_.mutable_data();
+    RTC_DCHECK_GE(audio.size(), preprocess_frame_.samples_per_channel_);
+    RTC_DCHECK_GE(audio.size(), in_frame.samples_per_channel_);
+    DownMixFrame(in_frame,
+                 rtc::ArrayView<int16_t>(
+                     dest_ptr_audio, preprocess_frame_.samples_per_channel_));
+    preprocess_frame_.num_channels_ = 1;
+
+    // Set the input of the resampler to the down-mixed signal. (Only read
+    // below when `resample` is true.)
+    src_ptr_audio = audio.data();
+  } else {
+    // Set the input of the resampler to the original data.
+    src_ptr_audio = in_frame.data();
+  }
+
+  preprocess_frame_.timestamp_ = expected_codec_ts_;
+  preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
+  // If it is required, we have to do a resampling.
+  if (resample) {
+    // The result of the resampler is written to output frame.
+    int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();
+
+    int samples_per_channel = resampler_.Resample10Msec(
+        src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(),
+        preprocess_frame_.num_channels_, AudioFrame::kMaxDataSizeSamples,
+        dest_ptr_audio);
+
+    if (samples_per_channel < 0) {
+      RTC_LOG(LS_ERROR) << "Cannot add 10 ms audio, resampling failed";
+      return -1;
+    }
+    preprocess_frame_.samples_per_channel_ =
+        static_cast<size_t>(samples_per_channel);
+    preprocess_frame_.sample_rate_hz_ = encoder_stack_->SampleRateHz();
+  }
+
+  // The codec timestamp advances by the (possibly resampled) output length,
+  // the input timestamp by the input length.
+  expected_codec_ts_ +=
+      static_cast<uint32_t>(preprocess_frame_.samples_per_channel_);
+  expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
+
+  return 0;
+}
+
+/////////////////////////////////////////
+//   (FEC) Forward Error Correction (codec internal)
+//
+
+// Forwards `loss_rate`, divided by 100.0, to the encoder as the uplink packet
+// loss fraction. Does nothing if no encoder is installed. Always returns 0.
+int AudioCodingModuleImpl::SetPacketLossRate(int loss_rate) {
+  MutexLock lock(&acm_mutex_);
+  if (HaveValidEncoder("SetPacketLossRate")) {
+    encoder_stack_->OnReceivedUplinkPacketLossFraction(loss_rate / 100.0);
+  }
+  return 0;
+}
+
+/////////////////////////////////////////
+//   Receiver
+//
+
+int AudioCodingModuleImpl::InitializeReceiver() {
+  MutexLock lock(&acm_mutex_);
+  return InitializeReceiverSafe();
+}
+
+// Initialize receiver, resets codec database etc.
+int AudioCodingModuleImpl::InitializeReceiverSafe() {
+  // If the receiver is already initialized then we want to destroy any
+  // existing decoders. After a call to this function, we should have a clean
+  // start-up.
+  if (receiver_initialized_)
+    receiver_.RemoveAllCodecs();
+  receiver_.FlushBuffers();
+
+  receiver_initialized_ = true;
+  return 0;
+}
+
+void AudioCodingModuleImpl::SetReceiveCodecs(
+    const std::map<int, SdpAudioFormat>& codecs) {
+  MutexLock lock(&acm_mutex_);
+  receiver_.SetCodecs(codecs);
+}
+
+// Incoming packet from network parsed and ready for decode.
+int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
+                                          const size_t payload_length,
+                                          const RTPHeader& rtp_header) {
+  // A null payload pointer is only allowed together with a zero length, and
+  // vice versa.
+  RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
+  return receiver_.InsertPacket(
+      rtp_header,
+      rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
+}
+
+// Get 10 milliseconds of raw audio data to play out.
+// Automatic resample to the requested frequency.
+int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
+                                           AudioFrame* audio_frame,
+                                           bool* muted) {
+  // GetAudio always returns 10 ms, at the requested sample rate.
+  if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
+    RTC_LOG(LS_ERROR) << "PlayoutData failed, RecOut Failed";
+    return -1;
+  }
+  return 0;
+}
+
+/////////////////////////////////////////
+//   Statistics
+//
+
+// TODO(turajs) change the return value to void. Also change the corresponding
+// NetEq function.
+int AudioCodingModuleImpl::GetNetworkStatistics(NetworkStatistics* statistics) { + receiver_.GetNetworkStatistics(statistics); + return 0; +} + +bool AudioCodingModuleImpl::HaveValidEncoder( + absl::string_view caller_name) const { + if (!encoder_stack_) { + RTC_LOG(LS_ERROR) << caller_name << " failed: No send codec is registered."; + return false; + } + return true; +} + +ANAStats AudioCodingModuleImpl::GetANAStats() const { + MutexLock lock(&acm_mutex_); + if (encoder_stack_) + return encoder_stack_->GetANAStats(); + // If no encoder is set, return default stats. + return ANAStats(); +} + +int AudioCodingModuleImpl::GetTargetBitrate() const { + MutexLock lock(&acm_mutex_); + if (!encoder_stack_) { + return -1; + } + return encoder_stack_->GetTargetBitrate(); +} + +} // namespace + +AudioCodingModule::Config::Config( + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) + : neteq_config(), + clock(Clock::GetRealTimeClockRaw()), + decoder_factory(decoder_factory) { + // Post-decode VAD is disabled by default in NetEq, however, Audio + // Conference Mixer relies on VAD decisions and fails without them. + neteq_config.enable_post_decode_vad = true; +} + +AudioCodingModule::Config::Config(const Config&) = default; +AudioCodingModule::Config::~Config() = default; + +AudioCodingModule* AudioCodingModule::Create(const Config& config) { + return new AudioCodingModuleImpl(config); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc new file mode 100644 index 0000000000..f1eb81c015 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc @@ -0,0 +1,1278 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/include/audio_coding_module.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/audio_codecs/builtin_audio_decoder_factory.h"
+#include "api/audio_codecs/builtin_audio_encoder_factory.h"
+#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h"
+#include "api/audio_codecs/opus/audio_decoder_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h"
+#include "api/audio_codecs/opus/audio_encoder_opus.h"
+#include "modules/audio_coding/acm2/acm_receive_test.h"
+#include "modules/audio_coding/acm2/acm_send_test.h"
+#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
+#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
+#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
+#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
+#include "modules/audio_coding/neteq/tools/audio_checksum.h"
+#include "modules/audio_coding/neteq/tools/audio_loop.h"
+#include "modules/audio_coding/neteq/tools/constant_pcm_packet_source.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_audio_file.h"
+#include "modules/audio_coding/neteq/tools/output_wav_file.h"
+#include "modules/audio_coding/neteq/tools/packet.h"
+#include "modules/audio_coding/neteq/tools/rtp_file_source.h"
+#include "rtc_base/event.h"
+#include "rtc_base/message_digest.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/system/arch.h"
+#include "rtc_base/thread_annotations.h"
+#include "system_wrappers/include/clock.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "system_wrappers/include/sleep.h"
+#include "test/audio_decoder_proxy_factory.h"
+#include "test/gtest.h"
+#include "test/mock_audio_decoder.h"
+#include "test/mock_audio_encoder.h"
+#include "test/testsupport/file_utils.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::Invoke;
+
+namespace webrtc {
+
+namespace {
+// Audio parameters shared by the fixture below: 16 kHz audio in 10 ms frames.
+const int kSampleRateHz = 16000;
+const int kNumSamples10ms = kSampleRateHz / 100;
+const int kFrameSizeMs = 10;  // Multiple of 10.
+const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms;
+const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t);
+const uint8_t kPayloadType = 111;
+}  // namespace
+
+// Helper that fills in an RTPHeader with fixed test values and advances it
+// from one packet to the next.
+class RtpData {
+ public:
+  RtpData(int samples_per_packet, uint8_t payload_type)
+      : samples_per_packet_(samples_per_packet), payload_type_(payload_type) {}
+
+  virtual ~RtpData() {}
+
+  // Initializes `rtp_header` with fixed sequence number, timestamp and SSRC
+  // and the payload type given at construction.
+  void Populate(RTPHeader* rtp_header) {
+    rtp_header->sequenceNumber = 0xABCD;
+    rtp_header->timestamp = 0xABCDEF01;
+    rtp_header->payloadType = payload_type_;
+    rtp_header->markerBit = false;
+    rtp_header->ssrc = 0x1234;
+    rtp_header->numCSRCs = 0;
+
+    rtp_header->payload_type_frequency = kSampleRateHz;
+  }
+
+  // Steps `rtp_header` to the next packet: sequence number + 1, timestamp +
+  // samples per packet.
+  void Forward(RTPHeader* rtp_header) {
+    ++rtp_header->sequenceNumber;
+    rtp_header->timestamp += samples_per_packet_;
+  }
+
+ private:
+  int samples_per_packet_;
+  uint8_t payload_type_;
+};
+
+// Transport callback stub that counts SendData() calls and remembers the
+// metadata and payload of the most recent one. All state is guarded by
+// `mutex_`.
+class PacketizationCallbackStubOldApi : public AudioPacketizationCallback {
+ public:
+  PacketizationCallbackStubOldApi()
+      : num_calls_(0),
+        last_frame_type_(AudioFrameType::kEmptyFrame),
+        last_payload_type_(-1),
+        last_timestamp_(0) {}
+
+  // Records the call and copies the payload. Always returns 0.
+  int32_t SendData(AudioFrameType frame_type,
+                   uint8_t payload_type,
+                   uint32_t timestamp,
+                   const uint8_t* payload_data,
+                   size_t payload_len_bytes,
+                   int64_t absolute_capture_timestamp_ms) override {
+    MutexLock lock(&mutex_);
+    ++num_calls_;
+    last_frame_type_ = frame_type;
+    last_payload_type_ = payload_type;
+    last_timestamp_ = timestamp;
+    last_payload_vec_.assign(payload_data, payload_data + payload_len_bytes);
+    return 0;
+  }
+
+  int num_calls() const {
+    MutexLock lock(&mutex_);
+    return num_calls_;
+  }
+
+  int last_payload_len_bytes() const {
+    MutexLock lock(&mutex_);
+    return rtc::checked_cast<int>(last_payload_vec_.size());
+  }
+
+  AudioFrameType last_frame_type() const {
+    MutexLock lock(&mutex_);
+    return last_frame_type_;
+  }
+
+  int last_payload_type() const {
+    MutexLock lock(&mutex_);
+    return last_payload_type_;
+  }
+
+  uint32_t last_timestamp() const {
+    MutexLock lock(&mutex_);
+    return last_timestamp_;
+  }
+
+  // Exchanges the stored payload with the contents of `payload`.
+  void SwapBuffers(std::vector<uint8_t>* payload) {
+    MutexLock lock(&mutex_);
+    last_payload_vec_.swap(*payload);
+  }
+
+ private:
+  int num_calls_ RTC_GUARDED_BY(mutex_);
+  AudioFrameType last_frame_type_ RTC_GUARDED_BY(mutex_);
+  int last_payload_type_ RTC_GUARDED_BY(mutex_);
+  uint32_t last_timestamp_ RTC_GUARDED_BY(mutex_);
+  std::vector<uint8_t> last_payload_vec_ RTC_GUARDED_BY(mutex_);
+  mutable Mutex mutex_;
+};
+
+// Test fixture that owns an ACM wired to a PacketizationCallbackStubOldApi
+// and a muted 16 kHz mono input frame. The helper methods are virtual so that
+// derived fixtures can override individual steps.
+class AudioCodingModuleTestOldApi : public ::testing::Test {
+ protected:
+  AudioCodingModuleTestOldApi()
+      : rtp_utility_(new RtpData(kFrameSizeSamples, kPayloadType)),
+        clock_(Clock::GetRealTimeClock()) {}
+
+  ~AudioCodingModuleTestOldApi() {}
+
+  void TearDown() {}
+
+  // Creates the ACM, prepares the RTP header and the muted input frame,
+  // registers the transport callback and selects the L16 format.
+  void SetUp() {
+    acm_.reset(AudioCodingModule::Create([this] {
+      AudioCodingModule::Config config;
+      config.clock = clock_;
+      config.decoder_factory = CreateBuiltinAudioDecoderFactory();
+      return config;
+    }()));
+
+    rtp_utility_->Populate(&rtp_header_);
+
+    input_frame_.sample_rate_hz_ = kSampleRateHz;
+    input_frame_.num_channels_ = 1;
+    input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000;  // 10 ms.
+    static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples,
+                  "audio frame too small");
+    input_frame_.Mute();
+
+    ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_));
+
+    SetUpL16Codec();
+  }
+
+  // Set up L16 codec.
+  virtual void SetUpL16Codec() {
+    audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
+    pac_size_ = 160;
+  }
+
+  // Registers `audio_format_` as both the receive codec and the send encoder
+  // under kPayloadType.
+  virtual void RegisterCodec() {
+    acm_->SetReceiveCodecs({{kPayloadType, *audio_format_}});
+    acm_->SetEncoder(CreateBuiltinAudioEncoderFactory()->MakeAudioEncoder(
+        kPayloadType, *audio_format_, absl::nullopt));
+  }
+
+  virtual void InsertPacketAndPullAudio() {
+    InsertPacket();
+    PullAudio();
+  }
+
+  // Inserts one all-zero payload with the current RTP header, then advances
+  // the header to the next packet.
+  virtual void InsertPacket() {
+    const uint8_t kPayload[kPayloadSizeBytes] = {0};
+    ASSERT_EQ(0,
+              acm_->IncomingPacket(kPayload, kPayloadSizeBytes, rtp_header_));
+    rtp_utility_->Forward(&rtp_header_);
+  }
+
+  // Pulls 10 ms of audio and expects it not to be muted.
+  virtual void PullAudio() {
+    AudioFrame audio_frame;
+    bool muted;
+    ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &audio_frame, &muted));
+    ASSERT_FALSE(muted);
+  }
+
+  // Feeds the input frame to the encoder and advances its timestamp by 10 ms.
+  virtual void InsertAudio() {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kNumSamples10ms;
+  }
+
+  // Expects the last payload to be either empty or 2 * pac_size_ bytes
+  // (presumably 2 bytes per 16-bit L16 sample - TODO confirm).
+  virtual void VerifyEncoding() {
+    int last_length = packet_cb_.last_payload_len_bytes();
+    EXPECT_TRUE(last_length == 2 * pac_size_ || last_length == 0)
+        << "Last encoded packet was " << last_length << " bytes.";
+  }
+
+  virtual void InsertAudioAndVerifyEncoding() {
+    InsertAudio();
+    VerifyEncoding();
+  }
+
+  std::unique_ptr<RtpData> rtp_utility_;
+  std::unique_ptr<AudioCodingModule> acm_;
+  PacketizationCallbackStubOldApi packet_cb_;
+  RTPHeader rtp_header_;
+  AudioFrame input_frame_;
+
+  absl::optional<SdpAudioFormat> audio_format_;
+  int pac_size_ = -1;
+
+  Clock* clock_;
+};
+
+class AudioCodingModuleTestOldApiDeathTest
+    : public AudioCodingModuleTestOldApi {};
+
+// Pulls audio at 32 kHz without having inserted any packet and checks the
+// shape of the returned frame.
+TEST_F(AudioCodingModuleTestOldApi, VerifyOutputFrame) {
+  AudioFrame audio_frame;
+  // Deliberately shadows the file-level kSampleRateHz (16000).
+  const int kSampleRateHz = 32000;
+  bool muted;
+  EXPECT_EQ(0, acm_->PlayoutData10Ms(kSampleRateHz, &audio_frame, &muted));
+  ASSERT_FALSE(muted);
+  EXPECT_EQ(0u, audio_frame.timestamp_);
+  EXPECT_GT(audio_frame.num_channels_, 0u);
+  EXPECT_EQ(static_cast<size_t>(kSampleRateHz / 100),
+            audio_frame.samples_per_channel_);
+  EXPECT_EQ(kSampleRateHz, audio_frame.sample_rate_hz_);
+}
+
+// The below test is temporarily disabled on Windows due to problems
+// with clang debug builds.
+// TODO(tommi): Re-enable when we've figured out what the problem is.
+// http://crbug.com/615050
+#if !defined(WEBRTC_WIN) && defined(__clang__) && RTC_DCHECK_IS_ON && \
+    GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST_F(AudioCodingModuleTestOldApiDeathTest, FailOnZeroDesiredFrequency) {
+  AudioFrame audio_frame;
+  bool muted;
+  RTC_EXPECT_DEATH(acm_->PlayoutData10Ms(0, &audio_frame, &muted),
+                   "dst_sample_rate_hz");
+}
+#endif
+
+// Checks that the transport callback is invoked once for each speech packet.
+// Also checks that the frame type is kAudioFrameSpeech.
+TEST_F(AudioCodingModuleTestOldApi, TransportCallbackIsInvokedForEachPacket) {
+  // Use 30 ms packets, i.e. one callback for every third 10 ms block.
+  const int k10MsBlocksPerPacket = 3;
+  pac_size_ = k10MsBlocksPerPacket * kSampleRateHz / 100;
+  audio_format_->parameters["ptime"] = "30";
+  RegisterCodec();
+  const int kLoops = 10;
+  for (int i = 0; i < kLoops; ++i) {
+    EXPECT_EQ(i / k10MsBlocksPerPacket, packet_cb_.num_calls());
+    if (packet_cb_.num_calls() > 0)
+      EXPECT_EQ(AudioFrameType::kAudioFrameSpeech,
+                packet_cb_.last_frame_type());
+    InsertAudioAndVerifyEncoding();
+  }
+  EXPECT_EQ(kLoops / k10MsBlocksPerPacket, packet_cb_.num_calls());
+  EXPECT_EQ(AudioFrameType::kAudioFrameSpeech, packet_cb_.last_frame_type());
+}
+
+// Introduce this class to set different expectations on the number of encoded
+// bytes. This class expects all encoded packets to be 9 bytes (matching one
+// CNG SID frame) or 0 bytes. This test depends on `input_frame_` containing
+// (near-)zero values.
It also introduces a way to register comfort noise with +// a custom payload type. +class AudioCodingModuleTestWithComfortNoiseOldApi + : public AudioCodingModuleTestOldApi { + protected: + void RegisterCngCodec(int rtp_payload_type) { + acm_->SetReceiveCodecs({{kPayloadType, *audio_format_}, + {rtp_payload_type, {"cn", kSampleRateHz, 1}}}); + acm_->ModifyEncoder([&](std::unique_ptr<AudioEncoder>* enc) { + AudioEncoderCngConfig config; + config.speech_encoder = std::move(*enc); + config.num_channels = 1; + config.payload_type = rtp_payload_type; + config.vad_mode = Vad::kVadNormal; + *enc = CreateComfortNoiseEncoder(std::move(config)); + }); + } + + void VerifyEncoding() override { + int last_length = packet_cb_.last_payload_len_bytes(); + EXPECT_TRUE(last_length == 9 || last_length == 0) + << "Last encoded packet was " << last_length << " bytes."; + } + + void DoTest(int blocks_per_packet, int cng_pt) { + const int kLoops = 40; + // This array defines the expected frame types, and when they should arrive. + // We expect a frame to arrive each time the speech encoder would have + // produced a packet, and once every 100 ms the frame should be non-empty, + // that is contain comfort noise. 
+ const struct { + int ix; + AudioFrameType type; + } expectation[] = {{2, AudioFrameType::kAudioFrameCN}, + {5, AudioFrameType::kEmptyFrame}, + {8, AudioFrameType::kEmptyFrame}, + {11, AudioFrameType::kAudioFrameCN}, + {14, AudioFrameType::kEmptyFrame}, + {17, AudioFrameType::kEmptyFrame}, + {20, AudioFrameType::kAudioFrameCN}, + {23, AudioFrameType::kEmptyFrame}, + {26, AudioFrameType::kEmptyFrame}, + {29, AudioFrameType::kEmptyFrame}, + {32, AudioFrameType::kAudioFrameCN}, + {35, AudioFrameType::kEmptyFrame}, + {38, AudioFrameType::kEmptyFrame}}; + for (int i = 0; i < kLoops; ++i) { + int num_calls_before = packet_cb_.num_calls(); + EXPECT_EQ(i / blocks_per_packet, num_calls_before); + InsertAudioAndVerifyEncoding(); + int num_calls = packet_cb_.num_calls(); + if (num_calls == num_calls_before + 1) { + EXPECT_EQ(expectation[num_calls - 1].ix, i); + EXPECT_EQ(expectation[num_calls - 1].type, packet_cb_.last_frame_type()) + << "Wrong frame type for lap " << i; + EXPECT_EQ(cng_pt, packet_cb_.last_payload_type()); + } else { + EXPECT_EQ(num_calls, num_calls_before); + } + } + } +}; + +// Checks that the transport callback is invoked once per frame period of the +// underlying speech encoder, even when comfort noise is produced. +// Also checks that the frame type is kAudioFrameCN or kEmptyFrame. +TEST_F(AudioCodingModuleTestWithComfortNoiseOldApi, + TransportCallbackTestForComfortNoiseRegisterCngLast) { + const int k10MsBlocksPerPacket = 3; + pac_size_ = k10MsBlocksPerPacket * kSampleRateHz / 100; + audio_format_->parameters["ptime"] = "30"; + RegisterCodec(); + const int kCngPayloadType = 105; + RegisterCngCodec(kCngPayloadType); + DoTest(k10MsBlocksPerPacket, kCngPayloadType); +} + +// A multi-threaded test for ACM that uses the PCM16b 16 kHz codec. 
+class AudioCodingModuleMtTestOldApi : public AudioCodingModuleTestOldApi { + protected: + static const int kNumPackets = 500; + static const int kNumPullCalls = 500; + + AudioCodingModuleMtTestOldApi() + : AudioCodingModuleTestOldApi(), + send_count_(0), + insert_packet_count_(0), + pull_audio_count_(0), + next_insert_packet_time_ms_(0), + fake_clock_(new SimulatedClock(0)) { + clock_ = fake_clock_.get(); + } + + void SetUp() { + AudioCodingModuleTestOldApi::SetUp(); + RegisterCodec(); // Must be called before the threads start below. + StartThreads(); + } + + void StartThreads() { + quit_.store(false); + + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + send_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!quit_.load()) { + CbSendImpl(); + } + }, + "send", attributes); + insert_packet_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!quit_.load()) { + CbInsertPacketImpl(); + } + }, + "insert_packet", attributes); + pull_audio_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!quit_.load()) { + CbPullAudioImpl(); + } + }, + "pull_audio", attributes); + } + + void TearDown() { + AudioCodingModuleTestOldApi::TearDown(); + quit_.store(true); + pull_audio_thread_.Finalize(); + send_thread_.Finalize(); + insert_packet_thread_.Finalize(); + } + + bool RunTest() { return test_complete_.Wait(TimeDelta::Minutes(10)); } + + virtual bool TestDone() { + if (packet_cb_.num_calls() > kNumPackets) { + MutexLock lock(&mutex_); + if (pull_audio_count_ > kNumPullCalls) { + // Both conditions for completion are met. End the test. + return true; + } + } + return false; + } + + // The send thread doesn't have to care about the current simulated time, + // since only the AcmReceiver is using the clock. + void CbSendImpl() { + SleepMs(1); + if (HasFatalFailure()) { + // End the test early if a fatal failure (ASSERT_*) has occurred. 
+ test_complete_.Set(); + } + ++send_count_; + InsertAudioAndVerifyEncoding(); + if (TestDone()) { + test_complete_.Set(); + } + } + + void CbInsertPacketImpl() { + SleepMs(1); + { + MutexLock lock(&mutex_); + if (clock_->TimeInMilliseconds() < next_insert_packet_time_ms_) { + return; + } + next_insert_packet_time_ms_ += 10; + } + // Now we're not holding the crit sect when calling ACM. + ++insert_packet_count_; + InsertPacket(); + } + + void CbPullAudioImpl() { + SleepMs(1); + { + MutexLock lock(&mutex_); + // Don't let the insert thread fall behind. + if (next_insert_packet_time_ms_ < clock_->TimeInMilliseconds()) { + return; + } + ++pull_audio_count_; + } + // Now we're not holding the crit sect when calling ACM. + PullAudio(); + fake_clock_->AdvanceTimeMilliseconds(10); + } + + rtc::PlatformThread send_thread_; + rtc::PlatformThread insert_packet_thread_; + rtc::PlatformThread pull_audio_thread_; + // Used to force worker threads to stop looping. + std::atomic<bool> quit_; + + rtc::Event test_complete_; + int send_count_; + int insert_packet_count_; + int pull_audio_count_ RTC_GUARDED_BY(mutex_); + Mutex mutex_; + int64_t next_insert_packet_time_ms_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<SimulatedClock> fake_clock_; +}; + +#if defined(WEBRTC_IOS) +#define MAYBE_DoTest DISABLED_DoTest +#else +#define MAYBE_DoTest DoTest +#endif +TEST_F(AudioCodingModuleMtTestOldApi, MAYBE_DoTest) { + EXPECT_TRUE(RunTest()); +} + +// Disabling all of these tests on iOS until file support has been added. +// See https://code.google.com/p/webrtc/issues/detail?id=4752 for details. +#if !defined(WEBRTC_IOS) + +// This test verifies bit exactness for the send-side of ACM. The test setup is +// a chain of three different test classes: +// +// test::AcmSendTest -> AcmSenderBitExactness -> test::AcmReceiveTest +// +// The receiver side is driving the test by requesting new packets from +// AcmSenderBitExactness::NextPacket(). 
This method, in turn, asks for the +// packet from test::AcmSendTest::NextPacket, which inserts audio from the +// input file until one packet is produced. (The input file loops indefinitely.) +// Before passing the packet to the receiver, this test class verifies the +// packet header and updates a payload checksum with the new payload. The +// decoded output from the receiver is also verified with a (separate) checksum. +class AcmSenderBitExactnessOldApi : public ::testing::Test, + public test::PacketSource { + protected: + static const int kTestDurationMs = 1000; + + AcmSenderBitExactnessOldApi() + : frame_size_rtp_timestamps_(0), + packet_count_(0), + payload_type_(0), + last_sequence_number_(0), + last_timestamp_(0), + payload_checksum_(rtc::MessageDigestFactory::Create(rtc::DIGEST_MD5)) {} + + // Sets up the test::AcmSendTest object. Returns true on success, otherwise + // false. + bool SetUpSender(absl::string_view input_file_name, int source_rate) { + // Note that `audio_source_` will loop forever. The test duration is set + // explicitly by `kTestDurationMs`. + audio_source_.reset(new test::InputAudioFile(input_file_name)); + send_test_.reset(new test::AcmSendTestOldApi(audio_source_.get(), + source_rate, kTestDurationMs)); + return send_test_.get() != NULL; + } + + // Registers a send codec in the test::AcmSendTest object. Returns true on + // success, false on failure. 
+ bool RegisterSendCodec(absl::string_view payload_name, + int sampling_freq_hz, + int channels, + int payload_type, + int frame_size_samples, + int frame_size_rtp_timestamps) { + payload_type_ = payload_type; + frame_size_rtp_timestamps_ = frame_size_rtp_timestamps; + return send_test_->RegisterCodec(payload_name, sampling_freq_hz, channels, + payload_type, frame_size_samples); + } + + void RegisterExternalSendCodec( + std::unique_ptr<AudioEncoder> external_speech_encoder, + int payload_type) { + payload_type_ = payload_type; + frame_size_rtp_timestamps_ = rtc::checked_cast<uint32_t>( + external_speech_encoder->Num10MsFramesInNextPacket() * + external_speech_encoder->RtpTimestampRateHz() / 100); + send_test_->RegisterExternalCodec(std::move(external_speech_encoder)); + } + + // Runs the test. SetUpSender() and RegisterSendCodec() must have been called + // before calling this method. + void Run(absl::string_view audio_checksum_ref, + absl::string_view payload_checksum_ref, + int expected_packets, + test::AcmReceiveTestOldApi::NumOutputChannels expected_channels, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr) { + if (!decoder_factory) { + decoder_factory = CreateBuiltinAudioDecoderFactory(); + } + // Set up the receiver used to decode the packets and verify the decoded + // output. + test::AudioChecksum audio_checksum; + const std::string output_file_name = + webrtc::test::OutputPath() + + ::testing::UnitTest::GetInstance() + ->current_test_info() + ->test_case_name() + + "_" + ::testing::UnitTest::GetInstance()->current_test_info()->name() + + "_output.wav"; + const int kOutputFreqHz = 8000; + test::OutputWavFile output_file(output_file_name, kOutputFreqHz, + expected_channels); + // Have the output audio sent both to file and to the checksum calculator. 
+ test::AudioSinkFork output(&audio_checksum, &output_file); + test::AcmReceiveTestOldApi receive_test(this, &output, kOutputFreqHz, + expected_channels, decoder_factory); + ASSERT_NO_FATAL_FAILURE(receive_test.RegisterDefaultCodecs()); + + // This is where the actual test is executed. + receive_test.Run(); + + // Extract and verify the audio checksum. + std::string checksum_string = audio_checksum.Finish(); + ExpectChecksumEq(audio_checksum_ref, checksum_string); + + // Extract and verify the payload checksum. + rtc::Buffer checksum_result(payload_checksum_->Size()); + payload_checksum_->Finish(checksum_result.data(), checksum_result.size()); + checksum_string = rtc::hex_encode(checksum_result); + ExpectChecksumEq(payload_checksum_ref, checksum_string); + + // Verify number of packets produced. + EXPECT_EQ(expected_packets, packet_count_); + + // Delete the output file. + remove(output_file_name.c_str()); + } + + // Helper: result must be one the "|"-separated checksums. + void ExpectChecksumEq(absl::string_view ref, absl::string_view result) { + if (ref.size() == result.size()) { + // Only one checksum: clearer message. + EXPECT_EQ(ref, result); + } else { + EXPECT_NE(ref.find(result), absl::string_view::npos) + << result << " must be one of these:\n" + << ref; + } + } + + // Inherited from test::PacketSource. + std::unique_ptr<test::Packet> NextPacket() override { + auto packet = send_test_->NextPacket(); + if (!packet) + return NULL; + + VerifyPacket(packet.get()); + // TODO(henrik.lundin) Save the packet to file as well. + + // Pass it on to the caller. The caller becomes the owner of `packet`. + return packet; + } + + // Verifies the packet. + void VerifyPacket(const test::Packet* packet) { + EXPECT_TRUE(packet->valid_header()); + // (We can check the header fields even if valid_header() is false.) + EXPECT_EQ(payload_type_, packet->header().payloadType); + if (packet_count_ > 0) { + // This is not the first packet. 
+ uint16_t sequence_number_diff = + packet->header().sequenceNumber - last_sequence_number_; + EXPECT_EQ(1, sequence_number_diff); + uint32_t timestamp_diff = packet->header().timestamp - last_timestamp_; + EXPECT_EQ(frame_size_rtp_timestamps_, timestamp_diff); + } + ++packet_count_; + last_sequence_number_ = packet->header().sequenceNumber; + last_timestamp_ = packet->header().timestamp; + // Update the checksum. + payload_checksum_->Update(packet->payload(), + packet->payload_length_bytes()); + } + + void SetUpTest(absl::string_view codec_name, + int codec_sample_rate_hz, + int channels, + int payload_type, + int codec_frame_size_samples, + int codec_frame_size_rtp_timestamps) { + ASSERT_TRUE(SetUpSender( + channels == 1 ? kTestFileMono32kHz : kTestFileFakeStereo32kHz, 32000)); + ASSERT_TRUE(RegisterSendCodec(codec_name, codec_sample_rate_hz, channels, + payload_type, codec_frame_size_samples, + codec_frame_size_rtp_timestamps)); + } + + void SetUpTestExternalEncoder( + std::unique_ptr<AudioEncoder> external_speech_encoder, + int payload_type) { + ASSERT_TRUE(send_test_); + RegisterExternalSendCodec(std::move(external_speech_encoder), payload_type); + } + + std::unique_ptr<test::AcmSendTestOldApi> send_test_; + std::unique_ptr<test::InputAudioFile> audio_source_; + uint32_t frame_size_rtp_timestamps_; + int packet_count_; + uint8_t payload_type_; + uint16_t last_sequence_number_; + uint32_t last_timestamp_; + std::unique_ptr<rtc::MessageDigest> payload_checksum_; + const std::string kTestFileMono32kHz = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + const std::string kTestFileFakeStereo32kHz = + webrtc::test::ResourcePath("audio_coding/testfile_fake_stereo_32kHz", + "pcm"); + const std::string kTestFileQuad48kHz = webrtc::test::ResourcePath( + "audio_coding/speech_4_channels_48k_one_second", + "wav"); +}; + +class AcmSenderBitExactnessNewApi : public AcmSenderBitExactnessOldApi {}; + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_8000khz_10ms) { 
+ ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 8000, 1, 107, 80, 80)); + Run(/*audio_checksum_ref=*/"69118ed438ac76252d023e0463819471", + /*payload_checksum_ref=*/"c1edd36339ce0326cc4550041ad719a0", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_16000khz_10ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 16000, 1, 108, 160, 160)); + Run(/*audio_checksum_ref=*/"f95c87bdd33f631bcf80f4b19445bbd2", + /*payload_checksum_ref=*/"ad786526383178b08d80d6eee06e9bad", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_32000khz_10ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 32000, 1, 109, 320, 320)); + Run(/*audio_checksum_ref=*/"c50244419c5c3a2f04cc69a022c266a2", + /*payload_checksum_ref=*/"5ef82ea885e922263606c6fdbc49f651", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_8000khz_10ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 8000, 2, 111, 80, 80)); + Run(/*audio_checksum_ref=*/"4fccf4cc96f1e8e8de4b9fadf62ded9e", + /*payload_checksum_ref=*/"62ce5adb0d4965d0a52ec98ae7f98974", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_16000khz_10ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 16000, 2, 112, 160, 160)); + Run(/*audio_checksum_ref=*/"e15e388d9d4af8c02a59fe1552fedee3", + /*payload_checksum_ref=*/"41ca8edac4b8c71cd54fd9f25ec14870", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcm16_stereo_32000khz_10ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("L16", 32000, 2, 113, 320, 320)); + Run(/*audio_checksum_ref=*/"b240520c0d05003fde7a174ae5957286", + 
/*payload_checksum_ref=*/"50e58502fb04421bf5b857dda4c96879", + /*expected_packets=*/100, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcmu_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMU", 8000, 1, 0, 160, 160)); + Run(/*audio_checksum_ref=*/"c8d1fc677f33c2022ec5f83c7f302280", + /*payload_checksum_ref=*/"8f9b8750bd80fe26b6cbf6659b89f0f9", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcma_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMA", 8000, 1, 8, 160, 160)); + Run(/*audio_checksum_ref=*/"47eb60e855eb12d1b0e6da9c975754a4", + /*payload_checksum_ref=*/"6ad745e55aa48981bfc790d0eeef2dd1", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcmu_stereo_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMU", 8000, 2, 110, 160, 160)); + Run(/*audio_checksum_ref=*/"6ef2f57d4934714787fd0a834e3ea18e", + /*payload_checksum_ref=*/"60b6f25e8d1e74cb679cfe756dd9bca5", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} + +TEST_F(AcmSenderBitExactnessOldApi, Pcma_stereo_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("PCMA", 8000, 2, 118, 160, 160)); + Run(/*audio_checksum_ref=*/"a84d75e098d87ab6b260687eb4b612a2", + /*payload_checksum_ref=*/"92b282c83efd20e7eeef52ba40842cf7", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} + +#if defined(WEBRTC_CODEC_ILBC) && defined(WEBRTC_LINUX) && \ + defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessOldApi, Ilbc_30ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("ILBC", 8000, 1, 102, 240, 240)); + Run(/*audio_checksum_ref=*/"b14dba0de36efa5ec88a32c0b320b70f", + /*payload_checksum_ref=*/"cfae2e9f6aba96e145f2bcdd5050ce78", + /*expected_packets=*/33, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} 
+#endif + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessOldApi, G722_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("G722", 16000, 1, 9, 320, 160)); + Run(/*audio_checksum_ref=*/"f5264affff25cf2cbd2e1e8a5217f9a3", + /*payload_checksum_ref=*/"fc68a87e1380614e658087cb35d5ca10", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kMonoOutput); +} +#endif + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessOldApi, G722_stereo_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("G722", 16000, 2, 119, 320, 160)); + Run(/*audio_checksum_ref=*/"be0b8528ff9db3a2219f55ddd36faf7f", + /*payload_checksum_ref=*/"66516152eeaa1e650ad94ff85f668dac", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} +#endif + +namespace { +// Checksum depends on libopus being compiled with or without SSE. +const std::string audio_checksum = + "6a76fe2ffba057c06eb63239b3c47abe" + "|0c4f9d33b4a7379a34ee0c0d5718afe6"; +const std::string payload_checksum = + "b43bdf7638b2bc2a5a6f30bdc640b9ed" + "|c30d463e7ed10bdd1da9045f80561f27"; +} // namespace + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessOldApi, Opus_stereo_20ms) { + ASSERT_NO_FATAL_FAILURE(SetUpTest("opus", 48000, 2, 120, 960, 960)); + Run(audio_checksum, payload_checksum, /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} +#endif + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessNewApi, OpusFromFormat_stereo_20ms) { + const auto config = AudioEncoderOpus::SdpToConfig( + SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}})); + ASSERT_TRUE(SetUpSender(kTestFileFakeStereo32kHz, 32000)); + ASSERT_NO_FATAL_FAILURE(SetUpTestExternalEncoder( + AudioEncoderOpus::MakeAudioEncoder(*config, 120), 120)); + Run(audio_checksum, payload_checksum, /*expected_packets=*/50, + 
/*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} +#endif + +// TODO(webrtc:8649): Disabled until the Encoder counterpart of +// https://webrtc-review.googlesource.com/c/src/+/129768 lands. +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessNewApi, DISABLED_OpusManyChannels) { + constexpr int kNumChannels = 4; + constexpr int kOpusPayloadType = 120; + + // Read a 4 channel file at 48kHz. + ASSERT_TRUE(SetUpSender(kTestFileQuad48kHz, 48000)); + + const auto sdp_format = SdpAudioFormat("multiopus", 48000, kNumChannels, + {{"channel_mapping", "0,1,2,3"}, + {"coupled_streams", "2"}, + {"num_streams", "2"}}); + const auto encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + ASSERT_TRUE(encoder_config.has_value()); + + ASSERT_NO_FATAL_FAILURE( + SetUpTestExternalEncoder(AudioEncoderMultiChannelOpus::MakeAudioEncoder( + *encoder_config, kOpusPayloadType), + kOpusPayloadType)); + + const auto decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + const auto opus_decoder = + AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config); + + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(opus_decoder.get()); + + // Set up an EXTERNAL DECODER to parse 4 channels. + Run("audio checksum check downstream|8051617907766bec5f4e4a4f7c6d5291", + "payload checksum check downstream|b09c52e44b2bdd9a0809e3a5b1623a76", + /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kQuadOutput, + decoder_factory); +} +#endif + +#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) +TEST_F(AcmSenderBitExactnessNewApi, OpusFromFormat_stereo_20ms_voip) { + auto config = AudioEncoderOpus::SdpToConfig( + SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}})); + // If not set, default will be kAudio in case of stereo. 
+ config->application = AudioEncoderOpusConfig::ApplicationMode::kVoip; + ASSERT_TRUE(SetUpSender(kTestFileFakeStereo32kHz, 32000)); + ASSERT_NO_FATAL_FAILURE(SetUpTestExternalEncoder( + AudioEncoderOpus::MakeAudioEncoder(*config, 120), 120)); + const std::string audio_maybe_sse = + "1010e60ad34cee73c939edaf563d0593" + "|c05b4523d4c3fad2bab96d2a56baa2d0"; + + const std::string payload_maybe_sse = + "ea48d94e43217793af9b7e15ece94e54" + "|bd93c492087093daf662cdd968f6cdda"; + + Run(audio_maybe_sse, payload_maybe_sse, /*expected_packets=*/50, + /*expected_channels=*/test::AcmReceiveTestOldApi::kStereoOutput); +} +#endif + +// This test is for verifying the SetBitRate function. The bitrate is changed at +// the beginning, and the number of generated bytes are checked. +class AcmSetBitRateTest : public ::testing::Test { + protected: + static const int kTestDurationMs = 1000; + + // Sets up the test::AcmSendTest object. Returns true on success, otherwise + // false. + bool SetUpSender() { + const std::string input_file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + // Note that `audio_source_` will loop forever. The test duration is set + // explicitly by `kTestDurationMs`. + audio_source_.reset(new test::InputAudioFile(input_file_name)); + static const int kSourceRateHz = 32000; + send_test_.reset(new test::AcmSendTestOldApi( + audio_source_.get(), kSourceRateHz, kTestDurationMs)); + return send_test_.get(); + } + + // Registers a send codec in the test::AcmSendTest object. Returns true on + // success, false on failure. 
+ virtual bool RegisterSendCodec(absl::string_view payload_name, + int sampling_freq_hz, + int channels, + int payload_type, + int frame_size_samples, + int frame_size_rtp_timestamps) { + return send_test_->RegisterCodec(payload_name, sampling_freq_hz, channels, + payload_type, frame_size_samples); + } + + void RegisterExternalSendCodec( + std::unique_ptr<AudioEncoder> external_speech_encoder, + int payload_type) { + send_test_->RegisterExternalCodec(std::move(external_speech_encoder)); + } + + void RunInner(int min_expected_total_bits, int max_expected_total_bits) { + int nr_bytes = 0; + while (std::unique_ptr<test::Packet> next_packet = + send_test_->NextPacket()) { + nr_bytes += rtc::checked_cast<int>(next_packet->payload_length_bytes()); + } + EXPECT_LE(min_expected_total_bits, nr_bytes * 8); + EXPECT_GE(max_expected_total_bits, nr_bytes * 8); + } + + void SetUpTest(absl::string_view codec_name, + int codec_sample_rate_hz, + int channels, + int payload_type, + int codec_frame_size_samples, + int codec_frame_size_rtp_timestamps) { + ASSERT_TRUE(SetUpSender()); + ASSERT_TRUE(RegisterSendCodec(codec_name, codec_sample_rate_hz, channels, + payload_type, codec_frame_size_samples, + codec_frame_size_rtp_timestamps)); + } + + std::unique_ptr<test::AcmSendTestOldApi> send_test_; + std::unique_ptr<test::InputAudioFile> audio_source_; +}; + +class AcmSetBitRateNewApi : public AcmSetBitRateTest { + protected: + // Runs the test. SetUpSender() must have been called and a codec must be set + // up before calling this method. 
+ void Run(int min_expected_total_bits, int max_expected_total_bits) { + RunInner(min_expected_total_bits, max_expected_total_bits); + } +}; + +TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_10kbps) { + const auto config = AudioEncoderOpus::SdpToConfig( + SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "10000"}})); + ASSERT_TRUE(SetUpSender()); + RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107), + 107); + RunInner(7000, 12000); +} + +TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) { + const auto config = AudioEncoderOpus::SdpToConfig( + SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "50000"}})); + ASSERT_TRUE(SetUpSender()); + RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107), + 107); + RunInner(40000, 60000); +} + +// Verify that it works when the data to send is mono and the encoder is set to +// send surround audio. +TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) { + constexpr int kSampleRateHz = 48000; + constexpr int kSamplesPerChannel = kSampleRateHz * 10 / 1000; + + audio_format_ = SdpAudioFormat({"multiopus", + kSampleRateHz, + 6, + {{"minptime", "10"}, + {"useinbandfec", "1"}, + {"channel_mapping", "0,4,1,2,3,5"}, + {"num_streams", "4"}, + {"coupled_streams", "2"}}}); + + RegisterCodec(); + + input_frame_.sample_rate_hz_ = kSampleRateHz; + input_frame_.num_channels_ = 1; + input_frame_.samples_per_channel_ = kSamplesPerChannel; + for (size_t k = 0; k < 10; ++k) { + ASSERT_GE(acm_->Add10MsData(input_frame_), 0); + input_frame_.timestamp_ += kSamplesPerChannel; + } +} + +// Verify that it works when the data to send is stereo and the encoder is set +// to send surround audio. 
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForStereoInput) { + constexpr int kSampleRateHz = 48000; + constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000; + + audio_format_ = SdpAudioFormat({"multiopus", + kSampleRateHz, + 6, + {{"minptime", "10"}, + {"useinbandfec", "1"}, + {"channel_mapping", "0,4,1,2,3,5"}, + {"num_streams", "4"}, + {"coupled_streams", "2"}}}); + + RegisterCodec(); + + input_frame_.sample_rate_hz_ = kSampleRateHz; + input_frame_.num_channels_ = 2; + input_frame_.samples_per_channel_ = kSamplesPerChannel; + for (size_t k = 0; k < 10; ++k) { + ASSERT_GE(acm_->Add10MsData(input_frame_), 0); + input_frame_.timestamp_ += kSamplesPerChannel; + } +} + +// Verify that it works when the data to send is mono and the encoder is set to +// send stereo audio. +TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) { + constexpr int kSampleRateHz = 48000; + constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000; + + audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 2); + + RegisterCodec(); + + input_frame_.sample_rate_hz_ = kSampleRateHz; + input_frame_.num_channels_ = 1; + input_frame_.samples_per_channel_ = kSamplesPerChannel; + for (size_t k = 0; k < 10; ++k) { + ASSERT_GE(acm_->Add10MsData(input_frame_), 0); + input_frame_.timestamp_ += kSamplesPerChannel; + } +} + +// Verify that it works when the data to send is stereo and the encoder is set +// to send mono audio. 
+TEST_F(AudioCodingModuleTestOldApi, SendingMonoForStereoInput) { + constexpr int kSampleRateHz = 48000; + constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000; + + audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1); + + RegisterCodec(); + + input_frame_.sample_rate_hz_ = kSampleRateHz; + input_frame_.num_channels_ = 1; + input_frame_.samples_per_channel_ = kSamplesPerChannel; + for (size_t k = 0; k < 10; ++k) { + ASSERT_GE(acm_->Add10MsData(input_frame_), 0); + input_frame_.timestamp_ += kSamplesPerChannel; + } +} + +// The result on the Android platforms is inconsistent for this test case. +// On android_rel the result is different from android and android arm64 rel. +#if defined(WEBRTC_ANDROID) +#define MAYBE_OpusFromFormat_48khz_20ms_100kbps \ + DISABLED_OpusFromFormat_48khz_20ms_100kbps +#else +#define MAYBE_OpusFromFormat_48khz_20ms_100kbps \ + OpusFromFormat_48khz_20ms_100kbps +#endif +TEST_F(AcmSetBitRateNewApi, MAYBE_OpusFromFormat_48khz_20ms_100kbps) { + const auto config = AudioEncoderOpus::SdpToConfig( + SdpAudioFormat("opus", 48000, 2, {{"maxaveragebitrate", "100000"}})); + ASSERT_TRUE(SetUpSender()); + RegisterExternalSendCodec(AudioEncoderOpus::MakeAudioEncoder(*config, 107), + 107); + RunInner(80000, 120000); +} + +TEST_F(AcmSenderBitExactnessOldApi, External_Pcmu_20ms) { + AudioEncoderPcmU::Config config; + config.frame_size_ms = 20; + config.num_channels = 1; + config.payload_type = 0; + AudioEncoderPcmU encoder(config); + auto mock_encoder = std::make_unique<MockAudioEncoder>(); + // Set expectations on the mock encoder and also delegate the calls to the + // real encoder. 
+ EXPECT_CALL(*mock_encoder, SampleRateHz()) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::SampleRateHz)); + EXPECT_CALL(*mock_encoder, NumChannels()) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::NumChannels)); + EXPECT_CALL(*mock_encoder, RtpTimestampRateHz()) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::RtpTimestampRateHz)); + EXPECT_CALL(*mock_encoder, Num10MsFramesInNextPacket()) + .Times(AtLeast(1)) + .WillRepeatedly( + Invoke(&encoder, &AudioEncoderPcmU::Num10MsFramesInNextPacket)); + EXPECT_CALL(*mock_encoder, GetTargetBitrate()) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke(&encoder, &AudioEncoderPcmU::GetTargetBitrate)); + EXPECT_CALL(*mock_encoder, EncodeImpl(_, _, _)) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke( + &encoder, static_cast<AudioEncoder::EncodedInfo (AudioEncoder::*)( + uint32_t, rtc::ArrayView<const int16_t>, rtc::Buffer*)>( + &AudioEncoderPcmU::Encode))); + ASSERT_TRUE(SetUpSender(kTestFileMono32kHz, 32000)); + ASSERT_NO_FATAL_FAILURE( + SetUpTestExternalEncoder(std::move(mock_encoder), config.payload_type)); + Run("c8d1fc677f33c2022ec5f83c7f302280", "8f9b8750bd80fe26b6cbf6659b89f0f9", + 50, test::AcmReceiveTestOldApi::kMonoOutput); +} + +// This test fixture is implemented to run ACM and change the desired output +// frequency during the call. The input packets are simply PCM16b-wb encoded +// payloads with a constant value of `kSampleValue`. The test fixture itself +// acts as PacketSource in between the receive test class and the constant- +// payload packet source class. The output is both written to file, and analyzed +// in this test fixture. 
+class AcmSwitchingOutputFrequencyOldApi : public ::testing::Test, + public test::PacketSource, + public test::AudioSink { + protected: + static const size_t kTestNumPackets = 50; + static const int kEncodedSampleRateHz = 16000; + static const size_t kPayloadLenSamples = 30 * kEncodedSampleRateHz / 1000; + static const int kPayloadType = 108; // Default payload type for PCM16b-wb. + + AcmSwitchingOutputFrequencyOldApi() + : first_output_(true), + num_packets_(0), + packet_source_(kPayloadLenSamples, + kSampleValue, + kEncodedSampleRateHz, + kPayloadType), + output_freq_2_(0), + has_toggled_(false) {} + + void Run(int output_freq_1, int output_freq_2, int toggle_period_ms) { + // Set up the receiver used to decode the packets and verify the decoded + // output. + const std::string output_file_name = + webrtc::test::OutputPath() + + ::testing::UnitTest::GetInstance() + ->current_test_info() + ->test_case_name() + + "_" + ::testing::UnitTest::GetInstance()->current_test_info()->name() + + "_output.pcm"; + test::OutputAudioFile output_file(output_file_name); + // Have the output audio sent both to file and to the WriteArray method in + // this class. + test::AudioSinkFork output(this, &output_file); + test::AcmReceiveTestToggleOutputFreqOldApi receive_test( + this, &output, output_freq_1, output_freq_2, toggle_period_ms, + test::AcmReceiveTestOldApi::kMonoOutput); + ASSERT_NO_FATAL_FAILURE(receive_test.RegisterDefaultCodecs()); + output_freq_2_ = output_freq_2; + + // This is where the actual test is executed. + receive_test.Run(); + + // Delete output file. + remove(output_file_name.c_str()); + } + + // Inherited from test::PacketSource. + std::unique_ptr<test::Packet> NextPacket() override { + // Check if it is time to terminate the test. The packet source is of type + // ConstantPcmPacketSource, which is infinite, so we must end the test + // "manually". + if (num_packets_++ > kTestNumPackets) { + EXPECT_TRUE(has_toggled_); + return NULL; // Test ended. 
+ } + + // Get the next packet from the source. + return packet_source_.NextPacket(); + } + + // Inherited from test::AudioSink. + bool WriteArray(const int16_t* audio, size_t num_samples) override { + // Skip checking the first output frame, since it has a number of zeros + // due to how NetEq is initialized. + if (first_output_) { + first_output_ = false; + return true; + } + for (size_t i = 0; i < num_samples; ++i) { + EXPECT_EQ(kSampleValue, audio[i]); + } + if (num_samples == + static_cast<size_t>(output_freq_2_ / 100)) // Size of 10 ms frame. + has_toggled_ = true; + // The return value does not say if the values match the expectation, just + // that the method could process the samples. + return true; + } + + const int16_t kSampleValue = 1000; + bool first_output_; + size_t num_packets_; + test::ConstantPcmPacketSource packet_source_; + int output_freq_2_; + bool has_toggled_; +}; + +TEST_F(AcmSwitchingOutputFrequencyOldApi, TestWithoutToggling) { + Run(16000, 16000, 1000); +} + +TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo32Khz) { + Run(16000, 32000, 1000); +} + +TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle32KhzTo16Khz) { + Run(32000, 16000, 1000); +} + +TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle16KhzTo8Khz) { + Run(16000, 8000, 1000); +} + +TEST_F(AcmSwitchingOutputFrequencyOldApi, Toggle8KhzTo16Khz) { + Run(8000, 16000, 1000); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc new file mode 100644 index 0000000000..9f3bdadc88 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/call_statistics.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace acm2 { + +void CallStatistics::DecodedByNetEq(AudioFrame::SpeechType speech_type, + bool muted) { + ++decoding_stat_.calls_to_neteq; + if (muted) { + ++decoding_stat_.decoded_muted_output; + } + switch (speech_type) { + case AudioFrame::kNormalSpeech: { + ++decoding_stat_.decoded_normal; + break; + } + case AudioFrame::kPLC: { + ++decoding_stat_.decoded_neteq_plc; + break; + } + case AudioFrame::kCodecPLC: { + ++decoding_stat_.decoded_codec_plc; + break; + } + case AudioFrame::kCNG: { + ++decoding_stat_.decoded_cng; + break; + } + case AudioFrame::kPLCCNG: { + ++decoding_stat_.decoded_plc_cng; + break; + } + case AudioFrame::kUndefined: { + // If the audio is decoded by NetEq, `kUndefined` is not an option. + RTC_DCHECK_NOTREACHED(); + } + } +} + +void CallStatistics::DecodedBySilenceGenerator() { + ++decoding_stat_.calls_to_silence_generator; +} + +const AudioDecodingCallStats& CallStatistics::GetDecodingStatistics() const { + return decoding_stat_; +} + +} // namespace acm2 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h new file mode 100644 index 0000000000..a2db2a29f4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_ +#define MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_ + +#include "api/audio/audio_frame.h" +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" + +// +// This class is for book keeping of calls to ACM. It is not useful to log API +// calls which are supposed to be called every 10ms, e.g. PlayoutData10Ms(), +// however, it is useful to know the number of such calls in a given time +// interval. The current implementation covers calls to PlayoutData10Ms() with +// detailed accounting of the decoded speech type. +// +// Thread Safety +// ============= +// Please note that this class in not thread safe. The class must be protected +// if different APIs are called from different threads. +// + +namespace webrtc { + +namespace acm2 { + +class CallStatistics { + public: + CallStatistics() {} + ~CallStatistics() {} + + // Call this method to indicate that NetEq engaged in decoding. `speech_type` + // is the audio-type according to NetEq, and `muted` indicates if the decoded + // frame was produced in muted state. + void DecodedByNetEq(AudioFrame::SpeechType speech_type, bool muted); + + // Call this method to indicate that a decoding call resulted in generating + // silence, i.e. call to NetEq is bypassed and the output audio is zero. + void DecodedBySilenceGenerator(); + + // Get statistics for decoding. The statistics include the number of calls to + // NetEq and silence generator, as well as the type of speech pulled of off + // NetEq, c.f. declaration of AudioDecodingCallStats for detailed description. + const AudioDecodingCallStats& GetDecodingStatistics() const; + + private: + // Reset the decoding statistics. 
+ void ResetDecodingStatistics(); + + AudioDecodingCallStats decoding_stat_; +}; + +} // namespace acm2 + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_ACM2_CALL_STATISTICS_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc new file mode 100644 index 0000000000..b96977b8e4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/acm2/call_statistics_unittest.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/acm2/call_statistics.h" + +#include "test/gtest.h" + +namespace webrtc { + +namespace acm2 { + +TEST(CallStatisticsTest, InitializedZero) { + CallStatistics call_stats; + AudioDecodingCallStats stats; + + stats = call_stats.GetDecodingStatistics(); + EXPECT_EQ(0, stats.calls_to_neteq); + EXPECT_EQ(0, stats.calls_to_silence_generator); + EXPECT_EQ(0, stats.decoded_normal); + EXPECT_EQ(0, stats.decoded_cng); + EXPECT_EQ(0, stats.decoded_neteq_plc); + EXPECT_EQ(0, stats.decoded_plc_cng); + EXPECT_EQ(0, stats.decoded_muted_output); +} + +TEST(CallStatisticsTest, AllCalls) { + CallStatistics call_stats; + AudioDecodingCallStats stats; + + call_stats.DecodedBySilenceGenerator(); + call_stats.DecodedByNetEq(AudioFrame::kNormalSpeech, false); + call_stats.DecodedByNetEq(AudioFrame::kPLC, false); + call_stats.DecodedByNetEq(AudioFrame::kCodecPLC, false); + call_stats.DecodedByNetEq(AudioFrame::kPLCCNG, true); // Let this be muted. 
+ call_stats.DecodedByNetEq(AudioFrame::kCNG, false); + + stats = call_stats.GetDecodingStatistics(); + EXPECT_EQ(5, stats.calls_to_neteq); + EXPECT_EQ(1, stats.calls_to_silence_generator); + EXPECT_EQ(1, stats.decoded_normal); + EXPECT_EQ(1, stats.decoded_cng); + EXPECT_EQ(1, stats.decoded_neteq_plc); + EXPECT_EQ(1, stats.decoded_codec_plc); + EXPECT_EQ(1, stats.decoded_plc_cng); + EXPECT_EQ(1, stats.decoded_muted_output); +} + +} // namespace acm2 + +} // namespace webrtc |