diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/libwebrtc/modules/audio_coding/neteq | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_coding/neteq')
188 files changed, 32768 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc new file mode 100644 index 0000000000..f4ef6cdccb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/accelerate.h" + + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +namespace webrtc { + +Accelerate::ReturnCodes Accelerate::Process(const int16_t* input, + size_t input_length, + bool fast_accelerate, + AudioMultiVector* output, + size_t* length_change_samples) { + // Input length must be (almost) 30 ms. + static const size_t k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate. + if (num_channels_ == 0 || + input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) { + // Length of input data too short to do accelerate. Simply move all data + // from input to output. + output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kError; + } + return TimeStretch::Process(input, input_length, fast_accelerate, output, + length_change_samples); +} + +void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/, + int16_t* best_correlation, + size_t* /*peak_index*/) const { + // When the signal does not contain any active speech, the correlation does + // not matter. Simply set it to zero. 
+ *best_correlation = 0; +} + +Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch( + const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool fast_mode, + AudioMultiVector* output) const { + // Check for strong correlation or passive speech. + // Use 8192 (0.5 in Q14) in fast mode. + const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold; + if ((best_correlation > correlation_threshold) || !active_speech) { + // Do accelerate operation by overlap add. + + // Pre-calculate common multiplication with `fs_mult_`. + // 120 corresponds to 15 ms. + size_t fs_mult_120 = fs_mult_ * 120; + + if (fast_mode) { + // Fit as many multiples of `peak_index` as possible in fs_mult_120. + // TODO(henrik.lundin) Consider finding multiple correlation peaks and + // pick the one with the longest correlation lag in this case. + peak_index = (fs_mult_120 / peak_index) * peak_index; + } + + RTC_DCHECK_GE(fs_mult_120, peak_index); // Should be handled in Process(). + // Copy first part; 0 to 15 ms. + output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, fs_mult_120 * num_channels_)); + // Copy the `peak_index` starting at 15 ms to `temp_vector`. + AudioMultiVector temp_vector(num_channels_); + temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[fs_mult_120 * num_channels_], peak_index * num_channels_)); + // Cross-fade `temp_vector` onto the end of `output`. + output->CrossFade(temp_vector, peak_index); + // Copy the last unmodified part, 15 ms + pitch period until the end. + output->PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[(fs_mult_120 + peak_index) * num_channels_], + input_length - (fs_mult_120 + peak_index) * num_channels_)); + + if (active_speech) { + return kSuccess; + } else { + return kSuccessLowEnergy; + } + } else { + // Accelerate not allowed. Simply move all data from `input` to `output`. 
+ output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kNoStretch; + } +} + +Accelerate* AccelerateFactory::Create( + int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) const { + return new Accelerate(sample_rate_hz, num_channels, background_noise); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h new file mode 100644 index 0000000000..01fe874d54 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/accelerate.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ +#define MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/neteq/time_stretch.h" + +namespace webrtc { + +class AudioMultiVector; +class BackgroundNoise; + +// This class implements the Accelerate operation. Most of the work is done +// in the base class TimeStretch, which is shared with the PreemptiveExpand +// operation. In the Accelerate class, the operations that are specific to +// Accelerate are implemented. +class Accelerate : public TimeStretch { + public: + Accelerate(int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) + : TimeStretch(sample_rate_hz, num_channels, background_noise) {} + + Accelerate(const Accelerate&) = delete; + Accelerate& operator=(const Accelerate&) = delete; + + // This method performs the actual Accelerate operation. 
The samples are + // read from `input`, of length `input_length` elements, and are written to + // `output`. The number of samples removed through time-stretching is + // provided in the output `length_change_samples`. The method returns + // the outcome of the operation as an enumerator value. If `fast_accelerate` + // is true, the algorithm will relax the requirements on finding strong + // correlations, and may remove multiple pitch periods if possible. + ReturnCodes Process(const int16_t* input, + size_t input_length, + bool fast_accelerate, + AudioMultiVector* output, + size_t* length_change_samples); + + protected: + // Sets the parameters `best_correlation` and `peak_index` to suitable + // values when the signal contains no active speech. + void SetParametersForPassiveSpeech(size_t len, + int16_t* best_correlation, + size_t* peak_index) const override; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. + ReturnCodes CheckCriteriaAndStretch(const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool fast_mode, + AudioMultiVector* output) const override; +}; + +struct AccelerateFactory { + AccelerateFactory() {} + virtual ~AccelerateFactory() {} + + virtual Accelerate* Create(int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) const; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc new file mode 100644 index 0000000000..bb5c6d167b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -0,0 +1,678 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> + +#include <array> +#include <memory> +#include <string> +#include <vector> + +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" +#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" +#include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" +#include "modules/audio_coding/codecs/g722/audio_encoder_g722.h" +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" +#include "modules/audio_coding/codecs/isac/fix/include/audio_decoder_isacfix.h" +#include "modules/audio_coding/codecs/isac/fix/include/audio_encoder_isacfix.h" +#include "modules/audio_coding/codecs/isac/main/include/audio_decoder_isac.h" +#include "modules/audio_coding/codecs/isac/main/include/audio_encoder_isac.h" +#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h" +#include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h" +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "rtc_base/system/arch.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { + +constexpr int kOverheadBytesPerPacket = 50; + +// The absolute difference between the input and output (the first channel) is +// compared vs `tolerance`. The parameter `delay` is used to correct for codec +// delays. 
+void CompareInputOutput(const std::vector<int16_t>& input, + const std::vector<int16_t>& output, + size_t num_samples, + size_t channels, + int tolerance, + int delay) { + ASSERT_LE(num_samples, input.size()); + ASSERT_LE(num_samples * channels, output.size()); + for (unsigned int n = 0; n < num_samples - delay; ++n) { + ASSERT_NEAR(input[n], output[channels * n + delay], tolerance) + << "Exit test on first diff; n = " << n; + } +} + +// The absolute difference between the first two channels in `output` is +// compared vs `tolerance`. +void CompareTwoChannels(const std::vector<int16_t>& output, + size_t samples_per_channel, + size_t channels, + int tolerance) { + ASSERT_GE(channels, 2u); + ASSERT_LE(samples_per_channel * channels, output.size()); + for (unsigned int n = 0; n < samples_per_channel; ++n) + ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance) + << "Stereo samples differ."; +} + +// Calculates mean-squared error between input and output (the first channel). +// The parameter `delay` is used to correct for codec delays. +double MseInputOutput(const std::vector<int16_t>& input, + const std::vector<int16_t>& output, + size_t num_samples, + size_t channels, + int delay) { + RTC_DCHECK_LT(delay, static_cast<int>(num_samples)); + RTC_DCHECK_LE(num_samples, input.size()); + RTC_DCHECK_LE(num_samples * channels, output.size()); + if (num_samples == 0) + return 0.0; + double squared_sum = 0.0; + for (unsigned int n = 0; n < num_samples - delay; ++n) { + squared_sum += (input[n] - output[channels * n + delay]) * + (input[n] - output[channels * n + delay]); + } + return squared_sum / (num_samples - delay); +} +} // namespace + +class AudioDecoderTest : public ::testing::Test { + protected: + AudioDecoderTest() + : input_audio_( + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"), + 32000), + codec_input_rate_hz_(32000), // Legacy default value. 
+ frame_size_(0), + data_length_(0), + channels_(1), + payload_type_(17), + decoder_(NULL) {} + + ~AudioDecoderTest() override {} + + void SetUp() override { + if (audio_encoder_) + codec_input_rate_hz_ = audio_encoder_->SampleRateHz(); + // Create arrays. + ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0"; + } + + void TearDown() override { + delete decoder_; + decoder_ = NULL; + } + + virtual void InitEncoder() {} + + // TODO(henrik.lundin) Change return type to size_t once most/all overriding + // implementations are gone. + virtual int EncodeFrame(const int16_t* input, + size_t input_len_samples, + rtc::Buffer* output) { + AudioEncoder::EncodedInfo encoded_info; + const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100; + RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(), + input_len_samples); + std::unique_ptr<int16_t[]> interleaved_input( + new int16_t[channels_ * samples_per_10ms]); + for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) { + EXPECT_EQ(0u, encoded_info.encoded_bytes); + + // Duplicate the mono input signal to however many channels the test + // wants. + test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms, + samples_per_10ms, channels_, + interleaved_input.get()); + + encoded_info = + audio_encoder_->Encode(0, + rtc::ArrayView<const int16_t>( + interleaved_input.get(), + audio_encoder_->NumChannels() * + audio_encoder_->SampleRateHz() / 100), + output); + } + EXPECT_EQ(payload_type_, encoded_info.payload_type); + return static_cast<int>(encoded_info.encoded_bytes); + } + + // Encodes and decodes audio. The absolute difference between the input and + // output is compared vs `tolerance`, and the mean-squared error is compared + // with `mse`. The encoded stream should contain `expected_bytes`. For stereo + // audio, the absolute difference between the two channels is compared vs + // `channel_diff_tolerance`. 
+ void EncodeDecodeTest(size_t expected_bytes, + int tolerance, + double mse, + int delay = 0, + int channel_diff_tolerance = 0) { + ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0"; + ASSERT_GE(channel_diff_tolerance, 0) + << "Test must define a channel_diff_tolerance >= 0"; + size_t processed_samples = 0u; + size_t encoded_bytes = 0u; + InitEncoder(); + std::vector<int16_t> input; + std::vector<int16_t> decoded; + while (processed_samples + frame_size_ <= data_length_) { + // Extend input vector with `frame_size_`. + input.resize(input.size() + frame_size_, 0); + // Read from input file. + ASSERT_GE(input.size() - processed_samples, frame_size_); + ASSERT_TRUE(input_audio_.Read(frame_size_, codec_input_rate_hz_, + &input[processed_samples])); + rtc::Buffer encoded; + size_t enc_len = + EncodeFrame(&input[processed_samples], frame_size_, &encoded); + // Make sure that frame_size_ * channels_ samples are allocated and free. + decoded.resize((processed_samples + frame_size_) * channels_, 0); + + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + // The view length counts int16_t elements, not bytes: exactly + // frame_size_ * channels_ elements were reserved by the resize above, so + // the view must not claim more than that. + auto decode_result = parse_result[0].frame->Decode( + rtc::ArrayView<int16_t>(&decoded[processed_samples * channels_], + frame_size_ * channels_)); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + encoded_bytes += enc_len; + processed_samples += frame_size_; + } + // For some codecs it doesn't make sense to check expected number of bytes, + // since the number can vary for different platforms. Opus and iSAC are + // such codecs. In this case expected_bytes is set to 0. 
+ if (expected_bytes) { + EXPECT_EQ(expected_bytes, encoded_bytes); + } + CompareInputOutput(input, decoded, processed_samples, channels_, tolerance, + delay); + if (channels_ == 2) + CompareTwoChannels(decoded, processed_samples, channels_, + channel_diff_tolerance); + EXPECT_LE( + MseInputOutput(input, decoded, processed_samples, channels_, delay), + mse); + } + + // Encodes a payload and decodes it twice with decoder re-init before each + // decode. Verifies that the decoded result is the same. + void ReInitTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + std::array<rtc::Buffer, 2> encoded; + EncodeFrame(input.get(), frame_size_, &encoded[0]); + // Make a copy. + encoded[1].SetData(encoded[0].data(), encoded[0].size()); + + std::array<std::vector<int16_t>, 2> outputs; + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].resize(frame_size_ * channels_); + decoder_->Reset(); + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded[i]), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + auto decode_result = parse_result[0].frame->Decode(outputs[i]); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + } + EXPECT_EQ(outputs[0], outputs[1]); + } + + // Call DecodePlc and verify that the correct number of samples is produced. 
+ void DecodePlcTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + rtc::Buffer encoded; + EncodeFrame(input.get(), frame_size_, &encoded); + decoder_->Reset(); + std::vector<int16_t> output(frame_size_ * channels_); + const std::vector<AudioDecoder::ParseResult> parse_result = + decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0); + RTC_CHECK_EQ(parse_result.size(), size_t{1}); + auto decode_result = parse_result[0].frame->Decode(output); + RTC_CHECK(decode_result.has_value()); + EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples); + // Call DecodePlc and verify that we get one frame of data. + // (Overwrite the output from the above Decode call, but that does not + // matter.) + size_t dec_len = + decoder_->DecodePlc(/*num_frames=*/1, /*decoded=*/output.data()); + EXPECT_EQ(frame_size_ * channels_, dec_len); + } + + test::ResampleInputAudioFile input_audio_; + int codec_input_rate_hz_; + size_t frame_size_; + size_t data_length_; + size_t channels_; + const int payload_type_; + AudioDecoder* decoder_; + std::unique_ptr<AudioEncoder> audio_encoder_; +}; + +class AudioDecoderPcmUTest : public AudioDecoderTest { + protected: + AudioDecoderPcmUTest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmU(1); + AudioEncoderPcmU::Config config; + config.frame_size_ms = static_cast<int>(frame_size_ / 8); + config.payload_type = payload_type_; + audio_encoder_.reset(new AudioEncoderPcmU(config)); + } +}; + +class AudioDecoderPcmATest : public AudioDecoderTest { + protected: + AudioDecoderPcmATest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmA(1); + AudioEncoderPcmA::Config config; + config.frame_size_ms = static_cast<int>(frame_size_ / 8); + config.payload_type = payload_type_; + 
audio_encoder_.reset(new AudioEncoderPcmA(config)); + } +}; + +class AudioDecoderPcm16BTest : public AudioDecoderTest { + protected: + AudioDecoderPcm16BTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 20 * codec_input_rate_hz_ / 1000; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1); + RTC_DCHECK(decoder_); + AudioEncoderPcm16B::Config config; + config.sample_rate_hz = codec_input_rate_hz_; + config.frame_size_ms = + static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000)); + config.payload_type = payload_type_; + audio_encoder_.reset(new AudioEncoderPcm16B(config)); + } +}; + +class AudioDecoderIlbcTest : public AudioDecoderTest { + protected: + AudioDecoderIlbcTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 8000; + frame_size_ = 240; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIlbcImpl; + RTC_DCHECK(decoder_); + AudioEncoderIlbcConfig config; + config.frame_size_ms = 30; + audio_encoder_.reset(new AudioEncoderIlbcImpl(config, payload_type_)); + } + + // Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does + // not return any data. It simply resets a few states and returns 0. + void DecodePlcTest() { + InitEncoder(); + std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]); + ASSERT_TRUE( + input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get())); + rtc::Buffer encoded; + size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded); + AudioDecoder::SpeechType speech_type; + decoder_->Reset(); + std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]); + size_t dec_len = decoder_->Decode( + encoded.data(), enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output.get(), &speech_type); + EXPECT_EQ(frame_size_, dec_len); + // Simply call DecodePlc and verify that we get 0 as return value. 
+ EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get())); + } +}; + +class AudioDecoderIsacFloatTest : public AudioDecoderTest { + protected: + AudioDecoderIsacFloatTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 480; + data_length_ = 10 * frame_size_; + AudioEncoderIsacFloatImpl::Config config; + config.payload_type = payload_type_; + config.sample_rate_hz = codec_input_rate_hz_; + config.frame_size_ms = + 1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_; + audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config)); + audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); + + AudioDecoderIsacFloatImpl::Config decoder_config; + decoder_config.sample_rate_hz = codec_input_rate_hz_; + decoder_ = new AudioDecoderIsacFloatImpl(decoder_config); + } +}; + +class AudioDecoderIsacSwbTest : public AudioDecoderTest { + protected: + AudioDecoderIsacSwbTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 32000; + frame_size_ = 960; + data_length_ = 10 * frame_size_; + AudioEncoderIsacFloatImpl::Config config; + config.payload_type = payload_type_; + config.sample_rate_hz = codec_input_rate_hz_; + config.frame_size_ms = + 1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_; + audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config)); + audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); + + AudioDecoderIsacFloatImpl::Config decoder_config; + decoder_config.sample_rate_hz = codec_input_rate_hz_; + decoder_ = new AudioDecoderIsacFloatImpl(decoder_config); + } +}; + +class AudioDecoderIsacFixTest : public AudioDecoderTest { + protected: + AudioDecoderIsacFixTest() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 480; + data_length_ = 10 * frame_size_; + AudioEncoderIsacFixImpl::Config config; + config.payload_type = payload_type_; + config.sample_rate_hz = codec_input_rate_hz_; + config.frame_size_ms = + 1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_; + audio_encoder_.reset(new 
AudioEncoderIsacFixImpl(config)); + audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); + + AudioDecoderIsacFixImpl::Config decoder_config; + decoder_config.sample_rate_hz = codec_input_rate_hz_; + decoder_ = new AudioDecoderIsacFixImpl(decoder_config); + } +}; + +class AudioDecoderG722Test : public AudioDecoderTest { + protected: + AudioDecoderG722Test() : AudioDecoderTest() { + codec_input_rate_hz_ = 16000; + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderG722Impl; + RTC_DCHECK(decoder_); + AudioEncoderG722Config config; + config.frame_size_ms = 10; + config.num_channels = 1; + audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); + } +}; + +class AudioDecoderG722StereoTest : public AudioDecoderTest { + protected: + AudioDecoderG722StereoTest() : AudioDecoderTest() { + channels_ = 2; + codec_input_rate_hz_ = 16000; + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderG722StereoImpl; + RTC_DCHECK(decoder_); + AudioEncoderG722Config config; + config.frame_size_ms = 10; + config.num_channels = 2; + audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_)); + } +}; + +class AudioDecoderOpusTest + : public AudioDecoderTest, + public testing::WithParamInterface<std::tuple<int, int>> { + protected: + AudioDecoderOpusTest() : AudioDecoderTest() { + channels_ = opus_num_channels_; + codec_input_rate_hz_ = opus_sample_rate_hz_; + frame_size_ = rtc::CheckedDivExact(opus_sample_rate_hz_, 100); + data_length_ = 10 * frame_size_; + decoder_ = + new AudioDecoderOpusImpl(opus_num_channels_, opus_sample_rate_hz_); + AudioEncoderOpusConfig config; + config.frame_size_ms = 10; + config.sample_rate_hz = opus_sample_rate_hz_; + config.num_channels = opus_num_channels_; + config.application = opus_num_channels_ == 1 + ? 
AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_); + audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket); + } + const int opus_sample_rate_hz_{std::get<0>(GetParam())}; + const int opus_num_channels_{std::get<1>(GetParam())}; +}; + +INSTANTIATE_TEST_SUITE_P(Param, + AudioDecoderOpusTest, + testing::Combine(testing::Values(16000, 48000), + testing::Values(1, 2))); + +TEST_F(AudioDecoderPcmUTest, EncodeDecode) { + int tolerance = 251; + double mse = 1734.0; + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +namespace { +int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) { + audio_encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt); + return audio_encoder->GetTargetBitrate(); +} +void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder, + int fixed_rate) { + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate)); + EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1)); +} +} // namespace + +TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderPcmATest, EncodeDecode) { + int tolerance = 308; + double mse = 1931.0; + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderPcmATest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderPcm16BTest, EncodeDecode) { + int tolerance = 0; + double mse = 0.0; + EncodeDecodeTest(2 * data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + 
+TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), + codec_input_rate_hz_ * 16); +} + +TEST_F(AudioDecoderIlbcTest, EncodeDecode) { + int tolerance = 6808; + double mse = 2.13e6; + int delay = 80; // Delay from input to output. + EncodeDecodeTest(500, tolerance, mse, delay); + ReInitTest(); + EXPECT_TRUE(decoder_->HasDecodePlc()); + DecodePlcTest(); +} + +TEST_F(AudioDecoderIlbcTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 13333); +} + +TEST_F(AudioDecoderIsacFloatTest, EncodeDecode) { + int tolerance = 3399; + double mse = 434951.0; + int delay = 48; // Delay from input to output. + EncodeDecodeTest(0, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderIsacFloatTest, SetTargetBitrate) { + const int overhead_rate = + 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; + EXPECT_EQ(10000, + SetAndGetTargetBitrate(audio_encoder_.get(), 9999 + overhead_rate)); + EXPECT_EQ(10000, SetAndGetTargetBitrate(audio_encoder_.get(), + 10000 + overhead_rate)); + EXPECT_EQ(23456, SetAndGetTargetBitrate(audio_encoder_.get(), + 23456 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32000 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32001 + overhead_rate)); +} + +TEST_F(AudioDecoderIsacSwbTest, EncodeDecode) { + int tolerance = 19757; + double mse = 8.18e6; + int delay = 160; // Delay from input to output. 
+ EncodeDecodeTest(0, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderIsacSwbTest, SetTargetBitrate) { + const int overhead_rate = + 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; + EXPECT_EQ(10000, + SetAndGetTargetBitrate(audio_encoder_.get(), 9999 + overhead_rate)); + EXPECT_EQ(10000, SetAndGetTargetBitrate(audio_encoder_.get(), + 10000 + overhead_rate)); + EXPECT_EQ(23456, SetAndGetTargetBitrate(audio_encoder_.get(), + 23456 + overhead_rate)); + EXPECT_EQ(56000, SetAndGetTargetBitrate(audio_encoder_.get(), + 56000 + overhead_rate)); + EXPECT_EQ(56000, SetAndGetTargetBitrate(audio_encoder_.get(), + 56001 + overhead_rate)); +} + +// Run bit exactness test only for release builds. +#if defined(NDEBUG) +TEST_F(AudioDecoderIsacFixTest, EncodeDecode) { + int tolerance = 11034; + double mse = 3.46e6; + int delay = 54; // Delay from input to output. +#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM) + static const int kEncodedBytes = 685; +#elif defined(WEBRTC_MAC) && defined(WEBRTC_ARCH_ARM64) // M1 Mac + static const int kEncodedBytes = 673; +#elif defined(WEBRTC_ARCH_ARM64) + static const int kEncodedBytes = 673; +#elif defined(WEBRTC_WIN) && defined(_MSC_VER) && !defined(__clang__) + static const int kEncodedBytes = 671; +#elif defined(WEBRTC_IOS) && defined(WEBRTC_ARCH_X86_64) + static const int kEncodedBytes = 671; +#else + static const int kEncodedBytes = 671; +#endif + EncodeDecodeTest(kEncodedBytes, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} +#endif + +TEST_F(AudioDecoderIsacFixTest, SetTargetBitrate) { + const int overhead_rate = + 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; + EXPECT_EQ(10000, + SetAndGetTargetBitrate(audio_encoder_.get(), 9999 + overhead_rate)); + EXPECT_EQ(10000, SetAndGetTargetBitrate(audio_encoder_.get(), + 10000 + overhead_rate)); + EXPECT_EQ(23456, 
SetAndGetTargetBitrate(audio_encoder_.get(), + 23456 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32000 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32001 + overhead_rate)); +} + +TEST_F(AudioDecoderG722Test, EncodeDecode) { + int tolerance = 6176; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderG722Test, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000); +} + +TEST_F(AudioDecoderG722StereoTest, EncodeDecode) { + int tolerance = 6176; + int channel_diff_tolerance = 0; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) { + TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000); +} + +// TODO(http://bugs.webrtc.org/12518): Enable the test after Opus has been +// updated. +TEST_P(AudioDecoderOpusTest, DISABLED_EncodeDecode) { + constexpr int tolerance = 6176; + constexpr int channel_diff_tolerance = 6; + constexpr double mse = 238630.0; + constexpr int delay = 22; // Delay from input to output. 
+ EncodeDecodeTest(0, tolerance, mse, delay, channel_diff_tolerance); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_P(AudioDecoderOpusTest, SetTargetBitrate) { + const int overhead_rate = + 8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_; + EXPECT_EQ(6000, + SetAndGetTargetBitrate(audio_encoder_.get(), 5999 + overhead_rate)); + EXPECT_EQ(6000, + SetAndGetTargetBitrate(audio_encoder_.get(), 6000 + overhead_rate)); + EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(), + 32000 + overhead_rate)); + EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), + 510000 + overhead_rate)); + EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(), + 511000 + overhead_rate)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc new file mode 100644 index 0000000000..220d5a17d7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.cc @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioMultiVector::AudioMultiVector(size_t N) { + RTC_DCHECK_GT(N, 0); + if (N < 1) + N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector); + } + num_channels_ = N; +} + +AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) { + RTC_DCHECK_GT(N, 0); + if (N < 1) + N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector(initial_size)); + } + num_channels_ = N; +} + +AudioMultiVector::~AudioMultiVector() { + std::vector<AudioVector*>::iterator it = channels_.begin(); + while (it != channels_.end()) { + delete (*it); + ++it; + } +} + +void AudioMultiVector::Clear() { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->Clear(); + } +} + +void AudioMultiVector::Zeros(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->Clear(); + channels_[i]->Extend(length); + } +} + +void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const { + if (copy_to) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->CopyTo(&(*copy_to)[i]); + } + } +} + +void AudioMultiVector::PushBackInterleaved( + rtc::ArrayView<const int16_t> append_this) { + RTC_DCHECK_EQ(append_this.size() % num_channels_, 0); + if (num_channels_ == 1) { + // Special case to avoid extra allocation and data shuffling. + channels_[0]->PushBack(append_this.data(), append_this.size()); + return; + } + size_t length_per_channel = append_this.size() / num_channels_; + int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage. + for (size_t channel = 0; channel < num_channels_; ++channel) { + // Copy elements to `temp_array`. + // Set `source_ptr` to first element of this channel. 
+ const int16_t* source_ptr = &append_this[channel]; + for (size_t i = 0; i < length_per_channel; ++i) { + temp_array[i] = *source_ptr; + source_ptr += num_channels_; // Jump to next element of this channel. + } + channels_[channel]->PushBack(temp_array, length_per_channel); + } + delete[] temp_array; +} + +void AudioMultiVector::PushBack(const AudioMultiVector& append_this) { + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PushBack(append_this[i]); + } + } +} + +void AudioMultiVector::PushBackFromIndex(const AudioMultiVector& append_this, + size_t index) { + RTC_DCHECK_LT(index, append_this.Size()); + index = std::min(index, append_this.Size() - 1); + size_t length = append_this.Size() - index; + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PushBack(append_this[i], length, index); + } + } +} + +void AudioMultiVector::PopFront(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PopFront(length); + } +} + +void AudioMultiVector::PopBack(size_t length) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->PopBack(length); + } +} + +size_t AudioMultiVector::ReadInterleaved(size_t length, + int16_t* destination) const { + return ReadInterleavedFromIndex(0, length, destination); +} + +size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index, + size_t length, + int16_t* destination) const { + RTC_DCHECK(destination); + size_t index = 0; // Number of elements written to `destination` so far. + RTC_DCHECK_LE(start_index, Size()); + start_index = std::min(start_index, Size()); + if (length + start_index > Size()) { + length = Size() - start_index; + } + if (num_channels_ == 1) { + // Special case to avoid the nested for loop below. 
+ (*this)[0].CopyTo(length, start_index, destination); + return length; + } + for (size_t i = 0; i < length; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + destination[index] = (*this)[channel][i + start_index]; + ++index; + } + } + return index; +} + +size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length, + int16_t* destination) const { + length = std::min(length, Size()); // Cannot read more than Size() elements. + return ReadInterleavedFromIndex(Size() - length, length, destination); +} + +void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position) { + RTC_DCHECK_EQ(num_channels_, insert_this.num_channels_); + // Cap `length` at the length of `insert_this`. + RTC_DCHECK_LE(length, insert_this.Size()); + length = std::min(length, insert_this.Size()); + if (num_channels_ == insert_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->OverwriteAt(insert_this[i], length, position); + } + } +} + +void AudioMultiVector::CrossFade(const AudioMultiVector& append_this, + size_t fade_length) { + RTC_DCHECK_EQ(num_channels_, append_this.num_channels_); + if (num_channels_ == append_this.num_channels_) { + for (size_t i = 0; i < num_channels_; ++i) { + channels_[i]->CrossFade(append_this[i], fade_length); + } + } +} + +size_t AudioMultiVector::Channels() const { + return num_channels_; +} + +size_t AudioMultiVector::Size() const { + RTC_DCHECK(channels_[0]); + return channels_[0]->Size(); +} + +void AudioMultiVector::AssertSize(size_t required_size) { + if (Size() < required_size) { + size_t extend_length = required_size - Size(); + for (size_t channel = 0; channel < num_channels_; ++channel) { + channels_[channel]->Extend(extend_length); + } + } +} + +bool AudioMultiVector::Empty() const { + RTC_DCHECK(channels_[0]); + return channels_[0]->Empty(); +} + +void AudioMultiVector::CopyChannel(size_t from_channel, size_t to_channel) { + RTC_DCHECK_LT(from_channel, 
num_channels_); + RTC_DCHECK_LT(to_channel, num_channels_); + channels_[from_channel]->CopyTo(channels_[to_channel]); +} + +const AudioVector& AudioMultiVector::operator[](size_t index) const { + return *(channels_[index]); +} + +AudioVector& AudioMultiVector::operator[](size_t index) { + return *(channels_[index]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h new file mode 100644 index 0000000000..715ec6dfc7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ + +#include <stdint.h> +#include <string.h> + +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/audio_vector.h" + +namespace webrtc { + +class AudioMultiVector { + public: + // Creates an empty AudioMultiVector with `N` audio channels. `N` must be + // larger than 0. + explicit AudioMultiVector(size_t N); + + // Creates an AudioMultiVector with `N` audio channels, each channel having + // an initial size. `N` must be larger than 0. + AudioMultiVector(size_t N, size_t initial_size); + + virtual ~AudioMultiVector(); + + AudioMultiVector(const AudioMultiVector&) = delete; + AudioMultiVector& operator=(const AudioMultiVector&) = delete; + + // Deletes all values and make the vector empty. 
+ virtual void Clear(); + + // Clears the vector and inserts `length` zeros into each channel. + virtual void Zeros(size_t length); + + // Copies all values from this vector to `copy_to`. Any contents in `copy_to` + // are deleted. After the operation is done, `copy_to` will be an exact + // replica of this object. The source and the destination must have the same + // number of channels. + virtual void CopyTo(AudioMultiVector* copy_to) const; + + // Appends the contents of `append_this` to the end of this object. The array + // is assumed to be channel-interleaved. The length must be an even multiple + // of this object's number of channels. The length of this object is increased + // with the length of the array divided by the number of channels. + void PushBackInterleaved(rtc::ArrayView<const int16_t> append_this); + + // Appends the contents of AudioMultiVector `append_this` to this object. The + // length of this object is increased with the length of `append_this`. + virtual void PushBack(const AudioMultiVector& append_this); + + // Appends the contents of AudioMultiVector `append_this` to this object, + // taken from `index` up until the end of `append_this`. The length of this + // object is increased. + virtual void PushBackFromIndex(const AudioMultiVector& append_this, + size_t index); + + // Removes `length` elements from the beginning of this object, from each + // channel. + virtual void PopFront(size_t length); + + // Removes `length` elements from the end of this object, from each + // channel. + virtual void PopBack(size_t length); + + // Reads `length` samples from each channel and writes them interleaved to + // `destination`. The total number of elements written to `destination` is + // returned, i.e., `length` * number of channels. If the AudioMultiVector + // contains less than `length` samples per channel, this is reflected in the + // return value. 
+ virtual size_t ReadInterleaved(size_t length, int16_t* destination) const; + + // Like ReadInterleaved() above, but reads from `start_index` instead of from + // the beginning. + virtual size_t ReadInterleavedFromIndex(size_t start_index, + size_t length, + int16_t* destination) const; + + // Like ReadInterleaved() above, but reads from the end instead of from + // the beginning. + virtual size_t ReadInterleavedFromEnd(size_t length, + int16_t* destination) const; + + // Overwrites each channel in this AudioMultiVector with values taken from + // `insert_this`. The values are taken from the beginning of `insert_this` and + // are inserted starting at `position`. `length` values are written into each + // channel. If `length` and `position` are selected such that the new data + // extends beyond the end of the current AudioVector, the vector is extended + // to accommodate the new data. `length` is limited to the length of + // `insert_this`. + virtual void OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position); + + // Appends `append_this` to the end of the current vector. Lets the two + // vectors overlap by `fade_length` samples (per channel), and cross-fade + // linearly in this region. + virtual void CrossFade(const AudioMultiVector& append_this, + size_t fade_length); + + // Returns the number of channels. + virtual size_t Channels() const; + + // Returns the number of elements per channel in this AudioMultiVector. + virtual size_t Size() const; + + // Verify that each channel can hold at least `required_size` elements. If + // not, extend accordingly. + virtual void AssertSize(size_t required_size); + + virtual bool Empty() const; + + // Copies the data between two channels in the AudioMultiVector. The method + // does not add any new channel. Thus, `from_channel` and `to_channel` must + // both be valid channel numbers. 
+ virtual void CopyChannel(size_t from_channel, size_t to_channel); + + // Accesses and modifies a channel (i.e., an AudioVector object) of this + // AudioMultiVector. + const AudioVector& operator[](size_t index) const; + AudioVector& operator[](size_t index); + + protected: + std::vector<AudioVector*> channels_; + size_t num_channels_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc new file mode 100644 index 0000000000..329377a18e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_multi_vector_unittest.cc @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +#include <stdlib.h> + +#include <string> +#include <vector> + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +// This is a value-parameterized test. The test cases are instantiated with +// different values for the test parameter, which is used to determine the +// number of channels in the AudioMultiBuffer. Note that it is not possible +// to combine typed testing with value-parameterized testing, and since the +// tests for AudioVector already covers a number of different type parameters, +// this test focuses on testing different number of channels, and keeping the +// value type constant. 
+ +class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> { + protected: + AudioMultiVectorTest() + : num_channels_(GetParam()), // Get the test parameter. + array_interleaved_(num_channels_ * array_length()) {} + + ~AudioMultiVectorTest() = default; + + virtual void SetUp() { + // Populate test arrays. + for (size_t i = 0; i < array_length(); ++i) { + array_[i] = static_cast<int16_t>(i); + } + int16_t* ptr = array_interleaved_.data(); + // Write 100, 101, 102, ... for first channel. + // Write 200, 201, 202, ... for second channel. + // And so on. + for (size_t i = 0; i < array_length(); ++i) { + for (size_t j = 1; j <= num_channels_; ++j) { + *ptr = rtc::checked_cast<int16_t>(j * 100 + i); + ++ptr; + } + } + } + + size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); } + + const size_t num_channels_; + int16_t array_[10]; + std::vector<int16_t> array_interleaved_; +}; + +// Create and destroy AudioMultiVector objects, both empty and with a predefined +// length. +TEST_P(AudioMultiVectorTest, CreateAndDestroy) { + AudioMultiVector vec1(num_channels_); + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(num_channels_, vec1.Channels()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioMultiVector vec2(num_channels_, initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(num_channels_, vec2.Channels()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TEST_P(AudioMultiVectorTest, SubscriptOperator) { + AudioMultiVector vec(num_channels_, array_length()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + vec[channel][i] = static_cast<int16_t>(i); + // Make sure to use the const version. + const AudioVector& audio_vec = vec[channel]; + EXPECT_EQ(static_cast<int16_t>(i), audio_vec[i]); + } + } +} + +// Test the PushBackInterleaved method and the CopyFrom method. The Clear +// method is also invoked. 
+TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec_copy(num_channels_); + vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`. + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(array_length(), vec.Size()); + ASSERT_EQ(num_channels_, vec_copy.Channels()); + ASSERT_EQ(array_length(), vec_copy.Size()); + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(static_cast<int16_t>((channel + 1) * 100 + i), vec[channel][i]); + EXPECT_EQ(vec[channel][i], vec_copy[channel][i]); + } + } + + // Clear `vec` and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyTo(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Try to copy to a NULL pointer. Nothing should happen. +TEST_P(AudioMultiVectorTest, CopyToNull) { + AudioMultiVector vec(num_channels_); + AudioMultiVector* vec_copy = NULL; + vec.PushBackInterleaved(array_interleaved_); + vec.CopyTo(vec_copy); +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, PushBackVector) { + AudioMultiVector vec1(num_channels_, array_length()); + AudioMultiVector vec2(num_channels_, array_length()); + // Set the first vector to [0, 1, ..., array_length() - 1] + + // 100 * channel_number. + // Set the second vector to [array_length(), array_length() + 1, ..., + // 2 * array_length() - 1] + 100 * channel_number. + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < array_length(); ++i) { + vec1[channel][i] = static_cast<int16_t>(i + 100 * channel); + vec2[channel][i] = + static_cast<int16_t>(i + 100 * channel + array_length()); + } + } + // Append vec2 to the back of vec1. 
+ vec1.PushBack(vec2); + ASSERT_EQ(2u * array_length(), vec1.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * array_length(); ++i) { + EXPECT_EQ(static_cast<int16_t>(i + 100 * channel), vec1[channel][i]); + } + } +} + +// Test the PushBackFromIndex method. +TEST_P(AudioMultiVectorTest, PushBackFromIndex) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec2(num_channels_); + + // Append vec1 to the back of vec2 (which is empty). Read vec1 from the second + // last element. + vec2.PushBackFromIndex(vec1, array_length() - 2); + ASSERT_EQ(2u, vec2.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2; ++i) { + EXPECT_EQ(array_interleaved_[channel + + num_channels_ * (array_length() - 2 + i)], + vec2[channel][i]); + } + } +} + +// Starts with pushing some values to the vector, then test the Zeros method. +TEST_P(AudioMultiVectorTest, Zeros) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.Zeros(2 * array_length()); + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(2u * array_length(), vec.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * array_length(); ++i) { + EXPECT_EQ(0, vec[channel][i]); + } + } +} + +// Test the ReadInterleaved method +TEST_P(AudioMultiVectorTest, ReadInterleaved) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + int16_t* output = new int16_t[array_interleaved_.size()]; + // Read 5 samples. + size_t read_samples = 5; + EXPECT_EQ(num_channels_ * read_samples, + vec.ReadInterleaved(read_samples, output)); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); + + // Read too many samples. Expect to get all samples from the vector. 
+ EXPECT_EQ(array_interleaved_.size(), + vec.ReadInterleaved(array_length() + 1, output)); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); + + delete[] output; +} + +// Test the PopFront method. +TEST_P(AudioMultiVectorTest, PopFront) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.PopFront(1); // Remove one element from each channel. + ASSERT_EQ(array_length() - 1u, vec.Size()); + // Let `ptr` point to the second element of the first channel in the + // interleaved array. + int16_t* ptr = &array_interleaved_[num_channels_]; + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopFront(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TEST_P(AudioMultiVectorTest, PopBack) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + vec.PopBack(1); // Remove one element from each channel. + ASSERT_EQ(array_length() - 1u, vec.Size()); + // Let `ptr` point to the first element of the first channel in the + // interleaved array. + int16_t* ptr = array_interleaved_.data(); + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopBack(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the AssertSize method. +TEST_P(AudioMultiVectorTest, AssertSize) { + AudioMultiVector vec(num_channels_, array_length()); + EXPECT_EQ(array_length(), vec.Size()); + // Start with asserting with smaller sizes than already allocated. + vec.AssertSize(0); + vec.AssertSize(array_length() - 1); + // Nothing should have changed. 
+ EXPECT_EQ(array_length(), vec.Size()); + // Assert with one element longer than already allocated. + vec.AssertSize(array_length() + 1); + // Expect vector to have grown. + EXPECT_EQ(array_length() + 1, vec.Size()); + // Also check the individual AudioVectors. + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + EXPECT_EQ(array_length() + 1u, vec[channel].Size()); + } +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, OverwriteAt) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_); + AudioMultiVector vec2(num_channels_); + vec2.Zeros(3); // 3 zeros in each channel. + // Overwrite vec2 at position 5. + vec1.OverwriteAt(vec2, 3, 5); + // Verify result. + // Length remains the same. + ASSERT_EQ(array_length(), vec1.Size()); + int16_t* ptr = array_interleaved_.data(); + for (size_t i = 0; i < array_length() - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + if (i >= 5 && i <= 7) { + // Elements 5, 6, 7 should have been replaced with zeros. + EXPECT_EQ(0, vec1[channel][i]); + } else { + EXPECT_EQ(*ptr, vec1[channel][i]); + } + ++ptr; + } + } +} + +// Test the CopyChannel method, when the test is instantiated with at least two +// channels. +TEST_P(AudioMultiVectorTest, CopyChannel) { + if (num_channels_ < 2) + return; + + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_); + // Create a reference copy. + AudioMultiVector ref(num_channels_); + ref.PushBack(vec); + // Copy from first to last channel. + vec.CopyChannel(0, num_channels_ - 1); + // Verify that the first and last channels are identical; the others should + // be left untouched. + for (size_t i = 0; i < array_length(); ++i) { + // Verify that all but the last channel are untouched. 
+ for (size_t channel = 0; channel < num_channels_ - 1; ++channel) { + EXPECT_EQ(ref[channel][i], vec[channel][i]); + } + // Verify that the last and the first channels are identical. + EXPECT_EQ(vec[0][i], vec[num_channels_ - 1][i]); + } +} + +INSTANTIATE_TEST_SUITE_P(TestNumChannels, + AudioMultiVectorTest, + ::testing::Values(static_cast<size_t>(1), + static_cast<size_t>(2), + static_cast<size_t>(5))); +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc new file mode 100644 index 0000000000..10e8936447 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.cc @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_coding/neteq/audio_vector.h" + + +#include <algorithm> +#include <memory> + +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioVector::AudioVector() : AudioVector(kDefaultInitialSize) { + Clear(); +} + +AudioVector::AudioVector(size_t initial_size) + : array_(new int16_t[initial_size + 1]), + capacity_(initial_size + 1), + begin_index_(0), + end_index_(capacity_ - 1) { + memset(array_.get(), 0, capacity_ * sizeof(int16_t)); +} + +AudioVector::~AudioVector() = default; + +void AudioVector::Clear() { + end_index_ = begin_index_ = 0; +} + +void AudioVector::CopyTo(AudioVector* copy_to) const { + RTC_DCHECK(copy_to); + copy_to->Reserve(Size()); + CopyTo(Size(), 0, copy_to->array_.get()); + copy_to->begin_index_ = 0; + copy_to->end_index_ = Size(); +} + +void AudioVector::CopyTo(size_t length, + size_t position, + int16_t* copy_to) const { + if (length == 0) + return; + length = std::min(length, Size() - position); + const size_t copy_index = (begin_index_ + position) % capacity_; + const size_t first_chunk_length = std::min(length, capacity_ - copy_index); + memcpy(copy_to, &array_[copy_index], first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(&copy_to[first_chunk_length], array_.get(), + remaining_length * sizeof(int16_t)); + } +} + +void AudioVector::PushFront(const AudioVector& prepend_this) { + const size_t length = prepend_this.Size(); + if (length == 0) + return; + + // Although the subsequent calling to PushFront does Reserve in it, it is + // always more efficient to do a big Reserve first. 
+ Reserve(Size() + length); + + const size_t first_chunk_length = + std::min(length, prepend_this.capacity_ - prepend_this.begin_index_); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) + PushFront(prepend_this.array_.get(), remaining_length); + PushFront(&prepend_this.array_[prepend_this.begin_index_], + first_chunk_length); +} + +void AudioVector::PushFront(const int16_t* prepend_this, size_t length) { + if (length == 0) + return; + Reserve(Size() + length); + const size_t first_chunk_length = std::min(length, begin_index_); + memcpy(&array_[begin_index_ - first_chunk_length], + &prepend_this[length - first_chunk_length], + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(&array_[capacity_ - remaining_length], prepend_this, + remaining_length * sizeof(int16_t)); + } + begin_index_ = (begin_index_ + capacity_ - length) % capacity_; +} + +void AudioVector::PushBack(const AudioVector& append_this) { + PushBack(append_this, append_this.Size(), 0); +} + +void AudioVector::PushBack(const AudioVector& append_this, + size_t length, + size_t position) { + RTC_DCHECK_LE(position, append_this.Size()); + RTC_DCHECK_LE(length, append_this.Size() - position); + + if (length == 0) + return; + + // Although the subsequent calling to PushBack does Reserve in it, it is + // always more efficient to do a big Reserve first. 
+ Reserve(Size() + length); + + const size_t start_index = + (append_this.begin_index_ + position) % append_this.capacity_; + const size_t first_chunk_length = + std::min(length, append_this.capacity_ - start_index); + PushBack(&append_this.array_[start_index], first_chunk_length); + + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) + PushBack(append_this.array_.get(), remaining_length); +} + +void AudioVector::PushBack(const int16_t* append_this, size_t length) { + if (length == 0) + return; + Reserve(Size() + length); + const size_t first_chunk_length = std::min(length, capacity_ - end_index_); + memcpy(&array_[end_index_], append_this, + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(array_.get(), &append_this[first_chunk_length], + remaining_length * sizeof(int16_t)); + } + end_index_ = (end_index_ + length) % capacity_; +} + +void AudioVector::PopFront(size_t length) { + if (length == 0) + return; + length = std::min(length, Size()); + begin_index_ = (begin_index_ + length) % capacity_; +} + +void AudioVector::PopBack(size_t length) { + if (length == 0) + return; + // Never remove more than what is in the array. + length = std::min(length, Size()); + end_index_ = (end_index_ + capacity_ - length) % capacity_; +} + +void AudioVector::Extend(size_t extra_length) { + if (extra_length == 0) + return; + InsertZerosByPushBack(extra_length, Size()); +} + +void AudioVector::InsertAt(const int16_t* insert_this, + size_t length, + size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // When inserting to a position closer to the beginning, it is more efficient + // to insert by pushing front than to insert by pushing back, since less data + // will be moved, vice versa. 
+ if (position <= Size() - position) { + InsertByPushFront(insert_this, length, position); + } else { + InsertByPushBack(insert_this, length, position); + } +} + +void AudioVector::InsertZerosAt(size_t length, size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // When inserting to a position closer to the beginning, it is more efficient + // to insert by pushing front than to insert by pushing back, since less data + // will be moved, vice versa. + if (position <= Size() - position) { + InsertZerosByPushFront(length, position); + } else { + InsertZerosByPushBack(length, position); + } +} + +void AudioVector::OverwriteAt(const AudioVector& insert_this, + size_t length, + size_t position) { + RTC_DCHECK_LE(length, insert_this.Size()); + if (length == 0) + return; + + // Cap the insert position at the current array length. + position = std::min(Size(), position); + + // Although the subsequent calling to OverwriteAt does Reserve in it, it is + // always more efficient to do a big Reserve first. + size_t new_size = std::max(Size(), position + length); + Reserve(new_size); + + const size_t first_chunk_length = + std::min(length, insert_this.capacity_ - insert_this.begin_index_); + OverwriteAt(&insert_this.array_[insert_this.begin_index_], first_chunk_length, + position); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + OverwriteAt(insert_this.array_.get(), remaining_length, + position + first_chunk_length); + } +} + +void AudioVector::OverwriteAt(const int16_t* insert_this, + size_t length, + size_t position) { + if (length == 0) + return; + // Cap the insert position at the current array length. 
+ position = std::min(Size(), position); + + size_t new_size = std::max(Size(), position + length); + Reserve(new_size); + + const size_t overwrite_index = (begin_index_ + position) % capacity_; + const size_t first_chunk_length = + std::min(length, capacity_ - overwrite_index); + memcpy(&array_[overwrite_index], insert_this, + first_chunk_length * sizeof(int16_t)); + const size_t remaining_length = length - first_chunk_length; + if (remaining_length > 0) { + memcpy(array_.get(), &insert_this[first_chunk_length], + remaining_length * sizeof(int16_t)); + } + + end_index_ = (begin_index_ + new_size) % capacity_; +} + +void AudioVector::CrossFade(const AudioVector& append_this, + size_t fade_length) { + // Fade length cannot be longer than the current vector or `append_this`. + RTC_DCHECK_LE(fade_length, Size()); + RTC_DCHECK_LE(fade_length, append_this.Size()); + fade_length = std::min(fade_length, Size()); + fade_length = std::min(fade_length, append_this.Size()); + size_t position = Size() - fade_length + begin_index_; + // Cross fade the overlapping regions. + // `alpha` is the mixing factor in Q14. + // TODO(hlundin): Consider skipping +1 in the denominator to produce a + // smoother cross-fade, in particular at the end of the fade. + int alpha_step = 16384 / (static_cast<int>(fade_length) + 1); + int alpha = 16384; + for (size_t i = 0; i < fade_length; ++i) { + alpha -= alpha_step; + array_[(position + i) % capacity_] = + (alpha * array_[(position + i) % capacity_] + + (16384 - alpha) * append_this[i] + 8192) >> + 14; + } + RTC_DCHECK_GE(alpha, 0); // Verify that the slope was correct. + // Append what is left of `append_this`. + size_t samples_to_push_back = append_this.Size() - fade_length; + if (samples_to_push_back > 0) + PushBack(append_this, samples_to_push_back, fade_length); +} + +// Returns the number of elements in this AudioVector. 
+size_t AudioVector::Size() const { + return (end_index_ + capacity_ - begin_index_) % capacity_; +} + +// Returns true if this AudioVector is empty. +bool AudioVector::Empty() const { + return begin_index_ == end_index_; +} + +void AudioVector::Reserve(size_t n) { + if (capacity_ > n) + return; + const size_t length = Size(); + // Reserve one more sample to remove the ambiguity between empty vector and + // full vector. Therefore `begin_index_` == `end_index_` indicates empty + // vector, and `begin_index_` == (`end_index_` + 1) % capacity indicates + // full vector. + std::unique_ptr<int16_t[]> temp_array(new int16_t[n + 1]); + CopyTo(length, 0, temp_array.get()); + array_.swap(temp_array); + begin_index_ = 0; + end_index_ = length; + capacity_ = n + 1; +} + +void AudioVector::InsertByPushBack(const int16_t* insert_this, + size_t length, + size_t position) { + const size_t move_chunk_length = Size() - position; + std::unique_ptr<int16_t[]> temp_array(nullptr); + if (move_chunk_length > 0) { + // TODO(minyue): see if it is possible to avoid copying to a buffer. + temp_array.reset(new int16_t[move_chunk_length]); + CopyTo(move_chunk_length, position, temp_array.get()); + PopBack(move_chunk_length); + } + + Reserve(Size() + length + move_chunk_length); + PushBack(insert_this, length); + if (move_chunk_length > 0) + PushBack(temp_array.get(), move_chunk_length); +} + +void AudioVector::InsertByPushFront(const int16_t* insert_this, + size_t length, + size_t position) { + std::unique_ptr<int16_t[]> temp_array(nullptr); + if (position > 0) { + // TODO(minyue): see if it is possible to avoid copying to a buffer. 
+ temp_array.reset(new int16_t[position]); + CopyTo(position, 0, temp_array.get()); + PopFront(position); + } + + Reserve(Size() + length + position); + PushFront(insert_this, length); + if (position > 0) + PushFront(temp_array.get(), position); +} + +void AudioVector::InsertZerosByPushBack(size_t length, size_t position) { + const size_t move_chunk_length = Size() - position; + std::unique_ptr<int16_t[]> temp_array(nullptr); + if (move_chunk_length > 0) { + temp_array.reset(new int16_t[move_chunk_length]); + CopyTo(move_chunk_length, position, temp_array.get()); + PopBack(move_chunk_length); + } + + Reserve(Size() + length + move_chunk_length); + + const size_t first_zero_chunk_length = + std::min(length, capacity_ - end_index_); + memset(&array_[end_index_], 0, first_zero_chunk_length * sizeof(int16_t)); + const size_t remaining_zero_length = length - first_zero_chunk_length; + if (remaining_zero_length > 0) + memset(array_.get(), 0, remaining_zero_length * sizeof(int16_t)); + end_index_ = (end_index_ + length) % capacity_; + + if (move_chunk_length > 0) + PushBack(temp_array.get(), move_chunk_length); +} + +void AudioVector::InsertZerosByPushFront(size_t length, size_t position) { + std::unique_ptr<int16_t[]> temp_array(nullptr); + if (position > 0) { + temp_array.reset(new int16_t[position]); + CopyTo(position, 0, temp_array.get()); + PopFront(position); + } + + Reserve(Size() + length + position); + + const size_t first_zero_chunk_length = std::min(length, begin_index_); + memset(&array_[begin_index_ - first_zero_chunk_length], 0, + first_zero_chunk_length * sizeof(int16_t)); + const size_t remaining_zero_length = length - first_zero_chunk_length; + if (remaining_zero_length > 0) + memset(&array_[capacity_ - remaining_zero_length], 0, + remaining_zero_length * sizeof(int16_t)); + begin_index_ = (begin_index_ + capacity_ - length) % capacity_; + + if (position > 0) + PushFront(temp_array.get(), position); +} + +} // namespace webrtc diff --git 
a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h new file mode 100644 index 0000000000..d68f3ec6be --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ + +#include <string.h> + +#include <cstdint> +#include <memory> + +#include "rtc_base/checks.h" + +namespace webrtc { + +class AudioVector { + public: + // Creates an empty AudioVector. + AudioVector(); + + // Creates an AudioVector with an initial size. + explicit AudioVector(size_t initial_size); + + virtual ~AudioVector(); + + AudioVector(const AudioVector&) = delete; + AudioVector& operator=(const AudioVector&) = delete; + + // Deletes all values and make the vector empty. + virtual void Clear(); + + // Copies all values from this vector to `copy_to`. Any contents in `copy_to` + // are deleted before the copy operation. After the operation is done, + // `copy_to` will be an exact replica of this object. + virtual void CopyTo(AudioVector* copy_to) const; + + // Copies `length` values from `position` in this vector to `copy_to`. + virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const; + + // Prepends the contents of AudioVector `prepend_this` to this object. The + // length of this object is increased with the length of `prepend_this`. 
+ virtual void PushFront(const AudioVector& prepend_this); + + // Same as above, but with an array `prepend_this` with `length` elements as + // source. + virtual void PushFront(const int16_t* prepend_this, size_t length); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const AudioVector& append_this); + + // Appends a segment of `append_this` to the end of this object. The segment + // starts from `position` and has `length` samples. + virtual void PushBack(const AudioVector& append_this, + size_t length, + size_t position); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const int16_t* append_this, size_t length); + + // Removes `length` elements from the beginning of this object. + virtual void PopFront(size_t length); + + // Removes `length` elements from the end of this object. + virtual void PopBack(size_t length); + + // Extends this object with `extra_length` elements at the end. The new + // elements are initialized to zero. + virtual void Extend(size_t extra_length); + + // Inserts `length` elements taken from the array `insert_this` and insert + // them at `position`. The length of the AudioVector is increased by `length`. + // `position` = 0 means that the new values are prepended to the vector. + // `position` = Size() means that the new values are appended to the vector. + virtual void InsertAt(const int16_t* insert_this, + size_t length, + size_t position); + + // Like InsertAt, but inserts `length` zero elements at `position`. + virtual void InsertZerosAt(size_t length, size_t position); + + // Overwrites `length` elements of this AudioVector starting from `position` + // with first values in `AudioVector`. The definition of `position` + // is the same as for InsertAt(). If `length` and `position` are selected + // such that the new data extends beyond the end of the current AudioVector, + // the vector is extended to accommodate the new data. 
+ virtual void OverwriteAt(const AudioVector& insert_this, + size_t length, + size_t position); + + // Overwrites `length` elements of this AudioVector with values taken from the + // array `insert_this`, starting at `position`. The definition of `position` + // is the same as for InsertAt(). If `length` and `position` are selected + // such that the new data extends beyond the end of the current AudioVector, + // the vector is extended to accommodate the new data. + virtual void OverwriteAt(const int16_t* insert_this, + size_t length, + size_t position); + + // Appends `append_this` to the end of the current vector. Lets the two + // vectors overlap by `fade_length` samples, and cross-fade linearly in this + // region. + virtual void CrossFade(const AudioVector& append_this, size_t fade_length); + + // Returns the number of elements in this AudioVector. + virtual size_t Size() const; + + // Returns true if this AudioVector is empty. + virtual bool Empty() const; + + // Accesses and modifies an element of AudioVector. + inline const int16_t& operator[](size_t index) const { + return array_[WrapIndex(index, begin_index_, capacity_)]; + } + + inline int16_t& operator[](size_t index) { + return array_[WrapIndex(index, begin_index_, capacity_)]; + } + + private: + static const size_t kDefaultInitialSize = 10; + + // This method is used by the [] operators to calculate an index within the + // capacity of the array, but without using the modulo operation (%). + static inline size_t WrapIndex(size_t index, + size_t begin_index, + size_t capacity) { + RTC_DCHECK_LT(index, capacity); + RTC_DCHECK_LT(begin_index, capacity); + size_t ix = begin_index + index; + RTC_DCHECK_GE(ix, index); // Check for overflow. 
+ if (ix >= capacity) { + ix -= capacity; + } + RTC_DCHECK_LT(ix, capacity); + return ix; + } + + void Reserve(size_t n); + + void InsertByPushBack(const int16_t* insert_this, + size_t length, + size_t position); + + void InsertByPushFront(const int16_t* insert_this, + size_t length, + size_t position); + + void InsertZerosByPushBack(size_t length, size_t position); + + void InsertZerosByPushFront(size_t length, size_t position); + + std::unique_ptr<int16_t[]> array_; + + size_t capacity_; // Allocated number of samples in the array. + + // The index of the first sample in `array_`, except when + // |begin_index_ == end_index_|, which indicates an empty buffer. + size_t begin_index_; + + // The index of the sample after the last sample in `array_`. + size_t end_index_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc new file mode 100644 index 0000000000..ae9dd88606 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/audio_vector_unittest.cc @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/audio_vector.h" + +#include <stdlib.h> + +#include <string> + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +class AudioVectorTest : public ::testing::Test { + protected: + virtual void SetUp() { + // Populate test array. 
+ for (size_t i = 0; i < array_length(); ++i) { + array_[i] = rtc::checked_cast<int16_t>(i); + } + } + + size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); } + + int16_t array_[10]; +}; + +// Create and destroy AudioVector objects, both empty and with a predefined +// length. +TEST_F(AudioVectorTest, CreateAndDestroy) { + AudioVector vec1; + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioVector vec2(initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TEST_F(AudioVectorTest, SubscriptOperator) { + AudioVector vec(array_length()); + for (size_t i = 0; i < array_length(); ++i) { + vec[i] = static_cast<int16_t>(i); + const int16_t& value = vec[i]; // Make sure to use the const version. + EXPECT_EQ(static_cast<int16_t>(i), value); + } +} + +// Test the PushBack method and the CopyFrom method. The Clear method is also +// invoked. +TEST_F(AudioVectorTest, PushBackAndCopy) { + AudioVector vec; + AudioVector vec_copy; + vec.PushBack(array_, array_length()); + vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`. + ASSERT_EQ(array_length(), vec.Size()); + ASSERT_EQ(array_length(), vec_copy.Size()); + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[i]); + EXPECT_EQ(array_[i], vec_copy[i]); + } + + // Clear `vec` and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyTo(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Test the PushBack method with another AudioVector as input argument. +TEST_F(AudioVectorTest, PushBackVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. 
+ for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast<int16_t>(i); + vec2[i] = static_cast<int16_t>(i + kLength); + } + // Append vec2 to the back of vec1. + vec1.PushBack(vec2); + ASSERT_EQ(2 * kLength, vec1.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast<int16_t>(i), vec1[i]); + } +} + +// Test the PushFront method. +TEST_F(AudioVectorTest, PushFront) { + AudioVector vec; + vec.PushFront(array_, array_length()); + ASSERT_EQ(array_length(), vec.Size()); + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[i]); + } +} + +// Test the PushFront method with another AudioVector as input argument. +TEST_F(AudioVectorTest, PushFrontVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast<int16_t>(i); + vec2[i] = static_cast<int16_t>(i + kLength); + } + // Prepend vec1 to the front of vec2. + vec2.PushFront(vec1); + ASSERT_EQ(2 * kLength, vec2.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast<int16_t>(i), vec2[i]); + } +} + +// Test the PopFront method. +TEST_F(AudioVectorTest, PopFront) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.PopFront(1); // Remove one element. + EXPECT_EQ(array_length() - 1u, vec.Size()); + for (size_t i = 0; i < array_length() - 1; ++i) { + EXPECT_EQ(static_cast<int16_t>(i + 1), vec[i]); + } + vec.PopFront(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TEST_F(AudioVectorTest, PopBack) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.PopBack(1); // Remove one element. 
+ EXPECT_EQ(array_length() - 1u, vec.Size()); + for (size_t i = 0; i < array_length() - 1; ++i) { + EXPECT_EQ(static_cast<int16_t>(i), vec[i]); + } + vec.PopBack(array_length()); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the Extend method. +TEST_F(AudioVectorTest, Extend) { + AudioVector vec; + vec.PushBack(array_, array_length()); + vec.Extend(5); // Extend with 5 elements, which should all be zeros. + ASSERT_EQ(array_length() + 5u, vec.Size()); + // Verify that all are zero. + for (size_t i = array_length(); i < array_length() + 5; ++i) { + EXPECT_EQ(0, vec[i]); + } +} + +// Test the InsertAt method with an insert position in the middle of the vector. +TEST_F(AudioVectorTest, InsertAt) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 5; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. + size_t pos = 0; + for (int i = 0; i < insert_position; ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (size_t i = insert_position; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertZerosAt method with an insert position in the middle of the +// vector. Use the InsertAt method as reference. 
+TEST_F(AudioVectorTest, InsertZerosAt) { + AudioVector vec; + AudioVector vec_ref; + vec.PushBack(array_, array_length()); + vec_ref.PushBack(array_, array_length()); + static const int kNewLength = 5; + int insert_position = 5; + vec.InsertZerosAt(kNewLength, insert_position); + int16_t new_array[kNewLength] = {0}; // All zero elements. + vec_ref.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vectors are identical. + ASSERT_EQ(vec_ref.Size(), vec.Size()); + for (size_t i = 0; i < vec.Size(); ++i) { + EXPECT_EQ(vec_ref[i], vec[i]); + } +} + +// Test the InsertAt method with an insert position at the start of the vector. +TEST_F(AudioVectorTest, InsertAtBeginning) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 0; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {100, 101, ..., 100 + kNewLength - 1, + // 0, 1, ..., kLength - 1}. + size_t pos = 0; + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (size_t i = insert_position; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position at the end of the vector. +TEST_F(AudioVectorTest, InsertAtEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = rtc::checked_cast<int>(array_length()); + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. 
+ size_t pos = 0; + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position beyond the end of the +// vector. Verify that a position beyond the end of the vector does not lead to +// an error. The expected outcome is the same as if the vector end was used as +// input position. That is, the input position should be capped at the maximum +// allowed value. +TEST_F(AudioVectorTest, InsertBeyondEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = + rtc::checked_cast<int>(array_length() + 10); // Too large. + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. + size_t pos = 0; + for (size_t i = 0; i < array_length(); ++i) { + EXPECT_EQ(array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the OverwriteAt method with a position such that all of the new values +// fit within the old vector. +TEST_F(AudioVectorTest, OverwriteAt) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + size_t insert_position = 2; + vec.OverwriteAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. 
+ size_t pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (; pos < array_length(); ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } +} + +// Test the OverwriteAt method with a position such that some of the new values +// extend beyond the end of the current vector. This is valid, and the vector is +// expected to expand to accommodate the new values. +TEST_F(AudioVectorTest, OverwriteBeyondEnd) { + AudioVector vec; + vec.PushBack(array_, array_length()); + static const int kNewLength = 5; + int16_t new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = rtc::checked_cast<int>(array_length() - 2); + vec.OverwriteAt(new_array, kNewLength, insert_position); + ASSERT_EQ(array_length() - 2u + kNewLength, vec.Size()); + // Verify that the vector looks as follows: + // {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1, + // `insert_position`, `insert_position` + 1, ..., kLength - 1}. + int pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + // Verify that we checked to the end of `vec`. + EXPECT_EQ(vec.Size(), static_cast<size_t>(pos)); +} + +TEST_F(AudioVectorTest, CrossFade) { + static const size_t kLength = 100; + static const size_t kFadeLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set all vector elements to 0 in `vec1` and 100 in `vec2`. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = 0; + vec2[i] = 100; + } + vec1.CrossFade(vec2, kFadeLength); + ASSERT_EQ(2 * kLength - kFadeLength, vec1.Size()); + // First part untouched. 
+ for (size_t i = 0; i < kLength - kFadeLength; ++i) { + EXPECT_EQ(0, vec1[i]); + } + // Check mixing zone. + for (size_t i = 0; i < kFadeLength; ++i) { + EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1), + vec1[kLength - kFadeLength + i], 1); + } + // Second part untouched. + for (size_t i = kLength; i < vec1.Size(); ++i) { + EXPECT_EQ(100, vec1[i]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc new file mode 100644 index 0000000000..2c95d3b390 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/background_noise.h" + +#include <string.h> // memcpy + +#include <algorithm> // min, max + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/cross_correlation.h" +#include "modules/audio_coding/neteq/post_decode_vad.h" + +namespace webrtc { +namespace { + +constexpr size_t kMaxSampleRate = 48000; + +} // namespace + +// static +constexpr size_t BackgroundNoise::kMaxLpcOrder; + +BackgroundNoise::BackgroundNoise(size_t num_channels) + : num_channels_(num_channels), + channel_parameters_(new ChannelParameters[num_channels_]) { + Reset(); +} + +BackgroundNoise::~BackgroundNoise() {} + +void BackgroundNoise::Reset() { + initialized_ = false; + for (size_t channel = 0; channel < num_channels_; ++channel) { + channel_parameters_[channel].Reset(); + } +} + +bool BackgroundNoise::Update(const AudioMultiVector& input, + const PostDecodeVad& vad) { + bool filter_params_saved = false; + if (vad.running() && vad.active_speech()) { + // Do not update the background noise parameters if we know that the signal + // is active speech. 
+ return filter_params_saved; + } + + int32_t auto_correlation[kMaxLpcOrder + 1]; + int16_t fiter_output[kMaxLpcOrder + kResidualLength]; + int16_t reflection_coefficients[kMaxLpcOrder]; + int16_t lpc_coefficients[kMaxLpcOrder + 1]; + + for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { + ChannelParameters& parameters = channel_parameters_[channel_ix]; + int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; + int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; + RTC_DCHECK_GE(input.Size(), kVecLen); + input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal); + int32_t sample_energy = + CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation); + + if ((!vad.running() && + sample_energy < parameters.energy_update_threshold) || + (vad.running() && !vad.active_speech())) { + // Generate LPC coefficients. + if (auto_correlation[0] <= 0) { + // Center value in auto-correlation is not positive. Do not update. + return filter_params_saved; + } + + // Regardless of whether the filter is actually updated or not, + // update energy threshold levels, since we have in fact observed + // a low energy signal. + if (sample_energy < parameters.energy_update_threshold) { + // Never go under 1.0 in average sample energy. + parameters.energy_update_threshold = std::max(sample_energy, 1); + parameters.low_energy_update_threshold = 0; + } + + // Only update BGN if filter is stable, i.e., if return value from + // Levinson-Durbin function is 1. + if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients, + reflection_coefficients, + kMaxLpcOrder) != 1) { + return filter_params_saved; + } + + // Generate the CNG gain factor by looking at the energy of the residual. 
+ WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, + fiter_output, lpc_coefficients, + kMaxLpcOrder + 1, kResidualLength); + int32_t residual_energy = WebRtcSpl_DotProductWithScale( + fiter_output, fiter_output, kResidualLength, 0); + + // Check spectral flatness. + // Comparing the residual variance with the input signal variance tells + // if the spectrum is flat or not. + // If 5 * residual_energy >= 16 * sample_energy, the spectrum is flat + // enough. Also ensure that the energy is non-zero. + if ((sample_energy > 0) && + (int64_t{5} * residual_energy >= int64_t{16} * sample_energy)) { + // Spectrum is flat enough; save filter parameters. + // `temp_signal` + `kVecLen` - `kMaxLpcOrder` points at the first of the + // `kMaxLpcOrder` samples in the residual signal, which will form the + // filter state for the next noise generation. + SaveParameters(channel_ix, lpc_coefficients, + temp_signal + kVecLen - kMaxLpcOrder, sample_energy, + residual_energy); + filter_params_saved = true; + } + } else { + // Will only happen if post-decode VAD is disabled and `sample_energy` is + // not low enough. Increase the threshold for update so that it increases + // by a factor 4 in 4 seconds. + IncrementEnergyThreshold(channel_ix, sample_energy); + } + } + return filter_params_saved; +} + +void BackgroundNoise::GenerateBackgroundNoise( + rtc::ArrayView<const int16_t> random_vector, + size_t channel, + int mute_slope, + bool too_many_expands, + size_t num_noise_samples, + int16_t* buffer) { + constexpr size_t kNoiseLpcOrder = kMaxLpcOrder; + int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; + RTC_DCHECK_LE(num_noise_samples, (kMaxSampleRate / 8000 * 125)); + RTC_DCHECK_GE(random_vector.size(), num_noise_samples); + int16_t* noise_samples = &buffer[kNoiseLpcOrder]; + if (initialized()) { + // Use background noise parameters. 
+ memcpy(noise_samples - kNoiseLpcOrder, FilterState(channel), + sizeof(int16_t) * kNoiseLpcOrder); + + int dc_offset = 0; + if (ScaleShift(channel) > 1) { + dc_offset = 1 << (ScaleShift(channel) - 1); + } + + // Scale random vector to correct energy level. + WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector.data(), + Scale(channel), dc_offset, + ScaleShift(channel), num_noise_samples); + + WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples, + Filter(channel), kNoiseLpcOrder + 1, + num_noise_samples); + + SetFilterState( + channel, + {&(noise_samples[num_noise_samples - kNoiseLpcOrder]), kNoiseLpcOrder}); + + // Unmute the background noise. + int16_t bgn_mute_factor = MuteFactor(channel); + if (bgn_mute_factor < 16384) { + WebRtcSpl_AffineTransformVector(noise_samples, noise_samples, + bgn_mute_factor, 8192, 14, + num_noise_samples); + } + // Update mute_factor in BackgroundNoise class. + SetMuteFactor(channel, bgn_mute_factor); + } else { + // BGN parameters have not been initialized; use zero noise. 
+ memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples); + } +} + +int32_t BackgroundNoise::Energy(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].energy; +} + +void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) { + RTC_DCHECK_LT(channel, num_channels_); + channel_parameters_[channel].mute_factor = value; +} + +int16_t BackgroundNoise::MuteFactor(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].mute_factor; +} + +const int16_t* BackgroundNoise::Filter(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].filter; +} + +const int16_t* BackgroundNoise::FilterState(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].filter_state; +} + +void BackgroundNoise::SetFilterState(size_t channel, + rtc::ArrayView<const int16_t> input) { + RTC_DCHECK_LT(channel, num_channels_); + size_t length = std::min(input.size(), kMaxLpcOrder); + memcpy(channel_parameters_[channel].filter_state, input.data(), + length * sizeof(int16_t)); +} + +int16_t BackgroundNoise::Scale(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].scale; +} +int16_t BackgroundNoise::ScaleShift(size_t channel) const { + RTC_DCHECK_LT(channel, num_channels_); + return channel_parameters_[channel].scale_shift; +} + +int32_t BackgroundNoise::CalculateAutoCorrelation( + const int16_t* signal, + size_t length, + int32_t* auto_correlation) const { + static const int kCorrelationStep = -1; + const int correlation_scale = + CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1, + kCorrelationStep, auto_correlation); + + // Number of shifts to normalize energy to energy/sample. 
+ int energy_sample_shift = kLogVecLen - correlation_scale; + return auto_correlation[0] >> energy_sample_shift; +} + +void BackgroundNoise::IncrementEnergyThreshold(size_t channel, + int32_t sample_energy) { + // TODO(hlundin): Simplify the below threshold update. What this code + // does is simply "threshold += (increment * threshold) >> 16", but due + // to the limited-width operations, it is not exactly the same. The + // difference should be inaudible, but bit-exactness would not be + // maintained. + RTC_DCHECK_LT(channel, num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + int32_t temp_energy = + (kThresholdIncrement * parameters.low_energy_update_threshold) >> 16; + temp_energy += + kThresholdIncrement * (parameters.energy_update_threshold & 0xFF); + temp_energy += + (kThresholdIncrement * ((parameters.energy_update_threshold >> 8) & 0xFF)) + << 8; + parameters.low_energy_update_threshold += temp_energy; + + parameters.energy_update_threshold += + kThresholdIncrement * (parameters.energy_update_threshold >> 16); + parameters.energy_update_threshold += + parameters.low_energy_update_threshold >> 16; + parameters.low_energy_update_threshold = + parameters.low_energy_update_threshold & 0x0FFFF; + + // Update maximum energy. + // Decrease by a factor 1/1024 each time. + parameters.max_energy = parameters.max_energy - (parameters.max_energy >> 10); + if (sample_energy > parameters.max_energy) { + parameters.max_energy = sample_energy; + } + + // Set `energy_update_threshold` to no less than 60 dB lower than + // `max_energy_`. Adding 524288 assures proper rounding. 
+ int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20; + if (energy_update_threshold > parameters.energy_update_threshold) { + parameters.energy_update_threshold = energy_update_threshold; + } +} + +void BackgroundNoise::SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy) { + RTC_DCHECK_LT(channel, num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + memcpy(parameters.filter, lpc_coefficients, + (kMaxLpcOrder + 1) * sizeof(int16_t)); + memcpy(parameters.filter_state, filter_state, kMaxLpcOrder * sizeof(int16_t)); + // Save energy level and update energy threshold levels. + // Never get under 1.0 in average sample energy. + parameters.energy = std::max(sample_energy, 1); + parameters.energy_update_threshold = parameters.energy; + parameters.low_energy_update_threshold = 0; + + // Normalize residual_energy to 29 or 30 bits before sqrt. + int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1; + if (norm_shift & 0x1) { + norm_shift -= 1; // Even number of shifts required. + } + residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift); + + // Calculate scale and shift factor. + parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy)); + // Add 13 to the `scale_shift_`, since the random numbers table is in + // Q13. + // TODO(hlundin): Move the "13" to where the `scale_shift_` is used? + parameters.scale_shift = + static_cast<int16_t>(13 + ((kLogResidualLength + norm_shift) / 2)); + + initialized_ = true; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h new file mode 100644 index 0000000000..8e6d5890a0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ +#define MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ + +#include <string.h> // size_t + +#include <memory> + +#include "api/array_view.h" + +namespace webrtc { + +// Forward declarations. +class AudioMultiVector; +class PostDecodeVad; + +// This class handles estimation of background noise parameters. +class BackgroundNoise { + public: + // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10. + // Will work anyway, but probably sound a little worse. + static constexpr size_t kMaxLpcOrder = 8; // 32000 / 8000 + 4. + + explicit BackgroundNoise(size_t num_channels); + virtual ~BackgroundNoise(); + + BackgroundNoise(const BackgroundNoise&) = delete; + BackgroundNoise& operator=(const BackgroundNoise&) = delete; + + void Reset(); + + // Updates the parameter estimates based on the signal currently in the + // `sync_buffer`, and on the latest decision in `vad` if it is running. + // Returns true if the filter parameters are updated. + bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad); + + // Generates background noise given a random vector and writes the output to + // `buffer`. + void GenerateBackgroundNoise(rtc::ArrayView<const int16_t> random_vector, + size_t channel, + int mute_slope, + bool too_many_expands, + size_t num_noise_samples, + int16_t* buffer); + + // Returns `energy_` for `channel`. + int32_t Energy(size_t channel) const; + + // Sets the value of `mute_factor_` for `channel` to `value`. + void SetMuteFactor(size_t channel, int16_t value); + + // Returns `mute_factor_` for `channel`. 
+ int16_t MuteFactor(size_t channel) const; + + // Returns a pointer to `filter_` for `channel`. + const int16_t* Filter(size_t channel) const; + + // Returns a pointer to `filter_state_` for `channel`. + const int16_t* FilterState(size_t channel) const; + + // Copies `input` to the filter state. Will not copy more than `kMaxLpcOrder` + // elements. + void SetFilterState(size_t channel, rtc::ArrayView<const int16_t> input); + + // Returns `scale_` for `channel`. + int16_t Scale(size_t channel) const; + + // Returns `scale_shift_` for `channel`. + int16_t ScaleShift(size_t channel) const; + + // Accessors. + bool initialized() const { return initialized_; } + + private: + static const int kThresholdIncrement = 229; // 0.0035 in Q16. + static const size_t kVecLen = 256; + static const int kLogVecLen = 8; // log2(kVecLen). + static const size_t kResidualLength = 64; + static const int16_t kLogResidualLength = 6; // log2(kResidualLength) + + struct ChannelParameters { + // Constructor. + ChannelParameters() { Reset(); } + + void Reset() { + energy = 2500; + max_energy = 0; + energy_update_threshold = 500000; + low_energy_update_threshold = 0; + memset(filter_state, 0, sizeof(filter_state)); + memset(filter, 0, sizeof(filter)); + filter[0] = 4096; + mute_factor = 0; + scale = 20000; + scale_shift = 24; + } + + int32_t energy; + int32_t max_energy; + int32_t energy_update_threshold; + int32_t low_energy_update_threshold; + int16_t filter_state[kMaxLpcOrder]; + int16_t filter[kMaxLpcOrder + 1]; + int16_t mute_factor; + int16_t scale; + int16_t scale_shift; + }; + + int32_t CalculateAutoCorrelation(const int16_t* signal, + size_t length, + int32_t* auto_correlation) const; + + // Increments the energy threshold by a factor 1 + `kThresholdIncrement`. + void IncrementEnergyThreshold(size_t channel, int32_t sample_energy); + + // Updates the filter parameters. 
+ void SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy); + + size_t num_channels_; + std::unique_ptr<ChannelParameters[]> channel_parameters_; + bool initialized_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc new file mode 100644 index 0000000000..e32492f57e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise_unittest.cc @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for BackgroundNoise class. + +#include "modules/audio_coding/neteq/background_noise.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(BackgroundNoise, CreateAndDestroy) { + size_t channels = 1; + BackgroundNoise bgn(channels); +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc new file mode 100644 index 0000000000..2c42d0d13f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/buffer_level_filter.h" + +#include <stdint.h> + +#include <algorithm> + +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +BufferLevelFilter::BufferLevelFilter() { + Reset(); +} + +void BufferLevelFilter::Reset() { + filtered_current_level_ = 0; + level_factor_ = 253; +} + +void BufferLevelFilter::Update(size_t buffer_size_samples, + int time_stretched_samples) { + // Filter: + // `filtered_current_level_` = `level_factor_` * `filtered_current_level_` + + // (1 - `level_factor_`) * `buffer_size_samples` + // `level_factor_` and `filtered_current_level_` are in Q8. + // `buffer_size_samples` is in Q0. + const int64_t filtered_current_level = + (level_factor_ * int64_t{filtered_current_level_} >> 8) + + (256 - level_factor_) * rtc::dchecked_cast<int64_t>(buffer_size_samples); + + // Account for time-scale operations (accelerate and pre-emptive expand) and + // make sure that the filtered value remains non-negative. 
+ filtered_current_level_ = rtc::saturated_cast<int>(std::max<int64_t>( + 0, filtered_current_level - int64_t{time_stretched_samples} * (1 << 8))); +} + +void BufferLevelFilter::SetFilteredBufferLevel(int buffer_size_samples) { + filtered_current_level_ = + rtc::saturated_cast<int>(int64_t{buffer_size_samples} * 256); +} + +void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level_ms) { + if (target_buffer_level_ms <= 20) { + level_factor_ = 251; + } else if (target_buffer_level_ms <= 60) { + level_factor_ = 252; + } else if (target_buffer_level_ms <= 140) { + level_factor_ = 253; + } else { + level_factor_ = 254; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h new file mode 100644 index 0000000000..ced36da9c2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace webrtc { + +class BufferLevelFilter { + public: + BufferLevelFilter(); + virtual ~BufferLevelFilter() {} + + BufferLevelFilter(const BufferLevelFilter&) = delete; + BufferLevelFilter& operator=(const BufferLevelFilter&) = delete; + + virtual void Reset(); + + // Updates the filter. Current buffer size is `buffer_size_samples`. 
+ // `time_stretched_samples` is subtracted from the filtered value (thus + // bypassing the filter operation). + virtual void Update(size_t buffer_size_samples, int time_stretched_samples); + + // Set the filtered buffer level to a particular value directly. This should + // only be used in case of large changes in buffer size, such as buffer + // flushes. + virtual void SetFilteredBufferLevel(int buffer_size_samples); + + // The target level is used to select the appropriate filter coefficient. + virtual void SetTargetBufferLevel(int target_buffer_level_ms); + + // Returns filtered current level in number of samples. + virtual int filtered_current_level() const { + // Round to nearest whole sample. + return (int64_t{filtered_current_level_} + (1 << 7)) >> 8; + } + + private: + int level_factor_; // Filter factor for the buffer level filter in Q8. + int filtered_current_level_; // Filtered current buffer level in Q8. +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc new file mode 100644 index 0000000000..6773e96f58 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/buffer_level_filter_unittest.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for BufferLevelFilter class. + +#include "modules/audio_coding/neteq/buffer_level_filter.h" + +#include <math.h> // Access to pow function. 
+ +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(BufferLevelFilter, CreateAndDestroy) { + BufferLevelFilter* filter = new BufferLevelFilter(); + EXPECT_EQ(0, filter->filtered_current_level()); + delete filter; +} + +TEST(BufferLevelFilter, ConvergenceTest) { + BufferLevelFilter filter; + for (int times = 10; times <= 50; times += 10) { + for (int value = 100; value <= 200; value += 10) { + filter.Reset(); + filter.SetTargetBufferLevel(20); // Makes filter coefficient 251/256. + rtc::StringBuilder ss; + ss << "times = " << times << ", value = " << value; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + for (int i = 0; i < times; ++i) { + filter.Update(value, 0 /* time_stretched_samples */); + } + // Expect the filtered value to be (theoretically) + // (1 - (251/256) ^ `times`) * `value`. + double expected_value_double = (1 - pow(251.0 / 256.0, times)) * value; + int expected_value = static_cast<int>(expected_value_double); + + // The actual value may differ slightly from the expected value due to + // intermediate-stage rounding errors in the filter implementation. + // This is why we have to use EXPECT_NEAR with a tolerance of +/-1. + EXPECT_NEAR(expected_value, filter.filtered_current_level(), 1); + } + } +} + +// Verify that target buffer level impacts on the filter convergence. +TEST(BufferLevelFilter, FilterFactor) { + BufferLevelFilter filter; + // Update 10 times with value 100. + const int kTimes = 10; + const int kValue = 100; + + filter.SetTargetBufferLevel(60); // Makes filter coefficient 252/256. + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */); + } + // Expect the filtered value to be + // (1 - (252/256) ^ `kTimes`) * `kValue`. + int expected_value = 15; + EXPECT_EQ(expected_value, filter.filtered_current_level()); + + filter.Reset(); + filter.SetTargetBufferLevel(140); // Makes filter coefficient 253/256. 
+ for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */); + } + // Expect the filtered value to be + // (1 - (253/256) ^ `kTimes`) * `kValue`. + expected_value = 11; + EXPECT_EQ(expected_value, filter.filtered_current_level()); + + filter.Reset(); + filter.SetTargetBufferLevel(160); // Makes filter coefficient 254/256. + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */); + } + // Expect the filtered value to be + // (1 - (254/256) ^ `kTimes`) * `kValue`. + expected_value = 8; + EXPECT_EQ(expected_value, filter.filtered_current_level()); +} + +TEST(BufferLevelFilter, TimeStretchedSamples) { + BufferLevelFilter filter; + filter.SetTargetBufferLevel(20); // Makes filter coefficient 251/256. + // Update 10 times with value 100. + const int kTimes = 10; + const int kValue = 100; + const int kTimeStretchedSamples = 3; + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0); + } + // Expect the filtered value to be + // (1 - (251/256) ^ `kTimes`) * `kValue`. + const int kExpectedValue = 18; + EXPECT_EQ(kExpectedValue, filter.filtered_current_level()); + + // Update filter again, now with non-zero value for packet length. + // Set the current filtered value to be the input, in order to isolate the + // impact of `kTimeStretchedSamples`. + filter.Update(filter.filtered_current_level(), kTimeStretchedSamples); + EXPECT_EQ(kExpectedValue - kTimeStretchedSamples, + filter.filtered_current_level()); + // Try negative value and verify that we come back to the previous result. 
+ filter.Update(filter.filtered_current_level(), -kTimeStretchedSamples); + EXPECT_EQ(kExpectedValue, filter.filtered_current_level()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc new file mode 100644 index 0000000000..a2ce888f45 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/comfort_noise.cc @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/comfort_noise.h" + + +#include <cstdint> +#include <memory> + +#include "api/array_view.h" +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/audio_vector.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/dsp_helper.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +void ComfortNoise::Reset() { + first_call_ = true; +} + +int ComfortNoise::UpdateParameters(const Packet& packet) { + // Get comfort noise decoder. 
+ if (decoder_database_->SetActiveCngDecoder(packet.payload_type) != kOK) { + return kUnknownPayloadType; + } + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + RTC_DCHECK(cng_decoder); + cng_decoder->UpdateSid(packet.payload); + return kOK; +} + +int ComfortNoise::Generate(size_t requested_length, AudioMultiVector* output) { + // TODO(hlundin): Change to an enumerator and skip assert. + RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 || + fs_hz_ == 48000); + // Not adapted for multi-channel yet. + if (output->Channels() != 1) { + RTC_LOG(LS_ERROR) << "No multi-channel support"; + return kMultiChannelNotSupported; + } + + size_t number_of_samples = requested_length; + bool new_period = false; + if (first_call_) { + // Generate noise and overlap slightly with old data. + number_of_samples = requested_length + overlap_length_; + new_period = true; + } + output->AssertSize(number_of_samples); + // Get the decoder from the database. + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (!cng_decoder) { + RTC_LOG(LS_ERROR) << "Unknwown payload type"; + return kUnknownPayloadType; + } + + std::unique_ptr<int16_t[]> temp(new int16_t[number_of_samples]); + if (!cng_decoder->Generate( + rtc::ArrayView<int16_t>(temp.get(), number_of_samples), new_period)) { + // Error returned. + output->Zeros(requested_length); + RTC_LOG(LS_ERROR) + << "ComfortNoiseDecoder::Genererate failed to generate comfort noise"; + return kInternalError; + } + (*output)[0].OverwriteAt(temp.get(), number_of_samples, 0); + + if (first_call_) { + // Set tapering window parameters. Values are in Q15. + int16_t muting_window; // Mixing factor for overlap data. + int16_t muting_window_increment; // Mixing factor increment (negative). + int16_t unmuting_window; // Mixing factor for comfort noise. + int16_t unmuting_window_increment; // Mixing factor increment. 
+ if (fs_hz_ == 8000) { + muting_window = DspHelper::kMuteFactorStart8kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement8kHz; + unmuting_window = DspHelper::kUnmuteFactorStart8kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz; + } else if (fs_hz_ == 16000) { + muting_window = DspHelper::kMuteFactorStart16kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement16kHz; + unmuting_window = DspHelper::kUnmuteFactorStart16kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz; + } else if (fs_hz_ == 32000) { + muting_window = DspHelper::kMuteFactorStart32kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement32kHz; + unmuting_window = DspHelper::kUnmuteFactorStart32kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz; + } else { // fs_hz_ == 48000 + muting_window = DspHelper::kMuteFactorStart48kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement48kHz; + unmuting_window = DspHelper::kUnmuteFactorStart48kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz; + } + + // Do overlap-add between new vector and overlap. + size_t start_ix = sync_buffer_->Size() - overlap_length_; + for (size_t i = 0; i < overlap_length_; i++) { + /* overlapVec[i] = WinMute * overlapVec[i] + WinUnMute * outData[i] */ + // The expression (*output)[0][i] is the i-th element in the first + // channel. + (*sync_buffer_)[0][start_ix + i] = + (((*sync_buffer_)[0][start_ix + i] * muting_window) + + ((*output)[0][i] * unmuting_window) + 16384) >> + 15; + muting_window += muting_window_increment; + unmuting_window += unmuting_window_increment; + } + // Remove `overlap_length_` samples from the front of `output` since they + // were mixed into `sync_buffer_` above. 
namespace webrtc {

// Forward declarations.
class AudioMultiVector;
class DecoderDatabase;
class SyncBuffer;
struct Packet;

// This class acts as an interface to the CNG generator.
class ComfortNoise {
 public:
  // Status codes returned by UpdateParameters() and Generate().
  enum ReturnCodes {
    kOK = 0,
    kUnknownPayloadType,
    kInternalError,
    kMultiChannelNotSupported
  };

  // Creates a comfort noise generator for sample rate `fs_hz`.
  // `decoder_database` and `sync_buffer` are stored as raw, non-owning
  // pointers. The overlap length is 5 samples per 8000 Hz of sample rate.
  ComfortNoise(int fs_hz,
               DecoderDatabase* decoder_database,
               SyncBuffer* sync_buffer)
      : fs_hz_(fs_hz),
        first_call_(true),
        overlap_length_(5 * fs_hz_ / 8000),
        decoder_database_(decoder_database),
        sync_buffer_(sync_buffer),
        // Must be initialized: internal_error_code() would otherwise read an
        // indeterminate value.
        internal_error_code_(0) {}

  ComfortNoise(const ComfortNoise&) = delete;
  ComfortNoise& operator=(const ComfortNoise&) = delete;

  // Resets the state. Should be called before each new comfort noise period.
  void Reset();

  // Update the comfort noise generator with the parameters in `packet`.
  int UpdateParameters(const Packet& packet);

  // Generates `requested_length` samples of comfort noise and writes to
  // `output`. If this is the first call after Reset (or the first after
  // creating the object), it will also mix in comfort noise at the end of the
  // SyncBuffer object provided in the constructor.
  int Generate(size_t requested_length, AudioMultiVector* output);

  // Returns the last error code that was produced by the comfort noise
  // decoder. Returns 0 if no error has been encountered since the last reset.
  // NOTE(review): nothing in this header assigns `internal_error_code_` after
  // construction, so unless the implementation file sets it this always
  // returns 0 -- confirm whether the accessor is still needed.
  int internal_error_code() const { return internal_error_code_; }

 private:
  int fs_hz_;
  bool first_call_;
  size_t overlap_length_;
  DecoderDatabase* decoder_database_;
  SyncBuffer* sync_buffer_;
  int internal_error_code_;
};

}  // namespace webrtc
+ +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc new file mode 100644 index 0000000000..37ed9374f0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/cross_correlation.h" + +#include <cstdlib> +#include <limits> + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +// This function decides the overflow-protecting scaling and calls +// WebRtcSpl_CrossCorrelation. +int CrossCorrelationWithAutoShift(const int16_t* sequence_1, + const int16_t* sequence_2, + size_t sequence_1_length, + size_t cross_correlation_length, + int cross_correlation_step, + int32_t* cross_correlation) { + // Find the element that has the maximum absolute value of sequence_1 and 2. + // Note that these values may be negative. + const int16_t max_1 = + WebRtcSpl_MaxAbsElementW16(sequence_1, sequence_1_length); + const int sequence_2_shift = + cross_correlation_step * (static_cast<int>(cross_correlation_length) - 1); + const int16_t* sequence_2_start = + sequence_2_shift >= 0 ? 
sequence_2 : sequence_2 + sequence_2_shift; + const size_t sequence_2_length = + sequence_1_length + std::abs(sequence_2_shift); + const int16_t max_2 = + WebRtcSpl_MaxAbsElementW16(sequence_2_start, sequence_2_length); + + // In order to avoid overflow when computing the sum we should scale the + // samples so that (in_vector_length * max_1 * max_2) will not overflow. + const int64_t max_value = + abs(max_1 * max_2) * static_cast<int64_t>(sequence_1_length); + const int32_t factor = max_value >> 31; + const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); + + WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2, + sequence_1_length, cross_correlation_length, + scaling, cross_correlation_step); + + return scaling; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h new file mode 100644 index 0000000000..5082ce6a30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/cross_correlation.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ +#define MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace webrtc { + +// The function calculates the cross-correlation between two sequences +// `sequence_1` and `sequence_2`. `sequence_1` is taken as reference, with +// `sequence_1_length` as its length. `sequence_2` slides for the calculation of +// cross-correlation. The result will be saved in `cross_correlation`. 
+// `cross_correlation_length` correlation points are calculated. +// The corresponding lag starts from 0, and increases with a step of +// `cross_correlation_step`. The result is without normalization. To avoid +// overflow, the result will be right shifted. The amount of shifts will be +// returned. +// +// Input: +// - sequence_1 : First sequence (reference). +// - sequence_2 : Second sequence (sliding during calculation). +// - sequence_1_length : Length of `sequence_1`. +// - cross_correlation_length : Number of cross-correlations to calculate. +// - cross_correlation_step : Step in the lag for the cross-correlation. +// +// Output: +// - cross_correlation : The cross-correlation in Q(-right_shifts) +// +// Return: +// Number of right shifts in cross_correlation. + +int CrossCorrelationWithAutoShift(const int16_t* sequence_1, + const int16_t* sequence_2, + size_t sequence_1_length, + size_t cross_correlation_length, + int cross_correlation_step, + int32_t* cross_correlation); + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc new file mode 100644 index 0000000000..558774dcb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc @@ -0,0 +1,508 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/decision_logic.h" + +#include <stdio.h> + +#include <cstdint> +#include <memory> +#include <string> + +#include "absl/types/optional.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "modules/audio_coding/neteq/packet_arrival_history.h" +#include "modules/audio_coding/neteq/packet_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/experiments/struct_parameters_parser.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +constexpr int kPostponeDecodingLevel = 50; +constexpr int kTargetLevelWindowMs = 100; +constexpr int kMaxWaitForPacketTicks = 10; +// The granularity of delay adjustments (accelerate/preemptive expand) is 15ms, +// but round up since the clock has a granularity of 10ms. +constexpr int kDelayAdjustmentGranularityMs = 20; + +std::unique_ptr<DelayManager> CreateDelayManager( + const NetEqController::Config& neteq_config) { + DelayManager::Config config; + config.max_packets_in_buffer = neteq_config.max_packets_in_buffer; + config.base_minimum_delay_ms = neteq_config.base_min_delay_ms; + config.Log(); + return std::make_unique<DelayManager>(config, neteq_config.tick_timer); +} + +bool IsTimestretch(NetEq::Mode mode) { + return mode == NetEq::Mode::kAccelerateSuccess || + mode == NetEq::Mode::kAccelerateLowEnergy || + mode == NetEq::Mode::kPreemptiveExpandSuccess || + mode == NetEq::Mode::kPreemptiveExpandLowEnergy; +} + +bool IsCng(NetEq::Mode mode) { + return mode == NetEq::Mode::kRfc3389Cng || + mode == NetEq::Mode::kCodecInternalCng; +} + +bool IsExpand(NetEq::Mode mode) { + return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc; +} + +} // namespace + +DecisionLogic::Config::Config() { + StructParametersParser::Create( + "enable_stable_playout_delay", 
&enable_stable_playout_delay, // + "reinit_after_expands", &reinit_after_expands, // + "packet_history_size_ms", &packet_history_size_ms, // + "deceleration_target_level_offset_ms", + &deceleration_target_level_offset_ms) + ->Parse(webrtc::field_trial::FindFullName( + "WebRTC-Audio-NetEqDecisionLogicConfig")); + RTC_LOG(LS_INFO) << "NetEq decision logic config:" + << " enable_stable_playout_delay=" + << enable_stable_playout_delay + << " reinit_after_expands=" << reinit_after_expands + << " packet_history_size_ms=" << packet_history_size_ms + << " deceleration_target_level_offset_ms=" + << deceleration_target_level_offset_ms; +} + +DecisionLogic::DecisionLogic(NetEqController::Config config) + : DecisionLogic(config, + CreateDelayManager(config), + std::make_unique<BufferLevelFilter>()) {} + +DecisionLogic::DecisionLogic( + NetEqController::Config config, + std::unique_ptr<DelayManager> delay_manager, + std::unique_ptr<BufferLevelFilter> buffer_level_filter) + : delay_manager_(std::move(delay_manager)), + buffer_level_filter_(std::move(buffer_level_filter)), + packet_arrival_history_(config_.packet_history_size_ms), + tick_timer_(config.tick_timer), + disallow_time_stretching_(!config.allow_time_stretching), + timescale_countdown_( + tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)) {} + +DecisionLogic::~DecisionLogic() = default; + +void DecisionLogic::SoftReset() { + packet_length_samples_ = 0; + sample_memory_ = 0; + prev_time_scale_ = false; + timescale_countdown_ = + tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1); + time_stretched_cn_samples_ = 0; + delay_manager_->Reset(); + buffer_level_filter_->Reset(); + packet_arrival_history_.Reset(); + last_playout_delay_ms_ = 0; +} + +void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { + // TODO(hlundin): Change to an enumerator and skip assert. 
+ RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || + fs_hz == 48000); + sample_rate_khz_ = fs_hz / 1000; + output_size_samples_ = output_size_samples; + packet_arrival_history_.set_sample_rate(fs_hz); +} + +NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, + bool* reset_decoder) { + // If last mode was CNG (or Expand, since this could be covering up for + // a lost CNG packet), remember that CNG is on. This is needed if comfort + // noise is interrupted by DTMF. + if (status.last_mode == NetEq::Mode::kRfc3389Cng) { + cng_state_ = kCngRfc3389On; + } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) { + cng_state_ = kCngInternalOn; + } + + if (IsExpand(status.last_mode)) { + ++num_consecutive_expands_; + } else { + num_consecutive_expands_ = 0; + } + + if (!IsExpand(status.last_mode) && !IsCng(status.last_mode)) { + last_playout_delay_ms_ = GetPlayoutDelayMs(status); + } + + prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode); + if (prev_time_scale_) { + timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); + } + if (!IsCng(status.last_mode)) { + FilterBufferLevel(status.packet_buffer_info.span_samples); + } + + // Guard for errors, to avoid getting stuck in error mode. + if (status.last_mode == NetEq::Mode::kError) { + if (!status.next_packet) { + return NetEq::Operation::kExpand; + } else { + // Use kUndefined to flag for a reset. + return NetEq::Operation::kUndefined; + } + } + + if (status.next_packet && status.next_packet->is_cng) { + return CngOperation(status); + } + + // Handle the case with no packet at all available (except maybe DTMF). + if (!status.next_packet) { + return NoPacket(status); + } + + // If the expand period was very long, reset NetEQ since it is likely that the + // sender was restarted. 
+ if (num_consecutive_expands_ > config_.reinit_after_expands) { + *reset_decoder = true; + return NetEq::Operation::kNormal; + } + + // Make sure we don't restart audio too soon after an expansion to avoid + // running out of data right away again. We should only wait if there are no + // DTX or CNG packets in the buffer (otherwise we should just play out what we + // have, since we cannot know the exact duration of DTX or CNG packets), and + // if the mute factor is low enough (otherwise the expansion was short enough + // to not be noticable). + // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. + const int target_level_samples = TargetLevelMs() * sample_rate_khz_; + if (!config_.enable_stable_playout_delay && IsExpand(status.last_mode) && + status.expand_mutefactor < 16384 / 2 && + status.packet_buffer_info.span_samples < + static_cast<size_t>(target_level_samples * kPostponeDecodingLevel / + 100) && + !status.packet_buffer_info.dtx_or_cng) { + return NetEq::Operation::kExpand; + } + + const uint32_t five_seconds_samples = + static_cast<uint32_t>(5000 * sample_rate_khz_); + // Check if the required packet is available. + if (status.target_timestamp == status.next_packet->timestamp) { + return ExpectedPacketAvailable(status); + } + if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp, + status.target_timestamp, + five_seconds_samples)) { + return FuturePacketAvailable(status); + } + // This implies that available_timestamp < target_timestamp, which can + // happen when a new stream or codec is received. Signal for a reset. 
+ return NetEq::Operation::kUndefined; +} + +void DecisionLogic::NotifyMutedState() { + ++num_consecutive_expands_; +} + +int DecisionLogic::TargetLevelMs() const { + int target_delay_ms = delay_manager_->TargetDelayMs(); + if (!config_.enable_stable_playout_delay) { + target_delay_ms = + std::max(target_delay_ms, + static_cast<int>(packet_length_samples_ / sample_rate_khz_)); + } + return target_delay_ms; +} + +int DecisionLogic::UnlimitedTargetLevelMs() const { + return delay_manager_->UnlimitedTargetLevelMs(); +} + +int DecisionLogic::GetFilteredBufferLevel() const { + if (config_.enable_stable_playout_delay) { + return last_playout_delay_ms_ * sample_rate_khz_; + } + return buffer_level_filter_->filtered_current_level(); +} + +absl::optional<int> DecisionLogic::PacketArrived( + int fs_hz, + bool should_update_stats, + const PacketArrivedInfo& info) { + buffer_flush_ = buffer_flush_ || info.buffer_flush; + if (!should_update_stats || info.is_cng_or_dtmf) { + return absl::nullopt; + } + if (info.packet_length_samples > 0 && fs_hz > 0 && + info.packet_length_samples != packet_length_samples_) { + packet_length_samples_ = info.packet_length_samples; + delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz); + } + int64_t time_now_ms = tick_timer_->ticks() * tick_timer_->ms_per_tick(); + packet_arrival_history_.Insert(info.main_timestamp, time_now_ms); + if (packet_arrival_history_.size() < 2) { + // No meaningful delay estimate unless at least 2 packets have arrived. 
+ return absl::nullopt; + } + int arrival_delay_ms = + packet_arrival_history_.GetDelayMs(info.main_timestamp, time_now_ms); + bool reordered = + !packet_arrival_history_.IsNewestRtpTimestamp(info.main_timestamp); + delay_manager_->Update(arrival_delay_ms, reordered); + return arrival_delay_ms; +} + +void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { + buffer_level_filter_->SetTargetBufferLevel(TargetLevelMs()); + + int time_stretched_samples = time_stretched_cn_samples_; + if (prev_time_scale_) { + time_stretched_samples += sample_memory_; + } + + if (buffer_flush_) { + buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples); + buffer_flush_ = false; + } else { + buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples); + } + prev_time_scale_ = false; + time_stretched_cn_samples_ = 0; +} + +NetEq::Operation DecisionLogic::CngOperation( + NetEqController::NetEqStatus status) { + // Signed difference between target and available timestamp. + int32_t timestamp_diff = static_cast<int32_t>( + static_cast<uint32_t>(status.generated_noise_samples + + status.target_timestamp) - + status.next_packet->timestamp); + int optimal_level_samp = TargetLevelMs() * sample_rate_khz_; + const int64_t excess_waiting_time_samp = + -static_cast<int64_t>(timestamp_diff) - optimal_level_samp; + + if (excess_waiting_time_samp > optimal_level_samp / 2) { + // The waiting time for this packet will be longer than 1.5 + // times the wanted buffer delay. Apply fast-forward to cut the + // waiting time down to the optimal. + noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ + + excess_waiting_time_samp); + timestamp_diff = + rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp); + } + + if (timestamp_diff < 0 && status.last_mode == NetEq::Mode::kRfc3389Cng) { + // Not time to play this packet yet. Wait another round before using this + // packet. Keep on playing CNG from previous CNG parameters. 
+ return NetEq::Operation::kRfc3389CngNoPacket; + } else { + // Otherwise, go for the CNG packet now. + noise_fast_forward_ = 0; + return NetEq::Operation::kRfc3389Cng; + } +} + +NetEq::Operation DecisionLogic::NoPacket(NetEqController::NetEqStatus status) { + if (cng_state_ == kCngRfc3389On) { + // Keep on playing comfort noise. + return NetEq::Operation::kRfc3389CngNoPacket; + } else if (cng_state_ == kCngInternalOn) { + // Keep on playing codec internal comfort noise. + return NetEq::Operation::kCodecInternalCng; + } else if (status.play_dtmf) { + return NetEq::Operation::kDtmf; + } else { + // Nothing to play, do expand. + return NetEq::Operation::kExpand; + } +} + +NetEq::Operation DecisionLogic::ExpectedPacketAvailable( + NetEqController::NetEqStatus status) { + if (!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand && + !status.play_dtmf) { + if (config_.enable_stable_playout_delay) { + const int playout_delay_ms = GetPlayoutDelayMs(status); + if (playout_delay_ms >= HighThreshold() << 2) { + return NetEq::Operation::kFastAccelerate; + } + if (TimescaleAllowed()) { + if (playout_delay_ms >= HighThreshold()) { + return NetEq::Operation::kAccelerate; + } + if (playout_delay_ms < LowThreshold()) { + return NetEq::Operation::kPreemptiveExpand; + } + } + } else { + const int target_level_samples = TargetLevelMs() * sample_rate_khz_; + const int low_limit = std::max( + target_level_samples * 3 / 4, + target_level_samples - + config_.deceleration_target_level_offset_ms * sample_rate_khz_); + const int high_limit = std::max( + target_level_samples, + low_limit + kDelayAdjustmentGranularityMs * sample_rate_khz_); + + const int buffer_level_samples = + buffer_level_filter_->filtered_current_level(); + if (buffer_level_samples >= high_limit << 2) + return NetEq::Operation::kFastAccelerate; + if (TimescaleAllowed()) { + if (buffer_level_samples >= high_limit) + return NetEq::Operation::kAccelerate; + if (buffer_level_samples < low_limit) + return 
NetEq::Operation::kPreemptiveExpand; + } + } + } + return NetEq::Operation::kNormal; +} + +NetEq::Operation DecisionLogic::FuturePacketAvailable( + NetEqController::NetEqStatus status) { + // Required packet is not available, but a future packet is. + // Check if we should continue with an ongoing expand because the new packet + // is too far into the future. + if (IsExpand(status.last_mode) && ShouldContinueExpand(status)) { + if (status.play_dtmf) { + // Still have DTMF to play, so do not do expand. + return NetEq::Operation::kDtmf; + } else { + // Nothing to play. + return NetEq::Operation::kExpand; + } + } + + if (status.last_mode == NetEq::Mode::kCodecPlc) { + return NetEq::Operation::kNormal; + } + + // If previous was comfort noise, then no merge is needed. + if (IsCng(status.last_mode)) { + uint32_t timestamp_leap = + status.next_packet->timestamp - status.target_timestamp; + const bool generated_enough_noise = + status.generated_noise_samples >= timestamp_leap; + + int playout_delay_ms = GetNextPacketDelayMs(status); + const bool above_target_delay = playout_delay_ms > HighThresholdCng(); + const bool below_target_delay = playout_delay_ms < LowThresholdCng(); + // Keep the delay same as before CNG, but make sure that it is within the + // target window. + if ((generated_enough_noise && !below_target_delay) || above_target_delay) { + time_stretched_cn_samples_ = + timestamp_leap - status.generated_noise_samples; + return NetEq::Operation::kNormal; + } + + if (status.last_mode == NetEq::Mode::kRfc3389Cng) { + return NetEq::Operation::kRfc3389CngNoPacket; + } + return NetEq::Operation::kCodecInternalCng; + } + + // Do not merge unless we have done an expand before. + if (status.last_mode == NetEq::Mode::kExpand) { + return NetEq::Operation::kMerge; + } else if (status.play_dtmf) { + // Play DTMF instead of expand. 
+ return NetEq::Operation::kDtmf; + } else { + return NetEq::Operation::kExpand; + } +} + +bool DecisionLogic::UnderTargetLevel() const { + return buffer_level_filter_->filtered_current_level() < + TargetLevelMs() * sample_rate_khz_; +} + +bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { + return timestamp_leap >= static_cast<uint32_t>(output_size_samples_ * + config_.reinit_after_expands); +} + +bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { + return timestamp_leap > + static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_); +} + +bool DecisionLogic::MaxWaitForPacket() const { + return num_consecutive_expands_ >= kMaxWaitForPacketTicks; +} + +bool DecisionLogic::ShouldContinueExpand( + NetEqController::NetEqStatus status) const { + uint32_t timestamp_leap = + status.next_packet->timestamp - status.target_timestamp; + if (config_.enable_stable_playout_delay) { + return GetNextPacketDelayMs(status) < HighThreshold() && + PacketTooEarly(timestamp_leap); + } + return !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && + PacketTooEarly(timestamp_leap) && UnderTargetLevel(); +} + +int DecisionLogic::GetNextPacketDelayMs( + NetEqController::NetEqStatus status) const { + if (config_.enable_stable_playout_delay) { + return packet_arrival_history_.GetDelayMs( + status.next_packet->timestamp, + tick_timer_->ticks() * tick_timer_->ms_per_tick()); + } + return status.packet_buffer_info.span_samples / sample_rate_khz_; +} + +int DecisionLogic::GetPlayoutDelayMs( + NetEqController::NetEqStatus status) const { + uint32_t playout_timestamp = + status.target_timestamp - status.sync_buffer_samples; + return packet_arrival_history_.GetDelayMs( + playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick()); +} + +int DecisionLogic::LowThreshold() const { + int target_delay_ms = TargetLevelMs(); + return std::max( + target_delay_ms * 3 / 4, + target_delay_ms - config_.deceleration_target_level_offset_ms); +} 
+ +int DecisionLogic::HighThreshold() const { + if (config_.enable_stable_playout_delay) { + return std::max(TargetLevelMs(), packet_arrival_history_.GetMaxDelayMs()) + + kDelayAdjustmentGranularityMs; + } + return std::max(TargetLevelMs(), + LowThreshold() + kDelayAdjustmentGranularityMs); +} + +int DecisionLogic::LowThresholdCng() const { + if (config_.enable_stable_playout_delay) { + return LowThreshold(); + } + return std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2); +} + +int DecisionLogic::HighThresholdCng() const { + if (config_.enable_stable_playout_delay) { + return HighThreshold(); + } + return TargetLevelMs() + kTargetLevelWindowMs / 2; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h new file mode 100644 index 0000000000..2e55322f8f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.h @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_ +#define MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_ + +#include <memory> + +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/buffer_level_filter.h" +#include "modules/audio_coding/neteq/delay_manager.h" +#include "modules/audio_coding/neteq/packet_arrival_history.h" +#include "rtc_base/experiments/field_trial_parser.h" + +namespace webrtc { + +// This is the class for the decision tree implementation. 
+class DecisionLogic : public NetEqController { + public: + DecisionLogic(NetEqController::Config config); + DecisionLogic(NetEqController::Config config, + std::unique_ptr<DelayManager> delay_manager, + std::unique_ptr<BufferLevelFilter> buffer_level_filter); + + ~DecisionLogic() override; + + DecisionLogic(const DecisionLogic&) = delete; + DecisionLogic& operator=(const DecisionLogic&) = delete; + + // Not used. + void Reset() override {} + + // Resets parts of the state. Typically done when switching codecs. + void SoftReset() override; + + // Sets the sample rate and the output block size. + void SetSampleRate(int fs_hz, size_t output_size_samples) override; + + // Given info about the latest received packet, and current jitter buffer + // status, returns the operation. `target_timestamp` and `expand_mutefactor` + // are provided for reference. `last_packet_samples` is the number of samples + // obtained from the last decoded frame. If there is a packet available, it + // should be supplied in `packet`; otherwise it should be NULL. The mode + // resulting from the last call to NetEqImpl::GetAudio is supplied in + // `last_mode`. If there is a DTMF event to play, `play_dtmf` should be set to + // true. The output variable `reset_decoder` will be set to true if a reset is + // required; otherwise it is left unchanged (i.e., it can remain true if it + // was true before the call). + NetEq::Operation GetDecision(const NetEqController::NetEqStatus& status, + bool* reset_decoder) override; + + // These methods test the `cng_state_` for different conditions. + bool CngRfc3389On() const override { return cng_state_ == kCngRfc3389On; } + bool CngOff() const override { return cng_state_ == kCngOff; } + + // Resets the `cng_state_` to kCngOff. + void SetCngOff() override { cng_state_ = kCngOff; } + + void ExpandDecision(NetEq::Operation operation) override {} + + // Adds `value` to `sample_memory_`. 
+ void AddSampleMemory(int32_t value) override { sample_memory_ += value; } + + int TargetLevelMs() const override; + + int UnlimitedTargetLevelMs() const override; + + absl::optional<int> PacketArrived(int fs_hz, + bool should_update_stats, + const PacketArrivedInfo& info) override; + + void RegisterEmptyPacket() override {} + + void NotifyMutedState() override; + + bool SetMaximumDelay(int delay_ms) override { + return delay_manager_->SetMaximumDelay(delay_ms); + } + bool SetMinimumDelay(int delay_ms) override { + return delay_manager_->SetMinimumDelay(delay_ms); + } + bool SetBaseMinimumDelay(int delay_ms) override { + return delay_manager_->SetBaseMinimumDelay(delay_ms); + } + int GetBaseMinimumDelay() const override { + return delay_manager_->GetBaseMinimumDelay(); + } + bool PeakFound() const override { return false; } + + int GetFilteredBufferLevel() const override; + + // Accessors and mutators. + void set_sample_memory(int32_t value) override { sample_memory_ = value; } + size_t noise_fast_forward() const override { return noise_fast_forward_; } + size_t packet_length_samples() const override { + return packet_length_samples_; + } + void set_packet_length_samples(size_t value) override { + packet_length_samples_ = value; + } + void set_prev_time_scale(bool value) override { prev_time_scale_ = value; } + + private: + // The value 5 sets maximum time-stretch rate to about 100 ms/s. + static const int kMinTimescaleInterval = 5; + + enum CngState { kCngOff, kCngRfc3389On, kCngInternalOn }; + + // Updates the `buffer_level_filter_` with the current buffer level + // `buffer_size_samples`. + void FilterBufferLevel(size_t buffer_size_samples); + + // Returns the operation given that the next available packet is a comfort + // noise payload (RFC 3389 only, not codec-internal). 
+ virtual NetEq::Operation CngOperation(NetEqController::NetEqStatus status); + + // Returns the operation given that no packets are available (except maybe + // a DTMF event, flagged by setting `play_dtmf` true). + virtual NetEq::Operation NoPacket(NetEqController::NetEqStatus status); + + // Returns the operation to do given that the expected packet is available. + virtual NetEq::Operation ExpectedPacketAvailable( + NetEqController::NetEqStatus status); + + // Returns the operation to do given that the expected packet is not + // available, but a packet further into the future is at hand. + virtual NetEq::Operation FuturePacketAvailable( + NetEqController::NetEqStatus status); + + // Checks if enough time has elapsed since the last successful timescale + // operation was done (i.e., accelerate or preemptive expand). + bool TimescaleAllowed() const { + return !timescale_countdown_ || timescale_countdown_->Finished(); + } + + // Checks if the current (filtered) buffer level is under the target level. + bool UnderTargetLevel() const; + + // Checks if `timestamp_leap` is so long into the future that a reset due + // to exceeding kReinitAfterExpands will be done. + bool ReinitAfterExpands(uint32_t timestamp_leap) const; + + // Checks if we still have not done enough expands to cover the distance from + // the last decoded packet to the next available packet, the distance beeing + // conveyed in `timestamp_leap`. + bool PacketTooEarly(uint32_t timestamp_leap) const; + + bool MaxWaitForPacket() const; + + bool ShouldContinueExpand(NetEqController::NetEqStatus status) const; + + int GetNextPacketDelayMs(NetEqController::NetEqStatus status) const; + int GetPlayoutDelayMs(NetEqController::NetEqStatus status) const; + + int LowThreshold() const; + int HighThreshold() const; + int LowThresholdCng() const; + int HighThresholdCng() const; + + // Runtime configurable options through field trial + // WebRTC-Audio-NetEqDecisionLogicConfig. 
+ struct Config { + Config(); + + bool enable_stable_playout_delay = false; + int reinit_after_expands = 100; + int deceleration_target_level_offset_ms = 85; + int packet_history_size_ms = 2000; + }; + Config config_; + std::unique_ptr<DelayManager> delay_manager_; + std::unique_ptr<BufferLevelFilter> buffer_level_filter_; + PacketArrivalHistory packet_arrival_history_; + const TickTimer* tick_timer_; + int sample_rate_khz_; + size_t output_size_samples_; + CngState cng_state_ = kCngOff; // Remember if comfort noise is interrupted by + // other event (e.g., DTMF). + size_t noise_fast_forward_ = 0; + size_t packet_length_samples_ = 0; + int sample_memory_ = 0; + bool prev_time_scale_ = false; + bool disallow_time_stretching_; + std::unique_ptr<TickTimer::Countdown> timescale_countdown_; + int num_consecutive_expands_ = 0; + int time_stretched_cn_samples_ = 0; + bool buffer_flush_ = false; + int last_playout_delay_ms_ = 0; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc new file mode 100644 index 0000000000..d70e3070f3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DecisionLogic class and derived classes. 
+ +#include "modules/audio_coding/neteq/decision_logic.h" + +#include "api/neteq/neteq_controller.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/buffer_level_filter.h" +#include "modules/audio_coding/neteq/delay_manager.h" +#include "modules/audio_coding/neteq/mock/mock_buffer_level_filter.h" +#include "modules/audio_coding/neteq/mock/mock_delay_manager.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr int kSampleRate = 8000; +constexpr int kSamplesPerMs = kSampleRate / 1000; +constexpr int kOutputSizeSamples = kSamplesPerMs * 10; +constexpr int kMinTimescaleInterval = 5; + +NetEqController::NetEqStatus CreateNetEqStatus(NetEq::Mode last_mode, + int current_delay_ms) { + NetEqController::NetEqStatus status; + status.play_dtmf = false; + status.last_mode = last_mode; + status.target_timestamp = 1234; + status.generated_noise_samples = 0; + status.expand_mutefactor = 0; + status.packet_buffer_info.num_samples = current_delay_ms * kSamplesPerMs; + status.packet_buffer_info.span_samples = current_delay_ms * kSamplesPerMs; + status.packet_buffer_info.span_samples_no_dtx = + current_delay_ms * kSamplesPerMs; + status.packet_buffer_info.dtx_or_cng = false; + status.next_packet = {status.target_timestamp, false, false}; + return status; +} + +using ::testing::Return; + +} // namespace + +class DecisionLogicTest : public ::testing::Test { + protected: + DecisionLogicTest() { + NetEqController::Config config; + config.tick_timer = &tick_timer_; + config.allow_time_stretching = true; + auto delay_manager = std::make_unique<MockDelayManager>( + DelayManager::Config(), config.tick_timer); + mock_delay_manager_ = delay_manager.get(); + auto buffer_level_filter = std::make_unique<MockBufferLevelFilter>(); + mock_buffer_level_filter_ = buffer_level_filter.get(); + decision_logic_ = std::make_unique<DecisionLogic>( + config, std::move(delay_manager), std::move(buffer_level_filter)); + 
decision_logic_->SetSampleRate(kSampleRate, kOutputSizeSamples); + } + + TickTimer tick_timer_; + std::unique_ptr<DecisionLogic> decision_logic_; + MockDelayManager* mock_delay_manager_; + MockBufferLevelFilter* mock_buffer_level_filter_; +}; + +TEST_F(DecisionLogicTest, NormalOperation) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(100)); + EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level()) + .WillRepeatedly(Return(90 * kSamplesPerMs)); + + bool reset_decoder = false; + tick_timer_.Increment(kMinTimescaleInterval + 1); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder), + NetEq::Operation::kNormal); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, Accelerate) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(100)); + EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level()) + .WillRepeatedly(Return(110 * kSamplesPerMs)); + + bool reset_decoder = false; + tick_timer_.Increment(kMinTimescaleInterval + 1); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder), + NetEq::Operation::kAccelerate); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, FastAccelerate) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(100)); + EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level()) + .WillRepeatedly(Return(400 * kSamplesPerMs)); + + bool reset_decoder = false; + tick_timer_.Increment(kMinTimescaleInterval + 1); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder), + NetEq::Operation::kFastAccelerate); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, PreemptiveExpand) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(100)); + EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level()) + .WillRepeatedly(Return(50 * 
kSamplesPerMs)); + + bool reset_decoder = false; + tick_timer_.Increment(kMinTimescaleInterval + 1); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder), + NetEq::Operation::kPreemptiveExpand); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, DecelerationTargetLevelOffset) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(500)); + EXPECT_CALL(*mock_buffer_level_filter_, filtered_current_level()) + .WillRepeatedly(Return(400 * kSamplesPerMs)); + + bool reset_decoder = false; + tick_timer_.Increment(kMinTimescaleInterval + 1); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 400), &reset_decoder), + NetEq::Operation::kPreemptiveExpand); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, PostponeDecodeAfterExpand) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(500)); + + // Below 50% target delay threshold. + bool reset_decoder = false; + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kExpand, 200), &reset_decoder), + NetEq::Operation::kExpand); + EXPECT_FALSE(reset_decoder); + + // Above 50% target delay threshold. + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kExpand, 250), &reset_decoder), + NetEq::Operation::kNormal); + EXPECT_FALSE(reset_decoder); +} + +TEST_F(DecisionLogicTest, TimeStrechComfortNoise) { + EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) + .WillRepeatedly(Return(500)); + + { + bool reset_decoder = false; + // Below target window. 
+ auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 400); + status.generated_noise_samples = 400 * kSamplesPerMs; + status.next_packet->timestamp = + status.target_timestamp + 400 * kSamplesPerMs; + EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder), + NetEq::Operation::kCodecInternalCng); + EXPECT_FALSE(reset_decoder); + } + + { + bool reset_decoder = false; + // Above target window. + auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 600); + status.generated_noise_samples = 200 * kSamplesPerMs; + status.next_packet->timestamp = + status.target_timestamp + 400 * kSamplesPerMs; + EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder), + NetEq::Operation::kNormal); + EXPECT_FALSE(reset_decoder); + + // The buffer level filter should be adjusted with the number of samples + // that was skipped. + int timestamp_leap = status.next_packet->timestamp - + status.target_timestamp - + status.generated_noise_samples; + EXPECT_CALL(*mock_buffer_level_filter_, + Update(400 * kSamplesPerMs, timestamp_leap)); + EXPECT_EQ(decision_logic_->GetDecision( + CreateNetEqStatus(NetEq::Mode::kNormal, 400), &reset_decoder), + NetEq::Operation::kNormal); + EXPECT_FALSE(reset_decoder); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc new file mode 100644 index 0000000000..3447ced1da --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.cc @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/decoder_database.h" + +#include <stddef.h> + +#include <cstdint> +#include <list> +#include <type_traits> +#include <utility> + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/audio_format_to_string.h" + +namespace webrtc { + +DecoderDatabase::DecoderDatabase( + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id) + : active_decoder_type_(-1), + active_cng_decoder_type_(-1), + decoder_factory_(decoder_factory), + codec_pair_id_(codec_pair_id) {} + +DecoderDatabase::~DecoderDatabase() = default; + +DecoderDatabase::DecoderInfo::DecoderInfo( + const SdpAudioFormat& audio_format, + absl::optional<AudioCodecPairId> codec_pair_id, + AudioDecoderFactory* factory, + absl::string_view codec_name) + : name_(codec_name), + audio_format_(audio_format), + codec_pair_id_(codec_pair_id), + factory_(factory), + cng_decoder_(CngDecoder::Create(audio_format)), + subtype_(SubtypeFromFormat(audio_format)) {} + +DecoderDatabase::DecoderInfo::DecoderInfo( + const SdpAudioFormat& audio_format, + absl::optional<AudioCodecPairId> codec_pair_id, + AudioDecoderFactory* factory) + : DecoderInfo(audio_format, codec_pair_id, factory, audio_format.name) {} + +DecoderDatabase::DecoderInfo::DecoderInfo(DecoderInfo&&) = default; +DecoderDatabase::DecoderInfo::~DecoderInfo() = default; + +AudioDecoder* DecoderDatabase::DecoderInfo::GetDecoder() const { + if (subtype_ != Subtype::kNormal) { + // These are handled internally, so they have no AudioDecoder objects. + return nullptr; + } + if (!decoder_) { + // TODO(ossu): Keep a check here for now, since a number of tests create + // DecoderInfos without factories. 
+ RTC_DCHECK(factory_); + decoder_ = factory_->MakeAudioDecoder(audio_format_, codec_pair_id_); + } + RTC_DCHECK(decoder_) << "Failed to create: " << rtc::ToString(audio_format_); + return decoder_.get(); +} + +bool DecoderDatabase::DecoderInfo::IsType(absl::string_view name) const { + return absl::EqualsIgnoreCase(audio_format_.name, name); +} + +absl::optional<DecoderDatabase::DecoderInfo::CngDecoder> +DecoderDatabase::DecoderInfo::CngDecoder::Create(const SdpAudioFormat& format) { + if (absl::EqualsIgnoreCase(format.name, "CN")) { + // CN has a 1:1 RTP clock rate to sample rate ratio. + const int sample_rate_hz = format.clockrate_hz; + RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 || + sample_rate_hz == 32000 || sample_rate_hz == 48000); + return DecoderDatabase::DecoderInfo::CngDecoder{sample_rate_hz}; + } else { + return absl::nullopt; + } +} + +DecoderDatabase::DecoderInfo::Subtype +DecoderDatabase::DecoderInfo::SubtypeFromFormat(const SdpAudioFormat& format) { + if (absl::EqualsIgnoreCase(format.name, "CN")) { + return Subtype::kComfortNoise; + } else if (absl::EqualsIgnoreCase(format.name, "telephone-event")) { + return Subtype::kDtmf; + } else if (absl::EqualsIgnoreCase(format.name, "red")) { + return Subtype::kRed; + } + + return Subtype::kNormal; +} + +bool DecoderDatabase::Empty() const { + return decoders_.empty(); +} + +int DecoderDatabase::Size() const { + return static_cast<int>(decoders_.size()); +} + +std::vector<int> DecoderDatabase::SetCodecs( + const std::map<int, SdpAudioFormat>& codecs) { + // First collect all payload types that we'll remove or reassign, then remove + // them from the database. 
+ std::vector<int> changed_payload_types; + for (const std::pair<uint8_t, const DecoderInfo&> kv : decoders_) { + auto i = codecs.find(kv.first); + if (i == codecs.end() || i->second != kv.second.GetFormat()) { + changed_payload_types.push_back(kv.first); + } + } + for (int pl_type : changed_payload_types) { + Remove(pl_type); + } + + // Enter the new and changed payload type mappings into the database. + for (const auto& kv : codecs) { + const int& rtp_payload_type = kv.first; + const SdpAudioFormat& audio_format = kv.second; + RTC_DCHECK_GE(rtp_payload_type, 0); + RTC_DCHECK_LE(rtp_payload_type, 0x7f); + if (decoders_.count(rtp_payload_type) == 0) { + decoders_.insert(std::make_pair( + rtp_payload_type, + DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get()))); + } else { + // The mapping for this payload type hasn't changed. + } + } + + return changed_payload_types; +} + +int DecoderDatabase::RegisterPayload(int rtp_payload_type, + const SdpAudioFormat& audio_format) { + if (rtp_payload_type < 0 || rtp_payload_type > 0x7f) { + return kInvalidRtpPayloadType; + } + const auto ret = decoders_.insert(std::make_pair( + rtp_payload_type, + DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get()))); + if (ret.second == false) { + // Database already contains a decoder with type `rtp_payload_type`. + return kDecoderExists; + } + return kOK; +} + +int DecoderDatabase::Remove(uint8_t rtp_payload_type) { + if (decoders_.erase(rtp_payload_type) == 0) { + // No decoder with that `rtp_payload_type`. + return kDecoderNotFound; + } + if (active_decoder_type_ == rtp_payload_type) { + active_decoder_type_ = -1; // No active decoder. + } + if (active_cng_decoder_type_ == rtp_payload_type) { + active_cng_decoder_type_ = -1; // No active CNG decoder. + } + return kOK; +} + +void DecoderDatabase::RemoveAll() { + decoders_.clear(); + active_decoder_type_ = -1; // No active decoder. + active_cng_decoder_type_ = -1; // No active CNG decoder. 
+} + +const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo( + uint8_t rtp_payload_type) const { + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return NULL; + } + return &it->second; +} + +int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type, + bool* new_decoder) { + // Check that `rtp_payload_type` exists in the database. + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + if (!info) { + // Decoder not found. + return kDecoderNotFound; + } + RTC_CHECK(!info->IsComfortNoise()); + RTC_DCHECK(new_decoder); + *new_decoder = false; + if (active_decoder_type_ < 0) { + // This is the first active decoder. + *new_decoder = true; + } else if (active_decoder_type_ != rtp_payload_type) { + // Moving from one active decoder to another. Delete the first one. + const DecoderInfo* old_info = GetDecoderInfo(active_decoder_type_); + RTC_DCHECK(old_info); + old_info->DropDecoder(); + *new_decoder = true; + } + active_decoder_type_ = rtp_payload_type; + return kOK; +} + +AudioDecoder* DecoderDatabase::GetActiveDecoder() const { + if (active_decoder_type_ < 0) { + // No active decoder. + return NULL; + } + return GetDecoder(active_decoder_type_); +} + +int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) { + // Check that `rtp_payload_type` exists in the database. + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + if (!info) { + // Decoder not found. + return kDecoderNotFound; + } + if (active_cng_decoder_type_ >= 0 && + active_cng_decoder_type_ != rtp_payload_type) { + // Moving from one active CNG decoder to another. Delete the first one. + RTC_DCHECK(active_cng_decoder_); + active_cng_decoder_.reset(); + } + active_cng_decoder_type_ = rtp_payload_type; + return kOK; +} + +ComfortNoiseDecoder* DecoderDatabase::GetActiveCngDecoder() const { + if (active_cng_decoder_type_ < 0) { + // No active CNG decoder. 
+ return NULL; + } + if (!active_cng_decoder_) { + active_cng_decoder_.reset(new ComfortNoiseDecoder); + } + return active_cng_decoder_.get(); +} + +AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info ? info->GetDecoder() : nullptr; +} + +bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsComfortNoise(); +} + +bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsDtmf(); +} + +bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const { + const DecoderInfo* info = GetDecoderInfo(rtp_payload_type); + return info && info->IsRed(); +} + +int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const { + PacketList::const_iterator it; + for (it = packet_list.begin(); it != packet_list.end(); ++it) { + if (!GetDecoderInfo(it->payload_type)) { + // Payload type is not found. + RTC_LOG(LS_WARNING) << "CheckPayloadTypes: unknown RTP payload type " + << static_cast<int>(it->payload_type); + return kDecoderNotFound; + } + } + return kOK; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h new file mode 100644 index 0000000000..8cf2019135 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_ +#define MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_ + +#include <map> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/audio_codecs/audio_format.h" +#include "api/scoped_refptr.h" +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" +#include "modules/audio_coding/neteq/packet.h" + +namespace webrtc { + +class DecoderDatabase { + public: + enum DatabaseReturnCodes { + kOK = 0, + kInvalidRtpPayloadType = -1, + kCodecNotSupported = -2, + kInvalidSampleRate = -3, + kDecoderExists = -4, + kDecoderNotFound = -5, + kInvalidPointer = -6 + }; + + // Class that stores decoder info in the database. + class DecoderInfo { + public: + DecoderInfo(const SdpAudioFormat& audio_format, + absl::optional<AudioCodecPairId> codec_pair_id, + AudioDecoderFactory* factory, + absl::string_view codec_name); + explicit DecoderInfo(const SdpAudioFormat& audio_format, + absl::optional<AudioCodecPairId> codec_pair_id, + AudioDecoderFactory* factory = nullptr); + DecoderInfo(DecoderInfo&&); + ~DecoderInfo(); + + // Get the AudioDecoder object, creating it first if necessary. + AudioDecoder* GetDecoder() const; + + // Delete the AudioDecoder object, unless it's external. (This means we can + // always recreate it later if we need it.) + void DropDecoder() const { decoder_.reset(); } + + int SampleRateHz() const { + if (IsDtmf()) { + // DTMF has a 1:1 mapping between clock rate and sample rate. + return audio_format_.clockrate_hz; + } + const AudioDecoder* decoder = GetDecoder(); + RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_); + return decoder ? 
decoder->SampleRateHz() : cng_decoder_->sample_rate_hz; + } + + const SdpAudioFormat& GetFormat() const { return audio_format_; } + + // Returns true if the decoder's format is comfort noise. + bool IsComfortNoise() const { + RTC_DCHECK_EQ(!!cng_decoder_, subtype_ == Subtype::kComfortNoise); + return subtype_ == Subtype::kComfortNoise; + } + + // Returns true if the decoder's format is DTMF. + bool IsDtmf() const { return subtype_ == Subtype::kDtmf; } + + // Returns true if the decoder's format is RED. + bool IsRed() const { return subtype_ == Subtype::kRed; } + + // Returns true if the decoder's format is named `name`. + bool IsType(absl::string_view name) const; + + const std::string& get_name() const { return name_; } + + private: + // TODO(ossu): `name_` is kept here while we retain the old external + // decoder interface. Remove this once using an + // AudioDecoderFactory has supplanted the old functionality. + const std::string name_; + + const SdpAudioFormat audio_format_; + const absl::optional<AudioCodecPairId> codec_pair_id_; + AudioDecoderFactory* const factory_; + mutable std::unique_ptr<AudioDecoder> decoder_; + + // Set iff this is a comfort noise decoder. + struct CngDecoder { + static absl::optional<CngDecoder> Create(const SdpAudioFormat& format); + int sample_rate_hz; + }; + const absl::optional<CngDecoder> cng_decoder_; + + enum class Subtype : int8_t { kNormal, kComfortNoise, kDtmf, kRed }; + + static Subtype SubtypeFromFormat(const SdpAudioFormat& format); + + const Subtype subtype_; + }; + + // Maximum value for 8 bits, and an invalid RTP payload type (since it is + // only 7 bits). 
+ static const uint8_t kRtpPayloadTypeError = 0xFF; + + DecoderDatabase( + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + absl::optional<AudioCodecPairId> codec_pair_id); + + virtual ~DecoderDatabase(); + + DecoderDatabase(const DecoderDatabase&) = delete; + DecoderDatabase& operator=(const DecoderDatabase&) = delete; + + // Returns true if the database is empty. + virtual bool Empty() const; + + // Returns the number of decoders registered in the database. + virtual int Size() const; + + // Replaces the existing set of decoders with the given set. Returns the + // payload types that were reassigned or removed while doing so. + virtual std::vector<int> SetCodecs( + const std::map<int, SdpAudioFormat>& codecs); + + // Registers a decoder for the given payload type. Returns kOK on success; + // otherwise an error code. + virtual int RegisterPayload(int rtp_payload_type, + const SdpAudioFormat& audio_format); + + // Removes the entry for `rtp_payload_type` from the database. + // Returns kDecoderNotFound or kOK depending on the outcome of the operation. + virtual int Remove(uint8_t rtp_payload_type); + + // Remove all entries. + virtual void RemoveAll(); + + // Returns a pointer to the DecoderInfo struct for `rtp_payload_type`. If + // no decoder is registered with that `rtp_payload_type`, NULL is returned. + virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const; + + // Sets the active decoder to be `rtp_payload_type`. If this call results in a + // change of active decoder, `new_decoder` is set to true. The previous active + // decoder's AudioDecoder object is deleted. + virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder); + + // Returns the current active decoder, or NULL if no active decoder exists. + virtual AudioDecoder* GetActiveDecoder() const; + + // Sets the active comfort noise decoder to be `rtp_payload_type`. 
If this + // call results in a change of active comfort noise decoder, the previous + // active decoder's AudioDecoder object is deleted. + virtual int SetActiveCngDecoder(uint8_t rtp_payload_type); + + // Returns the current active comfort noise decoder, or NULL if no active + // comfort noise decoder exists. + virtual ComfortNoiseDecoder* GetActiveCngDecoder() const; + + // The following are utility methods: they will look up DecoderInfo through + // GetDecoderInfo and call the respective method on that info object, if it + // exists. + + // Returns a pointer to the AudioDecoder object associated with + // `rtp_payload_type`, or NULL if none is registered. If the AudioDecoder + // object does not exist for that decoder, the object is created. + AudioDecoder* GetDecoder(uint8_t rtp_payload_type) const; + + // Returns true if `rtp_payload_type` is registered as comfort noise. + bool IsComfortNoise(uint8_t rtp_payload_type) const; + + // Returns true if `rtp_payload_type` is registered as DTMF. + bool IsDtmf(uint8_t rtp_payload_type) const; + + // Returns true if `rtp_payload_type` is registered as RED. + bool IsRed(uint8_t rtp_payload_type) const; + + // Returns kOK if all packets in `packet_list` carry payload types that are + // registered in the database. Otherwise, returns kDecoderNotFound. 
+ int CheckPayloadTypes(const PacketList& packet_list) const; + + private: + typedef std::map<uint8_t, DecoderInfo> DecoderMap; + + DecoderMap decoders_; + int active_decoder_type_; + int active_cng_decoder_type_; + mutable std::unique_ptr<ComfortNoiseDecoder> active_cng_decoder_; + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_; + const absl::optional<AudioCodecPairId> codec_pair_id_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc new file mode 100644 index 0000000000..445c21924b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decoder_database_unittest.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/decoder_database.h" + +#include <stdlib.h> + +#include <string> + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_audio_decoder.h" +#include "test/mock_audio_decoder_factory.h" + +using ::testing::_; +using ::testing::Invoke; + +namespace webrtc { + +TEST(DecoderDatabase, CreateAndDestroy) { + DecoderDatabase db(rtc::make_ref_counted<MockAudioDecoderFactory>(), + absl::nullopt); + EXPECT_EQ(0, db.Size()); + EXPECT_TRUE(db.Empty()); +} + +TEST(DecoderDatabase, InsertAndRemove) { + auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + DecoderDatabase db(factory, absl::nullopt); + const uint8_t kPayloadType = 0; + const std::string kCodecName = "Robert\'); DROP TABLE Students;"; + EXPECT_EQ( + DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1))); + EXPECT_EQ(1, db.Size()); + EXPECT_FALSE(db.Empty()); + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType)); + EXPECT_EQ(0, db.Size()); + EXPECT_TRUE(db.Empty()); +} + +TEST(DecoderDatabase, InsertAndRemoveAll) { + auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + DecoderDatabase db(factory, absl::nullopt); + const std::string kCodecName1 = "Robert\'); DROP TABLE Students;"; + const std::string kCodecName2 = "https://xkcd.com/327/"; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(0, SdpAudioFormat(kCodecName1, 8000, 1))); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(1, SdpAudioFormat(kCodecName2, 8000, 1))); + EXPECT_EQ(2, db.Size()); + EXPECT_FALSE(db.Empty()); + db.RemoveAll(); + EXPECT_EQ(0, db.Size()); + EXPECT_TRUE(db.Empty()); +} + +TEST(DecoderDatabase, GetDecoderInfo) { + auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + auto* decoder = new MockAudioDecoder; + EXPECT_CALL(*factory, MakeAudioDecoderMock(_, _, _)) + .WillOnce(Invoke([decoder](const SdpAudioFormat& format, + 
absl::optional<AudioCodecPairId> codec_pair_id, + std::unique_ptr<AudioDecoder>* dec) { + EXPECT_EQ("pcmu", format.name); + dec->reset(decoder); + })); + DecoderDatabase db(factory, absl::nullopt); + const uint8_t kPayloadType = 0; + const std::string kCodecName = "pcmu"; + EXPECT_EQ( + DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1))); + const DecoderDatabase::DecoderInfo* info; + info = db.GetDecoderInfo(kPayloadType); + ASSERT_TRUE(info != NULL); + EXPECT_TRUE(info->IsType("pcmu")); + EXPECT_EQ(kCodecName, info->get_name()); + EXPECT_EQ(decoder, db.GetDecoder(kPayloadType)); + info = db.GetDecoderInfo(kPayloadType + 1); // Other payload type. + EXPECT_TRUE(info == NULL); // Should not be found. +} + +TEST(DecoderDatabase, GetDecoder) { + DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt); + const uint8_t kPayloadType = 0; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, SdpAudioFormat("l16", 8000, 1))); + AudioDecoder* dec = db.GetDecoder(kPayloadType); + ASSERT_TRUE(dec != NULL); +} + +TEST(DecoderDatabase, TypeTests) { + auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + DecoderDatabase db(factory, absl::nullopt); + const uint8_t kPayloadTypePcmU = 0; + const uint8_t kPayloadTypeCng = 13; + const uint8_t kPayloadTypeDtmf = 100; + const uint8_t kPayloadTypeRed = 101; + const uint8_t kPayloadNotUsed = 102; + // Load into database. + EXPECT_EQ( + DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypePcmU, SdpAudioFormat("pcmu", 8000, 1))); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeCng, SdpAudioFormat("cn", 8000, 1))); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeDtmf, + SdpAudioFormat("telephone-event", 8000, 1))); + EXPECT_EQ( + DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeRed, SdpAudioFormat("red", 8000, 1))); + EXPECT_EQ(4, db.Size()); + // Test. 
+ EXPECT_FALSE(db.IsComfortNoise(kPayloadNotUsed)); + EXPECT_FALSE(db.IsDtmf(kPayloadNotUsed)); + EXPECT_FALSE(db.IsRed(kPayloadNotUsed)); + EXPECT_FALSE(db.IsComfortNoise(kPayloadTypePcmU)); + EXPECT_FALSE(db.IsDtmf(kPayloadTypePcmU)); + EXPECT_FALSE(db.IsRed(kPayloadTypePcmU)); + EXPECT_TRUE(db.IsComfortNoise(kPayloadTypeCng)); + EXPECT_TRUE(db.IsDtmf(kPayloadTypeDtmf)); + EXPECT_TRUE(db.IsRed(kPayloadTypeRed)); +} + +TEST(DecoderDatabase, CheckPayloadTypes) { + constexpr int kNumPayloads = 10; + auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + DecoderDatabase db(factory, absl::nullopt); + // Load a number of payloads into the database. Payload types are 0, 1, ..., + // while the decoder type is the same for all payload types (this does not + // matter for the test). + for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) { + EXPECT_EQ( + DecoderDatabase::kOK, + db.RegisterPayload(payload_type, SdpAudioFormat("pcmu", 8000, 1))); + } + PacketList packet_list; + for (int i = 0; i < kNumPayloads + 1; ++i) { + // Create packet with payload type `i`. The last packet will have a payload + // type that is not registered in the decoder database. + Packet packet; + packet.payload_type = i; + packet_list.push_back(std::move(packet)); + } + + // Expect to return false, since the last packet is of an unknown type. + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.CheckPayloadTypes(packet_list)); + + packet_list.pop_back(); // Remove the unknown one. + + EXPECT_EQ(DecoderDatabase::kOK, db.CheckPayloadTypes(packet_list)); + + // Delete all packets. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + it = packet_list.erase(it); + } +} + +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) +#define IF_ISAC(x) x +#else +#define IF_ISAC(x) DISABLED_##x +#endif + +// Test the methods for setting and getting active speech and CNG decoders. 
+TEST(DecoderDatabase, IF_ISAC(ActiveDecoders)) { + DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt); + // Load payload types. + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(0, SdpAudioFormat("pcmu", 8000, 1))); + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(103, SdpAudioFormat("isac", 16000, 1))); + ASSERT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(13, SdpAudioFormat("cn", 8000, 1))); + // Verify that no decoders are active from the start. + EXPECT_EQ(NULL, db.GetActiveDecoder()); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Set active speech codec. + bool changed; // Should be true when the active decoder changed. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_TRUE(changed); + AudioDecoder* decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Set the same again. Expect no change. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_FALSE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Change active decoder. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(103, &changed)); + EXPECT_TRUE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + + // Remove the active decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(103)); + EXPECT_EQ(NULL, db.GetActiveDecoder()); + + // Set active CNG codec. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveCngDecoder(13)); + ComfortNoiseDecoder* cng = db.GetActiveCngDecoder(); + ASSERT_FALSE(cng == NULL); // Should get a decoder here. + + // Remove the active CNG decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(13)); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Try to set non-existing codecs as active. 
+ EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.SetActiveDecoder(17, &changed)); + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, db.SetActiveCngDecoder(17)); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc new file mode 100644 index 0000000000..487450fe0f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.cc @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/default_neteq_factory.h" + +#include <utility> + +#include "modules/audio_coding/neteq/neteq_impl.h" + +namespace webrtc { + +DefaultNetEqFactory::DefaultNetEqFactory() = default; +DefaultNetEqFactory::~DefaultNetEqFactory() = default; + +std::unique_ptr<NetEq> DefaultNetEqFactory::CreateNetEq( + const NetEq::Config& config, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + Clock* clock) const { + return std::make_unique<NetEqImpl>( + config, NetEqImpl::Dependencies(config, clock, decoder_factory, + controller_factory_)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h new file mode 100644 index 0000000000..24d2bae419 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/default_neteq_factory.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_ +#define MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_ + +#include <memory> + +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/neteq/default_neteq_controller_factory.h" +#include "api/neteq/neteq_factory.h" +#include "api/scoped_refptr.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { + +class DefaultNetEqFactory : public NetEqFactory { + public: + DefaultNetEqFactory(); + ~DefaultNetEqFactory() override; + DefaultNetEqFactory(const DefaultNetEqFactory&) = delete; + DefaultNetEqFactory& operator=(const DefaultNetEqFactory&) = delete; + + std::unique_ptr<NetEq> CreateNetEq( + const NetEq::Config& config, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + Clock* clock) const override; + + private: + const DefaultNetEqControllerFactory controller_factory_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc new file mode 100644 index 0000000000..bf3a0f18a1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.cc @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/delay_manager.h" + +#include <stdio.h> +#include <stdlib.h> + +#include <algorithm> +#include <memory> +#include <numeric> +#include <string> + +#include "modules/include/module_common_types_public.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/struct_parameters_parser.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +constexpr int kMinBaseMinimumDelayMs = 0; +constexpr int kMaxBaseMinimumDelayMs = 10000; +constexpr int kStartDelayMs = 80; + +std::unique_ptr<ReorderOptimizer> MaybeCreateReorderOptimizer( + const DelayManager::Config& config) { + if (!config.use_reorder_optimizer) { + return nullptr; + } + return std::make_unique<ReorderOptimizer>( + (1 << 15) * config.reorder_forget_factor, config.ms_per_loss_percent, + config.start_forget_weight); +} + +} // namespace + +DelayManager::Config::Config() { + StructParametersParser::Create( // + "quantile", &quantile, // + "forget_factor", &forget_factor, // + "start_forget_weight", &start_forget_weight, // + "resample_interval_ms", &resample_interval_ms, // + "use_reorder_optimizer", &use_reorder_optimizer, // + "reorder_forget_factor", &reorder_forget_factor, // + "ms_per_loss_percent", &ms_per_loss_percent) + ->Parse(webrtc::field_trial::FindFullName( + "WebRTC-Audio-NetEqDelayManagerConfig")); +} + +void DelayManager::Config::Log() { + RTC_LOG(LS_INFO) << "Delay manager config:" + " quantile=" + << quantile << " forget_factor=" << forget_factor + << " start_forget_weight=" << start_forget_weight.value_or(0) + << " resample_interval_ms=" + << resample_interval_ms.value_or(0) + << " use_reorder_optimizer=" << use_reorder_optimizer + << " reorder_forget_factor=" << reorder_forget_factor + << " 
ms_per_loss_percent=" << ms_per_loss_percent; +} + +DelayManager::DelayManager(const Config& config, const TickTimer* tick_timer) + : max_packets_in_buffer_(config.max_packets_in_buffer), + underrun_optimizer_(tick_timer, + (1 << 30) * config.quantile, + (1 << 15) * config.forget_factor, + config.start_forget_weight, + config.resample_interval_ms), + reorder_optimizer_(MaybeCreateReorderOptimizer(config)), + base_minimum_delay_ms_(config.base_minimum_delay_ms), + effective_minimum_delay_ms_(config.base_minimum_delay_ms), + minimum_delay_ms_(0), + maximum_delay_ms_(0), + target_level_ms_(kStartDelayMs) { + RTC_DCHECK_GE(base_minimum_delay_ms_, 0); + + Reset(); +} + +DelayManager::~DelayManager() {} + +void DelayManager::Update(int arrival_delay_ms, bool reordered) { + if (!reorder_optimizer_ || !reordered) { + underrun_optimizer_.Update(arrival_delay_ms); + } + target_level_ms_ = + underrun_optimizer_.GetOptimalDelayMs().value_or(kStartDelayMs); + if (reorder_optimizer_) { + reorder_optimizer_->Update(arrival_delay_ms, reordered, target_level_ms_); + target_level_ms_ = std::max( + target_level_ms_, reorder_optimizer_->GetOptimalDelayMs().value_or(0)); + } + unlimited_target_level_ms_ = target_level_ms_; + target_level_ms_ = std::max(target_level_ms_, effective_minimum_delay_ms_); + if (maximum_delay_ms_ > 0) { + target_level_ms_ = std::min(target_level_ms_, maximum_delay_ms_); + } + if (packet_len_ms_ > 0) { + // Limit to 75% of maximum buffer size. 
+ target_level_ms_ = std::min( + target_level_ms_, 3 * max_packets_in_buffer_ * packet_len_ms_ / 4); + } +} + +int DelayManager::SetPacketAudioLength(int length_ms) { + if (length_ms <= 0) { + RTC_LOG_F(LS_ERROR) << "length_ms = " << length_ms; + return -1; + } + packet_len_ms_ = length_ms; + return 0; +} + +void DelayManager::Reset() { + packet_len_ms_ = 0; + underrun_optimizer_.Reset(); + target_level_ms_ = kStartDelayMs; + if (reorder_optimizer_) { + reorder_optimizer_->Reset(); + } +} + +int DelayManager::TargetDelayMs() const { + return target_level_ms_; +} + +int DelayManager::UnlimitedTargetLevelMs() const { + return unlimited_target_level_ms_; +} + +bool DelayManager::IsValidMinimumDelay(int delay_ms) const { + return 0 <= delay_ms && delay_ms <= MinimumDelayUpperBound(); +} + +bool DelayManager::IsValidBaseMinimumDelay(int delay_ms) const { + return kMinBaseMinimumDelayMs <= delay_ms && + delay_ms <= kMaxBaseMinimumDelayMs; +} + +bool DelayManager::SetMinimumDelay(int delay_ms) { + if (!IsValidMinimumDelay(delay_ms)) { + return false; + } + + minimum_delay_ms_ = delay_ms; + UpdateEffectiveMinimumDelay(); + return true; +} + +bool DelayManager::SetMaximumDelay(int delay_ms) { + // If `delay_ms` is zero then it unsets the maximum delay and target level is + // unconstrained by maximum delay. + if (delay_ms != 0 && delay_ms < minimum_delay_ms_) { + // Maximum delay shouldn't be less than minimum delay or less than a packet. + return false; + } + + maximum_delay_ms_ = delay_ms; + UpdateEffectiveMinimumDelay(); + return true; +} + +bool DelayManager::SetBaseMinimumDelay(int delay_ms) { + if (!IsValidBaseMinimumDelay(delay_ms)) { + return false; + } + + base_minimum_delay_ms_ = delay_ms; + UpdateEffectiveMinimumDelay(); + return true; +} + +int DelayManager::GetBaseMinimumDelay() const { + return base_minimum_delay_ms_; +} + +void DelayManager::UpdateEffectiveMinimumDelay() { + // Clamp `base_minimum_delay_ms_` into the range which can be effectively + // used. 
+ const int base_minimum_delay_ms = + rtc::SafeClamp(base_minimum_delay_ms_, 0, MinimumDelayUpperBound()); + effective_minimum_delay_ms_ = + std::max(minimum_delay_ms_, base_minimum_delay_ms); +} + +int DelayManager::MinimumDelayUpperBound() const { + // Choose the lowest possible bound discarding 0 cases which mean the value + // is not set and unconstrained. + int q75 = max_packets_in_buffer_ * packet_len_ms_ * 3 / 4; + q75 = q75 > 0 ? q75 : kMaxBaseMinimumDelayMs; + const int maximum_delay_ms = + maximum_delay_ms_ > 0 ? maximum_delay_ms_ : kMaxBaseMinimumDelayMs; + return std::min(maximum_delay_ms, q75); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h new file mode 100644 index 0000000000..a333681535 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_ +#define MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_ + +#include <string.h> // Provide access to size_t. + +#include <deque> +#include <memory> + +#include "absl/types/optional.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/histogram.h" +#include "modules/audio_coding/neteq/reorder_optimizer.h" +#include "modules/audio_coding/neteq/underrun_optimizer.h" + +namespace webrtc { + +class DelayManager { + public: + struct Config { + Config(); + void Log(); + + // Options that can be configured via field trial. 
+ double quantile = 0.95; + double forget_factor = 0.983; + absl::optional<double> start_forget_weight = 2; + absl::optional<int> resample_interval_ms = 500; + + bool use_reorder_optimizer = true; + double reorder_forget_factor = 0.9993; + int ms_per_loss_percent = 20; + + // Options that are externally populated. + int max_packets_in_buffer = 200; + int base_minimum_delay_ms = 0; + }; + + DelayManager(const Config& config, const TickTimer* tick_timer); + + virtual ~DelayManager(); + + DelayManager(const DelayManager&) = delete; + DelayManager& operator=(const DelayManager&) = delete; + + // Updates the delay manager that a new packet arrived with delay + // `arrival_delay_ms`. This updates the statistics and a new target buffer + // level is calculated. The `reordered` flag indicates if the packet was + // reordered. + virtual void Update(int arrival_delay_ms, bool reordered); + + // Resets all state. + virtual void Reset(); + + // Gets the target buffer level in milliseconds. If a minimum or maximum delay + // has been set, the target delay reported here also respects the configured + // min/max delay. + virtual int TargetDelayMs() const; + + // Reports the target delay that would be used if no minimum/maximum delay + // would be set. + virtual int UnlimitedTargetLevelMs() const; + + // Notifies the DelayManager of how much audio data is carried in each packet. + virtual int SetPacketAudioLength(int length_ms); + + // Accessors and mutators. + // Assuming `delay` is in valid range. + virtual bool SetMinimumDelay(int delay_ms); + virtual bool SetMaximumDelay(int delay_ms); + virtual bool SetBaseMinimumDelay(int delay_ms); + virtual int GetBaseMinimumDelay() const; + + // These accessors are only intended for testing purposes. + int effective_minimum_delay_ms_for_test() const { + return effective_minimum_delay_ms_; + } + + private: + // Provides value which minimum delay can't exceed based on current buffer + // size and given `maximum_delay_ms_`. 
Lower bound is a constant 0. + int MinimumDelayUpperBound() const; + + // Updates `effective_minimum_delay_ms_` delay based on current + // `minimum_delay_ms_`, `base_minimum_delay_ms_` and `maximum_delay_ms_` + // and buffer size. + void UpdateEffectiveMinimumDelay(); + + // Makes sure that `delay_ms` is less than maximum delay, if any maximum + // is set. Also, if possible check `delay_ms` to be less than 75% of + // `max_packets_in_buffer_`. + bool IsValidMinimumDelay(int delay_ms) const; + + bool IsValidBaseMinimumDelay(int delay_ms) const; + + // TODO(jakobi): set maximum buffer delay instead of number of packets. + const int max_packets_in_buffer_; + UnderrunOptimizer underrun_optimizer_; + std::unique_ptr<ReorderOptimizer> reorder_optimizer_; + + int base_minimum_delay_ms_; + int effective_minimum_delay_ms_; // Used as lower bound for target delay. + int minimum_delay_ms_; // Externally set minimum delay. + int maximum_delay_ms_; // Externally set maximum allowed delay. + + int packet_len_ms_ = 0; + int target_level_ms_ = 0; // Currently preferred buffer level. + int unlimited_target_level_ms_ = 0; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc new file mode 100644 index 0000000000..da5f53188c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/delay_manager_unittest.cc @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DelayManager class. 
+ +#include "modules/audio_coding/neteq/delay_manager.h" + +#include <math.h> + +#include <memory> + +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/histogram.h" +#include "modules/audio_coding/neteq/mock/mock_histogram.h" +#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h" +#include "rtc_base/checks.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kMaxNumberOfPackets = 200; +constexpr int kTimeStepMs = 10; +constexpr int kFrameSizeMs = 20; +constexpr int kMaxBufferSizeMs = kMaxNumberOfPackets * kFrameSizeMs; + +} // namespace + +class DelayManagerTest : public ::testing::Test { + protected: + DelayManagerTest(); + virtual void SetUp(); + void Update(int delay); + void IncreaseTime(int inc_ms); + + TickTimer tick_timer_; + DelayManager dm_; +}; + +DelayManagerTest::DelayManagerTest() + : dm_(DelayManager::Config(), &tick_timer_) {} + +void DelayManagerTest::SetUp() { + dm_.SetPacketAudioLength(kFrameSizeMs); +} + +void DelayManagerTest::Update(int delay) { + dm_.Update(delay, false); +} + +void DelayManagerTest::IncreaseTime(int inc_ms) { + for (int t = 0; t < inc_ms; t += kTimeStepMs) { + tick_timer_.Increment(); + } +} + +TEST_F(DelayManagerTest, CreateAndDestroy) { + // Nothing to do here. The test fixture creates and destroys the DelayManager + // object. 
+} + +TEST_F(DelayManagerTest, UpdateNormal) { + for (int i = 0; i < 50; ++i) { + Update(0); + IncreaseTime(kFrameSizeMs); + } + EXPECT_EQ(20, dm_.TargetDelayMs()); +} + +TEST_F(DelayManagerTest, MaxDelay) { + Update(0); + const int kMaxDelayMs = 60; + EXPECT_GT(dm_.TargetDelayMs(), kMaxDelayMs); + EXPECT_TRUE(dm_.SetMaximumDelay(kMaxDelayMs)); + Update(0); + EXPECT_EQ(kMaxDelayMs, dm_.TargetDelayMs()); +} + +TEST_F(DelayManagerTest, MinDelay) { + Update(0); + int kMinDelayMs = 7 * kFrameSizeMs; + EXPECT_LT(dm_.TargetDelayMs(), kMinDelayMs); + dm_.SetMinimumDelay(kMinDelayMs); + IncreaseTime(kFrameSizeMs); + Update(0); + EXPECT_EQ(kMinDelayMs, dm_.TargetDelayMs()); +} + +TEST_F(DelayManagerTest, BaseMinimumDelayCheckValidRange) { + // Base minimum delay should be between [0, 10000] milliseconds. + EXPECT_FALSE(dm_.SetBaseMinimumDelay(-1)); + EXPECT_FALSE(dm_.SetBaseMinimumDelay(10001)); + EXPECT_EQ(dm_.GetBaseMinimumDelay(), 0); + + EXPECT_TRUE(dm_.SetBaseMinimumDelay(7999)); + EXPECT_EQ(dm_.GetBaseMinimumDelay(), 7999); +} + +TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMinimumDelay) { + constexpr int kBaseMinimumDelayMs = 100; + constexpr int kMinimumDelayMs = 200; + + // Base minimum delay sets lower bound on minimum. That is why when base + // minimum delay is lower than minimum delay we use minimum delay. + RTC_DCHECK_LT(kBaseMinimumDelayMs, kMinimumDelayMs); + + EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs)); + EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs)); + EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs); +} + +TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanMinimumDelay) { + constexpr int kBaseMinimumDelayMs = 70; + constexpr int kMinimumDelayMs = 30; + + // Base minimum delay sets lower bound on minimum. That is why when base + // minimum delay is greater than minimum delay we use base minimum delay. 
// Verifies that a base minimum delay larger than the whole buffer is clamped
// to 75% of the buffer size once the maximum delay has been unset.
TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanBufferSize) {
  constexpr int kBaseMinimumDelayMs = kMaxBufferSizeMs + 1;
  constexpr int kMinimumDelayMs = 12;
  constexpr int kMaximumDelayMs = 20;
  constexpr int kMaxBufferSizeMsQ75 = 3 * kMaxBufferSizeMs / 4;

  EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));

  // The base minimum delay exceeds the minimum delay, the buffer size and the
  // maximum delay, so it is clamped to the highest value currently possible,
  // which is the maximum delay.
  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaxBufferSizeMs);
  RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs);
  RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMsQ75);

  EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
  EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));

  // Unset the maximum delay.
  EXPECT_TRUE(dm_.SetMaximumDelay(0));

  // With the maximum delay unset, the highest possible value is 75% of the
  // maximum buffer size.
  EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaxBufferSizeMsQ75);
}
+ RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs); + RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs); + RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMs); + + EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs)); + EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs)); + EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs)); + EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaximumDelayMs); +} + +TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMaxSize) { + constexpr int kMaximumDelayMs = 400; + constexpr int kBaseMinimumDelayMs = kMaximumDelayMs - 1; + constexpr int kMinimumDelayMs = 20; + + // Base minimum delay is greater than minimum delay, and lower than maximum + // delays that is why it is used. + RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs); + RTC_DCHECK_LT(kBaseMinimumDelayMs, kMaximumDelayMs); + + EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs)); + EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs)); + EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs)); + EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMs); +} + +TEST_F(DelayManagerTest, MinimumDelayMemorization) { + // Check that when we increase base minimum delay to value higher than + // minimum delay then minimum delay is still memorized. This allows to + // restore effective minimum delay to memorized minimum delay value when we + // decrease base minimum delay. + constexpr int kBaseMinimumDelayMsLow = 10; + constexpr int kMinimumDelayMs = 20; + constexpr int kBaseMinimumDelayMsHigh = 30; + + EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsLow)); + EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs)); + // Minimum delay is used as it is higher than base minimum delay. + EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs); + + EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsHigh)); + // Base minimum delay is used as it is now higher than minimum delay. 
// Verifies that invalid arguments are rejected by the setters. The steps are
// order-dependent: each one relies on the limits set by the previous ones.
TEST_F(DelayManagerTest, Failures) {
  // Wrong packet size: zero and negative lengths both return -1.
  EXPECT_EQ(-1, dm_.SetPacketAudioLength(0));
  EXPECT_EQ(-1, dm_.SetPacketAudioLength(-1));

  // Minimum delay higher than a maximum delay is not accepted.
  EXPECT_TRUE(dm_.SetMaximumDelay(20));
  EXPECT_FALSE(dm_.SetMinimumDelay(40));

  // Maximum delay less than minimum delay is not accepted. The maximum is
  // raised first so that the minimum of 80 ms can be set at all.
  EXPECT_TRUE(dm_.SetMaximumDelay(100));
  EXPECT_TRUE(dm_.SetMinimumDelay(80));
  EXPECT_FALSE(dm_.SetMaximumDelay(60));
}
+ */ + +#include "modules/audio_coding/neteq/dsp_helper.h" + +#include <string.h> // Access to memset. + +#include <algorithm> // Access to min, max. + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +// Table of constants used in method DspHelper::ParabolicFit(). +const int16_t DspHelper::kParabolaCoefficients[17][3] = { + {120, 32, 64}, {140, 44, 75}, {150, 50, 80}, {160, 57, 85}, + {180, 72, 96}, {200, 89, 107}, {210, 98, 112}, {220, 108, 117}, + {240, 128, 128}, {260, 150, 139}, {270, 162, 144}, {280, 174, 149}, + {300, 200, 160}, {320, 228, 171}, {330, 242, 176}, {340, 257, 181}, + {360, 288, 192}}; + +// Filter coefficients used when downsampling from the indicated sample rates +// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0 +// values are provided in the comments before each array. + +// Q0 values: {0.3, 0.4, 0.3}. +const int16_t DspHelper::kDownsample8kHzTbl[3] = {1229, 1638, 1229}; + +// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}. +const int16_t DspHelper::kDownsample16kHzTbl[5] = {614, 819, 1229, 819, 614}; + +// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}. +const int16_t DspHelper::kDownsample32kHzTbl[7] = {584, 512, 625, 667, + 625, 512, 584}; + +// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}. +const int16_t DspHelper::kDownsample48kHzTbl[7] = {1019, 390, 427, 440, + 427, 390, 1019}; + +int DspHelper::RampSignal(const int16_t* input, + size_t length, + int factor, + int increment, + int16_t* output) { + int factor_q20 = (factor << 6) + 32; + // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14? + for (size_t i = 0; i < length; ++i) { + output[i] = (factor * input[i] + 8192) >> 14; + factor_q20 += increment; + factor_q20 = std::max(factor_q20, 0); // Never go negative. 
+ factor = std::min(factor_q20 >> 6, 16384); + } + return factor; +} + +int DspHelper::RampSignal(int16_t* signal, + size_t length, + int factor, + int increment) { + return RampSignal(signal, length, factor, increment, signal); +} + +int DspHelper::RampSignal(AudioVector* signal, + size_t start_index, + size_t length, + int factor, + int increment) { + int factor_q20 = (factor << 6) + 32; + // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14? + for (size_t i = start_index; i < start_index + length; ++i) { + (*signal)[i] = (factor * (*signal)[i] + 8192) >> 14; + factor_q20 += increment; + factor_q20 = std::max(factor_q20, 0); // Never go negative. + factor = std::min(factor_q20 >> 6, 16384); + } + return factor; +} + +int DspHelper::RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment) { + RTC_DCHECK_LE(start_index + length, signal->Size()); + if (start_index + length > signal->Size()) { + // Wrong parameters. Do nothing and return the scale factor unaltered. + return factor; + } + int end_factor = 0; + // Loop over the channels, starting at the same `factor` each time. + for (size_t channel = 0; channel < signal->Channels(); ++channel) { + end_factor = + RampSignal(&(*signal)[channel], start_index, length, factor, increment); + } + return end_factor; +} + +void DspHelper::PeakDetection(int16_t* data, + size_t data_length, + size_t num_peaks, + int fs_mult, + size_t* peak_index, + int16_t* peak_value) { + size_t min_index = 0; + size_t max_index = 0; + + for (size_t i = 0; i <= num_peaks - 1; i++) { + if (num_peaks == 1) { + // Single peak. The parabola fit assumes that an extra point is + // available; worst case it gets a zero on the high end of the signal. + // TODO(hlundin): This can potentially get much worse. It breaks the + // API contract, that the length of `data` is `data_length`. 
+ data_length++; + } + + peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1); + + if (i != num_peaks - 1) { + min_index = (peak_index[i] > 2) ? (peak_index[i] - 2) : 0; + max_index = std::min(data_length - 1, peak_index[i] + 2); + } + + if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else { + if (peak_index[i] == data_length - 2) { + if (data[peak_index[i]] > data[peak_index[i] + 1]) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else if (data[peak_index[i]] <= data[peak_index[i] + 1]) { + // Linear approximation. + peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1; + peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult; + } + } else { + peak_value[i] = data[peak_index[i]]; + peak_index[i] = peak_index[i] * 2 * fs_mult; + } + } + + if (i != num_peaks - 1) { + memset(&data[min_index], 0, + sizeof(data[0]) * (max_index - min_index + 1)); + } + } +} + +void DspHelper::ParabolicFit(int16_t* signal_points, + int fs_mult, + size_t* peak_index, + int16_t* peak_value) { + uint16_t fit_index[13]; + if (fs_mult == 1) { + fit_index[0] = 0; + fit_index[1] = 8; + fit_index[2] = 16; + } else if (fs_mult == 2) { + fit_index[0] = 0; + fit_index[1] = 4; + fit_index[2] = 8; + fit_index[3] = 12; + fit_index[4] = 16; + } else if (fs_mult == 4) { + fit_index[0] = 0; + fit_index[1] = 2; + fit_index[2] = 4; + fit_index[3] = 6; + fit_index[4] = 8; + fit_index[5] = 10; + fit_index[6] = 12; + fit_index[7] = 14; + fit_index[8] = 16; + } else { + fit_index[0] = 0; + fit_index[1] = 1; + fit_index[2] = 3; + fit_index[3] = 4; + fit_index[4] = 5; + fit_index[5] = 7; + fit_index[6] = 8; + fit_index[7] = 9; + fit_index[8] = 11; + fit_index[9] = 12; + fit_index[10] = 13; + fit_index[11] = 15; + fit_index[12] = 16; + } + + // num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2]; + // den = signal_points[0] - 
2 * signal_points[1] + signal_points[2]; + int32_t num = + (signal_points[0] * -3) + (signal_points[1] * 4) - signal_points[2]; + int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2]; + int32_t temp = num * 120; + int flag = 1; + int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0] - + kParabolaCoefficients[fit_index[fs_mult - 1]][0]; + int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0] + + kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / + 2; + int16_t lmt; + if (temp < -den * strt) { + lmt = strt - stp; + while (flag) { + if ((flag == fs_mult) || (temp > -den * lmt)) { + *peak_value = + (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1] + + num * kParabolaCoefficients[fit_index[fs_mult - flag]][2] + + signal_points[0] * 256) / + 256; + *peak_index = *peak_index * 2 * fs_mult - flag; + flag = 0; + } else { + flag++; + lmt -= stp; + } + } + } else if (temp > -den * (strt + stp)) { + lmt = strt + 2 * stp; + while (flag) { + if ((flag == fs_mult) || (temp < -den * lmt)) { + int32_t temp_term_1 = + den * kParabolaCoefficients[fit_index[fs_mult + flag]][1]; + int32_t temp_term_2 = + num * kParabolaCoefficients[fit_index[fs_mult + flag]][2]; + int32_t temp_term_3 = signal_points[0] * 256; + *peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256; + *peak_index = *peak_index * 2 * fs_mult + flag; + flag = 0; + } else { + flag++; + lmt += stp; + } + } + } else { + *peak_value = signal_points[1]; + *peak_index = *peak_index * 2 * fs_mult; + } +} + +size_t DspHelper::MinDistortion(const int16_t* signal, + size_t min_lag, + size_t max_lag, + size_t length, + int32_t* distortion_value) { + size_t best_index = 0; + int32_t min_distortion = WEBRTC_SPL_WORD32_MAX; + for (size_t i = min_lag; i <= max_lag; i++) { + int32_t sum_diff = 0; + const int16_t* data1 = signal; + const int16_t* data2 = signal - i; + for (size_t j = 0; j < length; j++) { + sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]); + } + // Compare with 
previous minimum. + if (sum_diff < min_distortion) { + min_distortion = sum_diff; + best_index = i; + } + } + *distortion_value = min_distortion; + return best_index; +} + +void DspHelper::CrossFade(const int16_t* input1, + const int16_t* input2, + size_t length, + int16_t* mix_factor, + int16_t factor_decrement, + int16_t* output) { + int16_t factor = *mix_factor; + int16_t complement_factor = 16384 - factor; + for (size_t i = 0; i < length; i++) { + output[i] = + (factor * input1[i] + complement_factor * input2[i] + 8192) >> 14; + factor -= factor_decrement; + complement_factor += factor_decrement; + } + *mix_factor = factor; +} + +void DspHelper::UnmuteSignal(const int16_t* input, + size_t length, + int16_t* factor, + int increment, + int16_t* output) { + uint16_t factor_16b = *factor; + int32_t factor_32b = (static_cast<int32_t>(factor_16b) << 6) + 32; + for (size_t i = 0; i < length; i++) { + output[i] = (factor_16b * input[i] + 8192) >> 14; + factor_32b = std::max(factor_32b + increment, 0); + factor_16b = std::min(16384, factor_32b >> 6); + } + *factor = factor_16b; +} + +void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) { + int32_t factor = (16384 << 6) + 32; + for (size_t i = 0; i < length; i++) { + signal[i] = ((factor >> 6) * signal[i] + 8192) >> 14; + factor -= mute_slope; + } +} + +int DspHelper::DownsampleTo4kHz(const int16_t* input, + size_t input_length, + size_t output_length, + int input_rate_hz, + bool compensate_delay, + int16_t* output) { + // Set filter parameters depending on input frequency. + // NOTE: The phase delay values are wrong compared to the true phase delay + // of the filters. However, the error is preserved (through the +1 term) for + // consistency. + const int16_t* filter_coefficients; // Filter coefficients. + size_t filter_length; // Number of coefficients. + size_t filter_delay; // Phase delay in samples. + int16_t factor; // Conversion rate (inFsHz / 8000). 
+ switch (input_rate_hz) { + case 8000: { + filter_length = 3; + factor = 2; + filter_coefficients = kDownsample8kHzTbl; + filter_delay = 1 + 1; + break; + } + case 16000: { + filter_length = 5; + factor = 4; + filter_coefficients = kDownsample16kHzTbl; + filter_delay = 2 + 1; + break; + } + case 32000: { + filter_length = 7; + factor = 8; + filter_coefficients = kDownsample32kHzTbl; + filter_delay = 3 + 1; + break; + } + case 48000: { + filter_length = 7; + factor = 12; + filter_coefficients = kDownsample48kHzTbl; + filter_delay = 3 + 1; + break; + } + default: { + RTC_DCHECK_NOTREACHED(); + return -1; + } + } + + if (!compensate_delay) { + // Disregard delay compensation. + filter_delay = 0; + } + + // Returns -1 if input signal is too short; 0 otherwise. + return WebRtcSpl_DownsampleFast( + &input[filter_length - 1], input_length - filter_length + 1, output, + output_length, filter_coefficients, filter_length, factor, filter_delay); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h new file mode 100644 index 0000000000..4aead7df18 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_ +#define MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_ + +#include <stdint.h> +#include <string.h> + +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/audio_vector.h" + +namespace webrtc { + +// This class contains various signal processing functions, all implemented as +// static methods. +class DspHelper { + public: + // Filter coefficients used when downsampling from the indicated sample rates + // (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. + static const int16_t kDownsample8kHzTbl[3]; + static const int16_t kDownsample16kHzTbl[5]; + static const int16_t kDownsample32kHzTbl[7]; + static const int16_t kDownsample48kHzTbl[7]; + + // Constants used to mute and unmute over 5 samples. The coefficients are + // in Q15. + static const int kMuteFactorStart8kHz = 27307; + static const int kMuteFactorIncrement8kHz = -5461; + static const int kUnmuteFactorStart8kHz = 5461; + static const int kUnmuteFactorIncrement8kHz = 5461; + static const int kMuteFactorStart16kHz = 29789; + static const int kMuteFactorIncrement16kHz = -2979; + static const int kUnmuteFactorStart16kHz = 2979; + static const int kUnmuteFactorIncrement16kHz = 2979; + static const int kMuteFactorStart32kHz = 31208; + static const int kMuteFactorIncrement32kHz = -1560; + static const int kUnmuteFactorStart32kHz = 1560; + static const int kUnmuteFactorIncrement32kHz = 1560; + static const int kMuteFactorStart48kHz = 31711; + static const int kMuteFactorIncrement48kHz = -1057; + static const int kUnmuteFactorStart48kHz = 1057; + static const int kUnmuteFactorIncrement48kHz = 1057; + + // Multiplies the signal with a gradually changing factor. + // The first sample is multiplied with `factor` (in Q14). For each sample, + // `factor` is increased (additive) by the `increment` (in Q20), which can + // be negative. Returns the scale factor after the last increment. 
+ static int RampSignal(const int16_t* input, + size_t length, + int factor, + int increment, + int16_t* output); + + // Same as above, but with the samples of `signal` being modified in-place. + static int RampSignal(int16_t* signal, + size_t length, + int factor, + int increment); + + // Same as above, but processes `length` samples from `signal`, starting at + // `start_index`. + static int RampSignal(AudioVector* signal, + size_t start_index, + size_t length, + int factor, + int increment); + + // Same as above, but for an AudioMultiVector. + static int RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment); + + // Peak detection with parabolic fit. Looks for `num_peaks` maxima in `data`, + // having length `data_length` and sample rate multiplier `fs_mult`. The peak + // locations and values are written to the arrays `peak_index` and + // `peak_value`, respectively. Both arrays must hold at least `num_peaks` + // elements. + static void PeakDetection(int16_t* data, + size_t data_length, + size_t num_peaks, + int fs_mult, + size_t* peak_index, + int16_t* peak_value); + + // Estimates the height and location of a maximum. The three values in the + // array `signal_points` are used as basis for a parabolic fit, which is then + // used to find the maximum in an interpolated signal. The `signal_points` are + // assumed to be from a 4 kHz signal, while the maximum, written to + // `peak_index` and `peak_value` is given in the full sample rate, as + // indicated by the sample rate multiplier `fs_mult`. + static void ParabolicFit(int16_t* signal_points, + int fs_mult, + size_t* peak_index, + int16_t* peak_value); + + // Calculates the sum-abs-diff for `signal` when compared to a displaced + // version of itself. Returns the displacement lag that results in the minimum + // distortion. The resulting distortion is written to `distortion_value`. + // The values of `min_lag` and `max_lag` are boundaries for the search. 
+ static size_t MinDistortion(const int16_t* signal, + size_t min_lag, + size_t max_lag, + size_t length, + int32_t* distortion_value); + + // Mixes `length` samples from `input1` and `input2` together and writes the + // result to `output`. The gain for `input1` starts at `mix_factor` (Q14) and + // is decreased by `factor_decrement` (Q14) for each sample. The gain for + // `input2` is the complement 16384 - mix_factor. + static void CrossFade(const int16_t* input1, + const int16_t* input2, + size_t length, + int16_t* mix_factor, + int16_t factor_decrement, + int16_t* output); + + // Scales `input` with an increasing gain. Applies `factor` (Q14) to the first + // sample and increases the gain by `increment` (Q20) for each sample. The + // result is written to `output`. `length` samples are processed. + static void UnmuteSignal(const int16_t* input, + size_t length, + int16_t* factor, + int increment, + int16_t* output); + + // Starts at unity gain and gradually fades out `signal`. For each sample, + // the gain is reduced by `mute_slope` (Q14). `length` samples are processed. + static void MuteSignal(int16_t* signal, int mute_slope, size_t length); + + // Downsamples `input` from `input_rate_hz` to 4 kHz sample rate. The input + // has `input_length` samples, and the method will write `output_length` + // samples to `output`. Compensates for the phase delay of the downsampling + // filters if `compensate_delay` is true. Returns -1 if the input is too short + // to produce `output_length` samples, otherwise 0. + static int DownsampleTo4kHz(const int16_t* input, + size_t input_length, + size_t output_length, + int input_rate_hz, + bool compensate_delay, + int16_t* output); + + // Copy construction and copy assignment are disabled. + DspHelper(const DspHelper&) = delete; + DspHelper& operator=(const DspHelper&) = delete; + + private: + // Table of constants used in method DspHelper::ParabolicFit(). 
+ static const int16_t kParabolaCoefficients[17][3]; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc new file mode 100644 index 0000000000..09247417d3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dsp_helper_unittest.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/dsp_helper.h" + +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(DspHelper, RampSignalArray) { + static const int kLen = 100; + int16_t input[kLen]; + int16_t output[kLen]; + // Fill input with 1000. + for (int i = 0; i < kLen; ++i) { + input[i] = 1000; + } + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) over the array. Note that `increment` is in Q20, + // while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + // Test first method. + int stop_factor = + DspHelper::RampSignal(input, kLen, start_factor, increment, output); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, output[i]); + } + + // Test second method. (Note that this modifies `input`.) + stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. 
+ for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, input[i]); + } +} + +TEST(DspHelper, RampSignalAudioMultiVector) { + static const int kLen = 100; + static const int kChannels = 5; + AudioMultiVector input(kChannels, kLen * 3); + // Fill input with 1000. + for (int i = 0; i < kLen * 3; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + input[channel][i] = 1000; + } + } + // We want to start ramping at `start_index` and keep ramping for `kLen` + // samples. + int start_index = kLen; + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) in `kLen` samples. Note that `increment` is in + // Q20, while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + int stop_factor = + DspHelper::RampSignal(&input, start_index, kLen, start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + // Verify that the first `kLen` samples are left untouched. + int i; + for (i = 0; i < kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, input[channel][i]); + } + } + // Verify that the next block of `kLen` samples are ramped. + for (; i < 2 * kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000 * (i - kLen) / kLen, input[channel][i]); + } + } + // Verify the last `kLen` samples are left untouched. + for (; i < 3 * kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, input[channel][i]); + } + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc new file mode 100644 index 0000000000..115bfcf97b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.cc @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/dtmf_buffer.h" + +#include <algorithm> // max + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no +// longer required, this #define should be removed (and the code that it +// enables). +#define LEGACY_BITEXACT + +namespace webrtc { + +// Creates a buffer for use at sample rate `fs_hz`. The sample-rate dependent +// members are zero-initialized first, because SetSampleRate() returns without +// assigning them when it rejects `fs_hz`; GetEvent() would otherwise read +// indeterminate values. +DtmfBuffer::DtmfBuffer(int fs_hz) + : max_extrapolation_samples_(0), frame_len_samples_(0) { + SetSampleRate(fs_hz); +} + +DtmfBuffer::~DtmfBuffer() = default; + +void DtmfBuffer::Flush() { + buffer_.clear(); +} + +// The ParseEvent method parses 4 bytes from `payload` according to this format +// from RFC 4733: +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | event |E|R| volume | duration | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// Legend (adapted from RFC 4733) +// - event: The event field is a number between 0 and 255 identifying a +// specific telephony event. The buffer will not accept any event +// numbers larger than 15. +// - E: If set to a value of one, the "end" bit indicates that this +// packet contains the end of the event. For long-lasting events +// that have to be split into segments, only the final packet for +// the final segment will have the E bit set. +// - R: Reserved. +// - volume: For DTMF digits and other events representable as tones, this +// field describes the power level of the tone, expressed in dBm0 +// after dropping the sign. Power levels range from 0 to -63 dBm0. +// Thus, larger values denote lower volume. 
The buffer discards +// values larger than 63 (i.e., lower than -63 dBm0). +// - duration: The duration field indicates the duration of the event or segment +// being reported, in timestamp units, expressed as an unsigned +// integer in network byte order. For a non-zero value, the event +// or segment began at the instant identified by the RTP timestamp +// and has so far lasted as long as indicated by this parameter. +// The event may or may not have ended. If the event duration +// exceeds the maximum representable by the duration field, the +// event is split into several contiguous segments. The buffer will +// discard zero-duration events. +// +// Returns kPayloadTooShort when `payload_length_bytes` is less than 4, +// otherwise kOK. `payload` and `event` must not be null (enforced with +// RTC_CHECK). `rtp_timestamp` is copied into `event` unchanged. +int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp, + const uint8_t* payload, + size_t payload_length_bytes, + DtmfEvent* event) { + RTC_CHECK(payload); + RTC_CHECK(event); + if (payload_length_bytes < 4) { + RTC_LOG(LS_WARNING) << "ParseEvent payload too short"; + return kPayloadTooShort; + } + + // Byte 0 is the event number. Byte 1 carries the E bit (0x80) and the 6-bit + // volume (0x3F). Bytes 2-3 hold the duration, most significant byte first. + event->event_no = payload[0]; + event->end_bit = ((payload[1] & 0x80) != 0); + event->volume = (payload[1] & 0x3F); + event->duration = payload[2] << 8 | payload[3]; + event->timestamp = rtp_timestamp; + return kOK; +} + +// Inserts a DTMF event into the buffer. The event should be parsed from the +// bit stream using the ParseEvent method above before inserting it in the +// buffer. +// DTMF events can be quite long, and in most cases the duration of the event +// is not known when the first packet describing it is sent. To deal with that, +// the RFC 4733 specifies that multiple packets are sent for one and the same +// event as it is being created (typically, as the user is pressing the key). +// These packets will all share the same start timestamp and event number, +// while the duration will be the cumulative duration from the start. When +// inserting a new event, the InsertEvent method tries to find a matching event +// already in the buffer. If so, the new event is simply merged with the +// existing one. 
+int DtmfBuffer::InsertEvent(const DtmfEvent& event) { + if (event.event_no < 0 || event.event_no > 15 || event.volume < 0 || + event.volume > 63 || event.duration <= 0 || event.duration > 65535) { + RTC_LOG(LS_WARNING) << "InsertEvent invalid parameters"; + return kInvalidEventParameters; + } + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + if (MergeEvents(it, event)) { + // A matching event was found and the new event was merged. + return kOK; + } + ++it; + } + buffer_.push_back(event); + // Sort the buffer using CompareEvents to rank the events. + buffer_.sort(CompareEvents); + return kOK; +} + +bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) { + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + // `event_end` is an estimate of where the current event ends. If the end + // bit is set, we know that the event ends at `timestamp` + `duration`. + uint32_t event_end = it->timestamp + it->duration; +#ifdef LEGACY_BITEXACT + bool next_available = false; +#endif + if (!it->end_bit) { + // If the end bit is not set, we allow extrapolation of the event for + // some time. + event_end += max_extrapolation_samples_; + DtmfList::iterator next = it; + ++next; + if (next != buffer_.end()) { + // If there is a next event in the buffer, we will not extrapolate over + // the start of that new event. + event_end = std::min(event_end, next->timestamp); +#ifdef LEGACY_BITEXACT + next_available = true; +#endif + } + } + if (current_timestamp >= it->timestamp && + current_timestamp <= event_end) { // TODO(hlundin): Change to <. + // Found a matching event. + if (event) { + event->event_no = it->event_no; + event->end_bit = it->end_bit; + event->volume = it->volume; + event->duration = it->duration; + event->timestamp = it->timestamp; + } +#ifdef LEGACY_BITEXACT + if (it->end_bit && current_timestamp + frame_len_samples_ >= event_end) { + // We are done playing this. Erase the event. 
+ buffer_.erase(it); + } +#endif + return true; + } else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=. +// Erase old event. Operation returns a valid pointer to the next element +// in the list. +#ifdef LEGACY_BITEXACT + if (!next_available) { + if (event) { + event->event_no = it->event_no; + event->end_bit = it->end_bit; + event->volume = it->volume; + event->duration = it->duration; + event->timestamp = it->timestamp; + } + it = buffer_.erase(it); + return true; + } else { + it = buffer_.erase(it); + } +#else + it = buffer_.erase(it); +#endif + } else { + ++it; + } + } + return false; +} + +size_t DtmfBuffer::Length() const { + return buffer_.size(); +} + +bool DtmfBuffer::Empty() const { + return buffer_.empty(); +} + +// Sets a new sample rate. Returns kInvalidSampleRate, leaving the current +// settings untouched, unless `fs_hz` is 8000, 16000, 32000, 44100 or 48000; +// otherwise returns kOK. +int DtmfBuffer::SetSampleRate(int fs_hz) { + if (fs_hz != 8000 && + fs_hz != 16000 && + fs_hz != 32000 && + fs_hz != 44100 && + fs_hz != 48000) { + return kInvalidSampleRate; + } + // 7/100 of a second (70 ms) of extrapolation and 1/100 of a second (10 ms) + // per frame, both expressed in samples. + max_extrapolation_samples_ = 7 * fs_hz / 100; + frame_len_samples_ = fs_hz / 100; + return kOK; +} + +// The method returns true if the two events are considered to be the same. +// They are defined as equal if they share the same timestamp and event number. +// The special case with long-lasting events that have to be split into segments +// is not handled in this method. These will be treated as separate events in +// the buffer. +bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) { + return (a.event_no == b.event_no) && (a.timestamp == b.timestamp); +} + +bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) { + if (SameEvent(*it, event)) { + if (!it->end_bit) { + // Do not extend the duration of an event for which the end bit was + // already received. + it->duration = std::max(event.duration, it->duration); + } + if (event.end_bit) { + it->end_bit = true; + } + return true; + } else { + return false; + } +} + +// Returns true if `a` goes before `b` in the sorting order ("`a` < `b`"). 
+// The events are ranked using their start timestamp (taking wrap-around into +// account). In the unlikely situation that two events share the same start +// timestamp, the event number is used to rank the two. Note that packets +// that belong to the same events, and therefore sharing the same start +// timestamp, have already been merged before the sort method is called. +bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) { + if (a.timestamp == b.timestamp) { + return a.event_no < b.event_no; + } + // Take wrap-around into account. + return (static_cast<uint32_t>(b.timestamp - a.timestamp) < 0xFFFFFFFF / 2); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h new file mode 100644 index 0000000000..62b751525c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_ +#define MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <list> + +namespace webrtc { + +struct DtmfEvent { + uint32_t timestamp; + int event_no; + int volume; + int duration; + bool end_bit; + + // Constructors + DtmfEvent() + : timestamp(0), event_no(0), volume(0), duration(0), end_bit(false) {} + DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end) + : timestamp(ts), event_no(ev), volume(vol), duration(dur), end_bit(end) {} +}; + +// This is the buffer holding DTMF events while waiting for them to be played. 
+class DtmfBuffer { + public: + enum BufferReturnCodes { + kOK = 0, + kInvalidPointer, + kPayloadTooShort, + kInvalidEventParameters, + kInvalidSampleRate + }; + + // Set up the buffer for use at sample rate `fs_hz`. + explicit DtmfBuffer(int fs_hz); + + virtual ~DtmfBuffer(); + + DtmfBuffer(const DtmfBuffer&) = delete; + DtmfBuffer& operator=(const DtmfBuffer&) = delete; + + // Flushes the buffer. + virtual void Flush(); + + // Static method to parse 4 bytes from `payload` as a DTMF event (RFC 4733) + // and write the parsed information into the struct `event`. Input variable + // `rtp_timestamp` is simply copied into the struct. + static int ParseEvent(uint32_t rtp_timestamp, + const uint8_t* payload, + size_t payload_length_bytes, + DtmfEvent* event); + + // Inserts `event` into the buffer. The method looks for a matching event and + // merges the two if a match is found. + virtual int InsertEvent(const DtmfEvent& event); + + // Checks if a DTMF event should be played at time `current_timestamp`. If so, + // the method returns true; otherwise false. The parameters of the event to + // play will be written to `event`. + virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event); + + // Number of events in the buffer. + virtual size_t Length() const; + + virtual bool Empty() const; + + // Set a new sample rate. + virtual int SetSampleRate(int fs_hz); + + private: + typedef std::list<DtmfEvent> DtmfList; + + int max_extrapolation_samples_; + int frame_len_samples_; // TODO(hlundin): Remove this later. + + // Compares two events and returns true if they are the same. + static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b); + + // Merges `event` to the event pointed out by `it`. The method checks that + // the two events are the same (using the SameEvent method), and merges them + // if that was the case, returning true. If the events are not the same, false + // is returned. 
+ bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event); + + // Method used by the sort algorithm to rank events in the buffer. + static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b); + + DtmfList buffer_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc new file mode 100644 index 0000000000..83745b6c09 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_buffer_unittest.cc @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/dtmf_buffer.h" + +#ifdef WIN32 +#include <winsock2.h> // ntohl() +#else +#include <arpa/inet.h> // ntohl() +#endif + +#include <iostream> + +#include "test/gtest.h" + +// Modify the tests so that they pass with the modifications done to DtmfBuffer +// for backwards bit-exactness. Once bit-exactness is no longer required, this +// #define should be removed (and the code that it enables). +#define LEGACY_BITEXACT + +namespace webrtc { + +static int sample_rate_hz = 8000; + +static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) { + uint32_t payload = 0; + // 0 1 2 3 + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | event |E|R| volume | duration | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + payload |= (event & 0x00FF) << 24; + payload |= (end ? 
0x00800000 : 0x00000000); + payload |= (volume & 0x003F) << 16; + payload |= (duration & 0xFFFF); + payload = ntohl(payload); + return payload; +} + +static bool EqualEvents(const DtmfEvent& a, const DtmfEvent& b) { + return (a.duration == b.duration && a.end_bit == b.end_bit && + a.event_no == b.event_no && a.timestamp == b.timestamp && + a.volume == b.volume); +} + +TEST(DtmfBuffer, CreateAndDestroy) { + DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz); + delete buffer; +} + +// Test the event parser. +TEST(DtmfBuffer, ParseEvent) { + int event_no = 7; + bool end_bit = true; + int volume = 17; + int duration = 4711; + uint32_t timestamp = 0x12345678; + uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration); + uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload); + DtmfEvent event; + EXPECT_EQ(DtmfBuffer::kOK, DtmfBuffer::ParseEvent(timestamp, payload_ptr, + sizeof(payload), &event)); + EXPECT_EQ(duration, event.duration); + EXPECT_EQ(end_bit, event.end_bit); + EXPECT_EQ(event_no, event.event_no); + EXPECT_EQ(timestamp, event.timestamp); + EXPECT_EQ(volume, event.volume); + + EXPECT_EQ(DtmfBuffer::kPayloadTooShort, + DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event)); +} + +TEST(DtmfBuffer, SimpleInsertAndGet) { + int event_no = 7; + bool end_bit = true; + int volume = 17; + int duration = 4711; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + DtmfEvent out_event; + // Too early to get event. + EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + // Get the event at its starting timestamp. 
+ EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + // Get the event some time into the event. + EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); +// Give a "current" timestamp after the event has ended. +#ifdef LEGACY_BITEXACT + EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event)); +#endif + EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event)); + EXPECT_EQ(0u, buffer.Length()); + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, MergingPackets) { + int event_no = 0; + bool end_bit = false; + int volume = 17; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + event.duration += 80; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + event.duration += 80; + event.end_bit = true; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + EXPECT_EQ(1u, buffer.Length()); + + DtmfEvent out_event; + EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); +} + +// This test case inserts one shorter event completely overlapped by one longer +// event. The expected outcome is that only the longer event is played. 
+TEST(DtmfBuffer, OverlappingEvents) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678 + 80; + DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event)); + + event_no = 10; + end_bit = false; + timestamp = 0x12345678; + DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + long_event.duration += 80; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + long_event.duration += 80; + long_event.end_bit = true; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + EXPECT_EQ(2u, buffer.Length()); + + DtmfEvent out_event; + // Expect to get the long event. + EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(long_event, out_event)); +// Expect no more events. +#ifdef LEGACY_BITEXACT + EXPECT_TRUE( + buffer.GetEvent(timestamp + long_event.duration + 10, &out_event)); + EXPECT_TRUE(EqualEvents(long_event, out_event)); + EXPECT_TRUE( + buffer.GetEvent(timestamp + long_event.duration + 10, &out_event)); + EXPECT_TRUE(EqualEvents(short_event, out_event)); +#else + EXPECT_FALSE( + buffer.GetEvent(timestamp + long_event.duration + 10, &out_event)); +#endif + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, ExtrapolationTime) { + int event_no = 0; + bool end_bit = false; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event1(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(1u, buffer.Length()); + + DtmfEvent out_event; + // Get the event at the start. 
+ EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); + // Also get the event 100 samples after the end of the event (since we're + // missing the end bit). + uint32_t timestamp_now = timestamp + duration + 100; + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); + // Insert another event starting back-to-back with the previous event. + timestamp += duration; + event_no = 1; + DtmfEvent event2(timestamp, event_no, volume, duration, end_bit); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(2u, buffer.Length()); + // Now we expect to get the new event when supplying `timestamp_now`. + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(EqualEvents(event2, out_event)); + // Expect the first event to be erased now. + EXPECT_EQ(1u, buffer.Length()); + // Move `timestamp_now` to more than 560 samples after the end of the second + // event. Expect that event to be erased. 
+ timestamp_now = timestamp + duration + 600; +#ifdef LEGACY_BITEXACT + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); +#endif + EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, TimestampWraparound) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp1 = 0xFFFFFFFF - duration; + DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit); + uint32_t timestamp2 = 0; + DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(2u, buffer.Length()); + DtmfEvent out_event; + EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); +#ifdef LEGACY_BITEXACT + EXPECT_EQ(1u, buffer.Length()); +#else + EXPECT_EQ(2u, buffer.Length()); +#endif + + buffer.Flush(); + // Reverse the insert order. Expect same results. + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(2u, buffer.Length()); + EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); +#ifdef LEGACY_BITEXACT + EXPECT_EQ(1u, buffer.Length()); +#else + EXPECT_EQ(2u, buffer.Length()); +#endif +} + +TEST(DtmfBuffer, InvalidEvents) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + + // Invalid event number. + event.event_no = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.event_no = 16; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.event_no = 0; // Valid value; + + // Invalid volume. 
+ event.volume = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.volume = 64; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.volume = 0; // Valid value; + + // Invalid duration. + event.duration = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 0; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 0xFFFF + 1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 1; // Valid value; + + // Finish with a valid event, just to verify that all is ok. + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc new file mode 100644 index 0000000000..9061e27c67 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.cc @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This class provides a generator for DTMF tones. The tone generation is based +// on a sinusoid recursion. Each sinusoid is generated using a recursion +// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient +// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and +// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting +// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but +// kept this way due to historical reasons.) 
+// TODO(hlundin): Change to positive rotation? +// +// Each key on the telephone keypad corresponds to an "event", 0-15. Each event +// is mapped to a tone pair, with a low and a high frequency. There are four +// low and four high frequencies, each corresponding to a row and column, +// respectively, on the keypad as illustrated below. +// +// 1209 Hz 1336 Hz 1477 Hz 1633 Hz +// 697 Hz 1 2 3 12 +// 770 Hz 4 5 6 13 +// 852 Hz 7 8 9 14 +// 941 Hz 10 0 11 15 + +#include "modules/audio_coding/neteq/dtmf_tone_generator.h" + +#include "modules/audio_coding/neteq/audio_vector.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for +// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15. +// Values are in Q14. +const int DtmfToneGenerator::kCoeff1[4][16] = { + {24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701, + 24219, 24219, 27980, 26956, 25701, 24219}, + {30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951, + 30556, 30556, 31548, 31281, 30951, 30556}, + {32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311, + 32210, 32210, 32462, 32394, 32311, 32210}, + {32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564, + 32520, 32520, 32632, 32602, 32564, 32520}}; + +// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for +// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15. +// Values are in Q14. 
+const int DtmfToneGenerator::kCoeff2[4][16] = { + {16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085, + 19073, 13085, 9315, 9315, 9315, 9315}, + {28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409, + 29144, 27409, 26258, 26258, 26258, 26258}, + {31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400, + 31849, 31400, 31098, 31098, 31098, 31098}, + {32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157, + 32359, 32157, 32022, 32022, 32022, 32022}}; + +// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone, +// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15. +// Values are in Q14. +const int DtmfToneGenerator::kInitValue1[4][16] = { + {11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036, + 11036, 8528, 9315, 10163, 11036}, + {5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918, + 4429, 4879, 5380, 5918}, + {3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010, + 2235, 2468, 2728, 3010}, + {2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013, + 1493, 1649, 1823, 2013}}; + +// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone, +// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15. +// Values are in Q14. +const int DtmfToneGenerator::kInitValue2[4][16] = { + {14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021, + 13323, 15021, 15708, 15708, 15708, 15708}, + {8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979, + 9801, 9801, 9801, 9801}, + {4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685, + 5164, 5164, 5164, 5164}, + {2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148, + 3476, 3476, 3476, 3476}}; + +// Amplitude multipliers for volume values 0 through 63, corresponding to +// 0 dBm0 through -63 dBm0. Values are in Q14. 
+// for a in range(0, 64): +// print round(16141.0 * 10**(-float(a)/20)) +const int DtmfToneGenerator::kAmplitude[64] = { + 16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104, + 4549, 4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439, + 1282, 1143, 1018, 908, 809, 721, 643, 573, 510, 455, 405, + 361, 322, 287, 256, 228, 203, 181, 161, 144, 128, 114, + 102, 91, 81, 72, 64, 57, 51, 45, 41, 36, 32, + 29, 26, 23, 20, 18, 16, 14, 13, 11}; + +// Constructor. +DtmfToneGenerator::DtmfToneGenerator() + : initialized_(false), coeff1_(0), coeff2_(0), amplitude_(0) {} + +// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000, +// 48000), event (0-15) and attenuation (0-63 dB, used as index into +// kAmplitude). +// Returns 0 on success, otherwise kParameterError. The generator is left +// uninitialized when an error is returned. +int DtmfToneGenerator::Init(int fs, int event, int attenuation) { + initialized_ = false; + size_t fs_index; + if (fs == 8000) { + fs_index = 0; + } else if (fs == 16000) { + fs_index = 1; + } else if (fs == 32000) { + fs_index = 2; + } else if (fs == 48000) { + fs_index = 3; + } else { + RTC_DCHECK_NOTREACHED(); + fs_index = 1; // NOTE(review): index 1 selects the 16000 Hz tables, not 8000 Hz; confirm the intended default. + } + + if (event < 0 || event > 15) { + return kParameterError; // Invalid event number. + } + + if (attenuation < 0 || attenuation > 63) { + return kParameterError; // Invalid attenuation. + } + + // Look up oscillator coefficient for low and high frequencies. + RTC_DCHECK_LE(0, fs_index); + RTC_DCHECK_GT(arraysize(kCoeff1), fs_index); + RTC_DCHECK_GT(arraysize(kCoeff2), fs_index); + RTC_DCHECK_LE(0, event); + RTC_DCHECK_GT(arraysize(kCoeff1[fs_index]), event); + RTC_DCHECK_GT(arraysize(kCoeff2[fs_index]), event); + coeff1_ = kCoeff1[fs_index][event]; + coeff2_ = kCoeff2[fs_index][event]; + + // Look up amplitude multiplier. + RTC_DCHECK_LE(0, attenuation); + RTC_DCHECK_GT(arraysize(kAmplitude), attenuation); + amplitude_ = kAmplitude[attenuation]; + + // Initialize sample history.
+ RTC_DCHECK_LE(0, fs_index); + RTC_DCHECK_GT(arraysize(kInitValue1), fs_index); + RTC_DCHECK_GT(arraysize(kInitValue2), fs_index); + RTC_DCHECK_LE(0, event); + RTC_DCHECK_GT(arraysize(kInitValue1[fs_index]), event); + RTC_DCHECK_GT(arraysize(kInitValue2[fs_index]), event); + sample_history1_[0] = kInitValue1[fs_index][event]; + sample_history1_[1] = 0; + sample_history2_[0] = kInitValue2[fs_index][event]; + sample_history2_[1] = 0; + + initialized_ = true; + return 0; +} + +// Reset tone generator to uninitialized state. +void DtmfToneGenerator::Reset() { + initialized_ = false; +} + +// Generate num_samples of DTMF signal and write to `output`. +// Returns the number of samples generated, kNotInitialized if the generator +// is not in the initialized state, or kParameterError if `output` is null. +int DtmfToneGenerator::Generate(size_t num_samples, AudioMultiVector* output) { + if (!initialized_) { + return kNotInitialized; + } + + if (!output) { + return kParameterError; + } + + output->AssertSize(num_samples); + for (size_t i = 0; i < num_samples; ++i) { + // Use recursion formula y[n] = a * y[n - 1] - y[n - 2]. + int16_t temp_val_low = + ((coeff1_ * sample_history1_[1] + 8192) >> 14) - sample_history1_[0]; + int16_t temp_val_high = + ((coeff2_ * sample_history2_[1] + 8192) >> 14) - sample_history2_[0]; + + // Update recursion memory. + sample_history1_[0] = sample_history1_[1]; + sample_history1_[1] = temp_val_low; + sample_history2_[0] = sample_history2_[1]; + sample_history2_[1] = temp_val_high; + + // Attenuate the low frequency tone 3 dB. + int32_t temp_val = + kAmpMultiplier * temp_val_low + temp_val_high * (1 << 15); + // Normalize the signal to Q14 with proper rounding. + temp_val = (temp_val + 16384) >> 15; + // Scale the signal to correct volume. + (*output)[0][i] = + static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14); + } + // Copy first channel to all other channels.
+ for (size_t channel = 1; channel < output->Channels(); ++channel) { + output->CopyChannel(0, channel); + } + + return static_cast<int>(num_samples); +} + +bool DtmfToneGenerator::initialized() const { + return initialized_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h new file mode 100644 index 0000000000..35114f4f49 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/neteq/audio_multi_vector.h" + +namespace webrtc { + +// This class provides a generator for DTMF tones. +class DtmfToneGenerator { + public: + enum ReturnCodes { + kNotInitialized = -1, + kParameterError = -2, + }; + + DtmfToneGenerator(); + virtual ~DtmfToneGenerator() {} + + DtmfToneGenerator(const DtmfToneGenerator&) = delete; + DtmfToneGenerator& operator=(const DtmfToneGenerator&) = delete; + + virtual int Init(int fs, int event, int attenuation); + virtual void Reset(); + virtual int Generate(size_t num_samples, AudioMultiVector* output); + virtual bool initialized() const; + + private: + static const int kCoeff1[4][16]; // 1st oscillator model coefficient table. + static const int kCoeff2[4][16]; // 2nd oscillator model coefficient table. 
+ static const int kInitValue1[4][16]; // Initialization for 1st oscillator. + static const int kInitValue2[4][16]; // Initialization for 2nd oscillator. + static const int kAmplitude[64]; // Amplitude for 0 through -63 dBm0. + static const int16_t kAmpMultiplier = 23171; // 3 dB attenuation (in Q15). + + bool initialized_; // True if generator is initialized properly. + int coeff1_; // 1st oscillator coefficient for this event. + int coeff2_; // 2nd oscillator coefficient for this event. + int amplitude_; // Amplitude for this event. + int16_t sample_history1_[2]; // Last 2 samples for the 1st oscillator. + int16_t sample_history2_[2]; // Last 2 samples for the 2nd oscillator. +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc new file mode 100644 index 0000000000..e843706dd3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DtmfToneGenerator class. 
+ +#include "modules/audio_coding/neteq/dtmf_tone_generator.h" + +#include <math.h> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +class DtmfToneGeneratorTest : public ::testing::Test { + protected: + static const double kLowFreqHz[16]; + static const double kHighFreqHz[16]; + // This is the attenuation applied to all cases. + const double kBaseAttenuation = 16141.0 / 16384.0; + const double k3dbAttenuation = 23171.0 / 32768; + const int kNumSamples = 10; + + void TestAllTones(int fs_hz, int channels) { + AudioMultiVector signal(channels); + + for (int event = 0; event <= 15; ++event) { + rtc::StringBuilder ss; + ss << "Checking event " << event << " at sample rate " << fs_hz; + SCOPED_TRACE(ss.str()); + const int kAttenuation = 0; + ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, kAttenuation)); + EXPECT_TRUE(tone_gen_.initialized()); + EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal)); + + double f1 = kLowFreqHz[event]; + double f2 = kHighFreqHz[event]; + const double pi = 3.14159265358979323846; + + for (int n = 0; n < kNumSamples; ++n) { + double x = k3dbAttenuation * sin(2.0 * pi * f1 / fs_hz * (-n - 1)) + + sin(2.0 * pi * f2 / fs_hz * (-n - 1)); + x *= kBaseAttenuation; + x = ldexp(x, 14); // Scale to Q14. + for (int channel = 0; channel < channels; ++channel) { + EXPECT_NEAR(x, static_cast<double>(signal[channel][n]), 25); + } + } + + tone_gen_.Reset(); + EXPECT_FALSE(tone_gen_.initialized()); + } + } + + void TestAmplitudes(int fs_hz, int channels) { + AudioMultiVector signal(channels); + AudioMultiVector ref_signal(channels); + + const int event_vec[] = {0, 4, 9, 13}; // Test a few events. + for (int e = 0; e < 4; ++e) { + int event = event_vec[e]; + // Create full-scale reference. + ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, 0)); // 0 attenuation. 
+ EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &ref_signal)); + // Test every 5 steps (to save time). + for (int attenuation = 1; attenuation <= 63; attenuation += 5) { + rtc::StringBuilder ss; + ss << "Checking event " << event << " at sample rate " << fs_hz; + ss << "; attenuation " << attenuation; + SCOPED_TRACE(ss.str()); + ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, attenuation)); + EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal)); + for (int n = 0; n < kNumSamples; ++n) { + double attenuation_factor = + DbToRatio(-static_cast<float>(attenuation)); + // Verify that the attenuation is correct. + for (int channel = 0; channel < channels; ++channel) { + EXPECT_NEAR(attenuation_factor * ref_signal[channel][n], + signal[channel][n], 2); + } + } + + tone_gen_.Reset(); + } + } + } + + DtmfToneGenerator tone_gen_; +}; + +// Low and high frequencies for events 0 through 15. +const double DtmfToneGeneratorTest::kLowFreqHz[16] = { + 941.0, 697.0, 697.0, 697.0, 770.0, 770.0, 770.0, 852.0, + 852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0}; +const double DtmfToneGeneratorTest::kHighFreqHz[16] = { + 1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0, + 1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, 1633.0}; + +TEST_F(DtmfToneGeneratorTest, Test8000Mono) { + TestAllTones(8000, 1); + TestAmplitudes(8000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test16000Mono) { + TestAllTones(16000, 1); + TestAmplitudes(16000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test32000Mono) { + TestAllTones(32000, 1); + TestAmplitudes(32000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test48000Mono) { + TestAllTones(48000, 1); + TestAmplitudes(48000, 1); +} + +TEST_F(DtmfToneGeneratorTest, Test8000Stereo) { + TestAllTones(8000, 2); + TestAmplitudes(8000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test16000Stereo) { + TestAllTones(16000, 2); + TestAmplitudes(16000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test32000Stereo) { + TestAllTones(32000, 2); + 
TestAmplitudes(32000, 2); +} + +TEST_F(DtmfToneGeneratorTest, Test48000Stereo) { + TestAllTones(48000, 2); + TestAmplitudes(48000, 2); +} + +TEST(DtmfToneGenerator, TestErrors) { + DtmfToneGenerator tone_gen; + const int kNumSamples = 10; + AudioMultiVector signal(1); // One channel. + + // Try to generate tones without initializing. + EXPECT_EQ(DtmfToneGenerator::kNotInitialized, + tone_gen.Generate(kNumSamples, &signal)); + + const int fs = 16000; // Valid sample rate. + const int event = 7; // Valid event. + const int attenuation = 0; // Valid attenuation. + // Initialize with invalid event -1. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, -1, attenuation)); + // Initialize with invalid event 16. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, 16, attenuation)); + // Initialize with invalid attenuation -1. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1)); + // Initialize with invalid attenuation 64. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 64)); + EXPECT_FALSE(tone_gen.initialized()); // Should still be uninitialized. + + // Initialize with valid parameters. + ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation)); + EXPECT_TRUE(tone_gen.initialized()); + // NULL pointer to destination. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Generate(kNumSamples, NULL)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc b/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc new file mode 100644 index 0000000000..9c3274609f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand.cc @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/expand.h" + +#include <string.h> // memset + +#include <algorithm> // min, max +#include <limits> // numeric_limits<T> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/cross_correlation.h" +#include "modules/audio_coding/neteq/dsp_helper.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +Expand::Expand(BackgroundNoise* background_noise, + SyncBuffer* sync_buffer, + RandomVector* random_vector, + StatisticsCalculator* statistics, + int fs, + size_t num_channels) + : random_vector_(random_vector), + sync_buffer_(sync_buffer), + first_expand_(true), + fs_hz_(fs), + num_channels_(num_channels), + consecutive_expands_(0), + background_noise_(background_noise), + statistics_(statistics), + overlap_length_(5 * fs / 8000), + lag_index_direction_(0), + current_lag_index_(0), + stop_muting_(false), + expand_duration_samples_(0), + channel_parameters_(new ChannelParameters[num_channels_]) { + RTC_DCHECK(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000); + RTC_DCHECK_LE(fs, + static_cast<int>(kMaxSampleRate)); // Should not be possible. 
+ RTC_DCHECK_GT(num_channels_, 0); + memset(expand_lags_, 0, sizeof(expand_lags_)); + Reset(); +} + +Expand::~Expand() = default; + +void Expand::Reset() { + first_expand_ = true; + consecutive_expands_ = 0; + max_lag_ = 0; + for (size_t ix = 0; ix < num_channels_; ++ix) { + channel_parameters_[ix].expand_vector0.Clear(); + channel_parameters_[ix].expand_vector1.Clear(); + } +} + +int Expand::Process(AudioMultiVector* output) { + int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30]; + int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125]; + static const int kTempDataSize = 3600; + int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. + int16_t* voiced_vector_storage = temp_data; + int16_t* voiced_vector = &voiced_vector_storage[overlap_length_]; + static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; + int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; + int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; + int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder; + + int fs_mult = fs_hz_ / 8000; + + if (first_expand_) { + // Perform initial setup if this is the first expansion since last reset. + AnalyzeSignal(random_vector); + first_expand_ = false; + expand_duration_samples_ = 0; + } else { + // This is not the first expansion, parameters are already estimated. + // Extract a noise segment. + size_t rand_length = max_lag_; + // This only applies to SWB where length could be larger than 256. + RTC_DCHECK_LE(rand_length, kMaxSampleRate / 8000 * 120 + 30); + GenerateRandomVector(2, rand_length, random_vector); + } + + // Generate signal. + UpdateLagIndex(); + + // Voiced part. + // Generate a weighted vector with the current lag. + size_t expansion_vector_length = max_lag_ + overlap_length_; + size_t current_lag = expand_lags_[current_lag_index_]; + // Copy lag+overlap data. 
+ size_t expansion_vector_position = + expansion_vector_length - current_lag - overlap_length_; + size_t temp_length = current_lag + overlap_length_; + for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { + ChannelParameters& parameters = channel_parameters_[channel_ix]; + if (current_lag_index_ == 0) { + // Use only expand_vector0. + RTC_DCHECK_LE(expansion_vector_position + temp_length, + parameters.expand_vector0.Size()); + parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position, + voiced_vector_storage); + } else if (current_lag_index_ == 1) { + std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]); + parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position, + temp_0.get()); + std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]); + parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position, + temp_1.get()); + // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1. + WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 3, temp_1.get(), 1, 2, + voiced_vector_storage, temp_length); + } else if (current_lag_index_ == 2) { + // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1. + RTC_DCHECK_LE(expansion_vector_position + temp_length, + parameters.expand_vector0.Size()); + RTC_DCHECK_LE(expansion_vector_position + temp_length, + parameters.expand_vector1.Size()); + + std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]); + parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position, + temp_0.get()); + std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]); + parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position, + temp_1.get()); + WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 1, temp_1.get(), 1, 1, + voiced_vector_storage, temp_length); + } + + // Get tapering window parameters. Values are in Q15. 
+ int16_t muting_window, muting_window_increment; + int16_t unmuting_window, unmuting_window_increment; + if (fs_hz_ == 8000) { + muting_window = DspHelper::kMuteFactorStart8kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement8kHz; + unmuting_window = DspHelper::kUnmuteFactorStart8kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz; + } else if (fs_hz_ == 16000) { + muting_window = DspHelper::kMuteFactorStart16kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement16kHz; + unmuting_window = DspHelper::kUnmuteFactorStart16kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz; + } else if (fs_hz_ == 32000) { + muting_window = DspHelper::kMuteFactorStart32kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement32kHz; + unmuting_window = DspHelper::kUnmuteFactorStart32kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz; + } else { // fs_ == 48000 + muting_window = DspHelper::kMuteFactorStart48kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement48kHz; + unmuting_window = DspHelper::kUnmuteFactorStart48kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz; + } + + // Smooth the expanded if it has not been muted to a low amplitude and + // `current_voice_mix_factor` is larger than 0.5. + if ((parameters.mute_factor > 819) && + (parameters.current_voice_mix_factor > 8192)) { + size_t start_ix = sync_buffer_->Size() - overlap_length_; + for (size_t i = 0; i < overlap_length_; i++) { + // Do overlap add between new vector and overlap. 
+ (*sync_buffer_)[channel_ix][start_ix + i] = + (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) + + (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) * + unmuting_window) + + 16384) >> + 15; + muting_window += muting_window_increment; + unmuting_window += unmuting_window_increment; + } + } else if (parameters.mute_factor == 0) { + // The expanded signal will consist of only comfort noise if + // mute_factor = 0. Set the output length to 15 ms for best noise + // production. + // TODO(hlundin): This has been disabled since the length of + // parameters.expand_vector0 and parameters.expand_vector1 no longer + // match with expand_lags_, causing invalid reads and writes. Is it a good + // idea to enable this again, and solve the vector size problem? + // max_lag_ = fs_mult * 120; + // expand_lags_[0] = fs_mult * 120; + // expand_lags_[1] = fs_mult * 120; + // expand_lags_[2] = fs_mult * 120; + } + + // Unvoiced part. + // Filter `scaled_random_vector` through `ar_filter_`. + memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state, + sizeof(int16_t) * kUnvoicedLpcOrder); + int32_t add_constant = 0; + if (parameters.ar_gain_scale > 0) { + add_constant = 1 << (parameters.ar_gain_scale - 1); + } + WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector, + parameters.ar_gain, add_constant, + parameters.ar_gain_scale, current_lag); + WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector, + parameters.ar_filter, kUnvoicedLpcOrder + 1, + current_lag); + memcpy(parameters.ar_filter_state, + &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + + // Combine voiced and unvoiced contributions. + + // Set a suitable cross-fading slope. + // For lag = + // <= 31 * fs_mult => go from 1 to 0 in about 8 ms; + // (>= 31 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms; + // >= 64 * fs_mult => go from 1 to 0 in about 32 ms. + // temp_shift = getbits(max_lag_) - 5. 
+ int temp_shift = + (31 - WebRtcSpl_NormW32(rtc::dchecked_cast<int32_t>(max_lag_))) - 5; + int16_t mix_factor_increment = 256 >> temp_shift; + if (stop_muting_) { + mix_factor_increment = 0; + } + + // Create combined signal by shifting in more and more of unvoiced part. + temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment). + size_t temp_length = + (parameters.current_voice_mix_factor - parameters.voice_mix_factor) >> + temp_shift; + temp_length = std::min(temp_length, current_lag); + DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length, + &parameters.current_voice_mix_factor, + mix_factor_increment, temp_data); + + // End of cross-fading period was reached before end of expanded signal + // path. Mix the rest with a fixed mixing factor. + if (temp_length < current_lag) { + if (mix_factor_increment != 0) { + parameters.current_voice_mix_factor = parameters.voice_mix_factor; + } + int16_t temp_scale = 16384 - parameters.current_voice_mix_factor; + WebRtcSpl_ScaleAndAddVectorsWithRound( + voiced_vector + temp_length, parameters.current_voice_mix_factor, + unvoiced_vector + temp_length, temp_scale, 14, + temp_data + temp_length, current_lag - temp_length); + } + + // Select muting slope depending on how many consecutive expands we have + // done. + if (consecutive_expands_ == 3) { + // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms. + // mute_slope = 0.0010 / fs_mult in Q20. + parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult); + } + if (consecutive_expands_ == 7) { + // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms. + // mute_slope = 0.0020 / fs_mult in Q20. + parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult); + } + + // Mute segment according to slope value. + if ((consecutive_expands_ != 0) || !parameters.onset) { + // Mute to the previous level, then continue with the muting.
+ WebRtcSpl_AffineTransformVector( + temp_data, temp_data, parameters.mute_factor, 8192, 14, current_lag); + + if (!stop_muting_) { + DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag); + + // Shift by 6 to go from Q20 to Q14. + // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong. + // Legacy. + int16_t gain = static_cast<int16_t>( + 16384 - (((current_lag * parameters.mute_slope) + 8192) >> 6)); + gain = ((gain * parameters.mute_factor) + 8192) >> 14; + + // Guard against getting stuck with very small (but sometimes audible) + // gain. + if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) { + parameters.mute_factor = 0; + } else { + parameters.mute_factor = gain; + } + } + } + + // Background noise part. + background_noise_->GenerateBackgroundNoise( + random_vector, channel_ix, channel_parameters_[channel_ix].mute_slope, + TooManyExpands(), current_lag, unvoiced_array_memory); + + // Add background noise to the combined voiced-unvoiced signal. + for (size_t i = 0; i < current_lag; i++) { + temp_data[i] = temp_data[i] + noise_vector[i]; + } + if (channel_ix == 0) { + output->AssertSize(current_lag); + } else { + RTC_DCHECK_EQ(output->Size(), current_lag); + } + (*output)[channel_ix].OverwriteAt(temp_data, current_lag, 0); + } + + // Increase call number and cap it. + consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands + ? kMaxConsecutiveExpands + : consecutive_expands_ + 1; + expand_duration_samples_ += output->Size(); + // Clamp the duration counter at 2 seconds. + expand_duration_samples_ = std::min(expand_duration_samples_, + rtc::dchecked_cast<size_t>(fs_hz_ * 2)); + return 0; +} + +void Expand::SetParametersForNormalAfterExpand() { + current_lag_index_ = 0; + lag_index_direction_ = 0; + stop_muting_ = true; // Do not mute signal any more. 
+ statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_); + statistics_->EndExpandEvent(fs_hz_); +} + +void Expand::SetParametersForMergeAfterExpand() { + current_lag_index_ = -1; /* out of the 3 possible ones */ + lag_index_direction_ = 1; /* make sure we get the "optimal" lag */ + stop_muting_ = true; + statistics_->EndExpandEvent(fs_hz_); +} + +bool Expand::Muted() const { + if (first_expand_ || stop_muting_) + return false; + RTC_DCHECK(channel_parameters_); + for (size_t ch = 0; ch < num_channels_; ++ch) { + if (channel_parameters_[ch].mute_factor != 0) + return false; + } + return true; +} + +size_t Expand::overlap_length() const { + return overlap_length_; +} + +void Expand::InitializeForAnExpandPeriod() { + lag_index_direction_ = 1; + current_lag_index_ = -1; + stop_muting_ = false; + random_vector_->set_seed_increment(1); + consecutive_expands_ = 0; + for (size_t ix = 0; ix < num_channels_; ++ix) { + channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14. + channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14. + // Start with 0 gain for background noise. 
+ background_noise_->SetMuteFactor(ix, 0); + } +} + +bool Expand::TooManyExpands() { + return consecutive_expands_ >= kMaxConsecutiveExpands; +} + +void Expand::AnalyzeSignal(int16_t* random_vector) { + int32_t auto_correlation[kUnvoicedLpcOrder + 1]; + int16_t reflection_coeff[kUnvoicedLpcOrder]; + int16_t correlation_vector[kMaxSampleRate / 8000 * 102]; + size_t best_correlation_index[kNumCorrelationCandidates]; + int16_t best_correlation[kNumCorrelationCandidates]; + size_t best_distortion_index[kNumCorrelationCandidates]; + int16_t best_distortion[kNumCorrelationCandidates]; + int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1]; + int32_t best_distortion_w32[kNumCorrelationCandidates]; + static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; + int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; + int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; + + int fs_mult = fs_hz_ / 8000; + + // Pre-calculate common multiplications with fs_mult. + size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4); + size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20); + size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120); + size_t fs_mult_dist_len = fs_mult * kDistortionLength; + size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; + + const size_t signal_length = static_cast<size_t>(256 * fs_mult); + + const size_t audio_history_position = sync_buffer_->Size() - signal_length; + std::unique_ptr<int16_t[]> audio_history(new int16_t[signal_length]); + (*sync_buffer_)[0].CopyTo(signal_length, audio_history_position, + audio_history.get()); + + // Initialize. + InitializeForAnExpandPeriod(); + + // Calculate correlation in downsampled domain (4 kHz sample rate). + size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. + // If it is decided to break bit-exactness `correlation_length` should be + // initialized to the return value of Correlation(). 
+ Correlation(audio_history.get(), signal_length, correlation_vector); + + // Find peaks in correlation vector. + DspHelper::PeakDetection(correlation_vector, correlation_length, + kNumCorrelationCandidates, fs_mult, + best_correlation_index, best_correlation); + + // Adjust peak locations; cross-correlation lags start at 2.5 ms + // (20 * fs_mult samples). + best_correlation_index[0] += fs_mult_20; + best_correlation_index[1] += fs_mult_20; + best_correlation_index[2] += fs_mult_20; + + // Calculate distortion around the `kNumCorrelationCandidates` best lags. + int distortion_scale = 0; + for (size_t i = 0; i < kNumCorrelationCandidates; i++) { + size_t min_index = + std::max(fs_mult_20, best_correlation_index[i] - fs_mult_4); + size_t max_index = + std::min(fs_mult_120 - 1, best_correlation_index[i] + fs_mult_4); + best_distortion_index[i] = DspHelper::MinDistortion( + &(audio_history[signal_length - fs_mult_dist_len]), min_index, + max_index, fs_mult_dist_len, &best_distortion_w32[i]); + distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]), + distortion_scale); + } + // Shift the distortion values to fit in 16 bits. + WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates, + best_distortion_w32, distortion_scale); + + // Find the maximizing index `i` of the cost function + // f[i] = best_correlation[i] / best_distortion[i]. + int32_t best_ratio = std::numeric_limits<int32_t>::min(); + size_t best_index = std::numeric_limits<size_t>::max(); + for (size_t i = 0; i < kNumCorrelationCandidates; ++i) { + int32_t ratio; + if (best_distortion[i] > 0) { + ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i]; + } else if (best_correlation[i] == 0) { + ratio = 0; // No correlation set result to zero. + } else { + ratio = std::numeric_limits<int32_t>::max(); // Denominator is zero. 
+ } + if (ratio > best_ratio) { + best_index = i; + best_ratio = ratio; + } + } + + size_t distortion_lag = best_distortion_index[best_index]; + size_t correlation_lag = best_correlation_index[best_index]; + max_lag_ = std::max(distortion_lag, correlation_lag); + + // Calculate the exact best correlation in the range between + // `correlation_lag` and `distortion_lag`. + correlation_length = std::max(std::min(distortion_lag + 10, fs_mult_120), + static_cast<size_t>(60 * fs_mult)); + + size_t start_index = std::min(distortion_lag, correlation_lag); + size_t correlation_lags = static_cast<size_t>( + WEBRTC_SPL_ABS_W16((distortion_lag - correlation_lag)) + 1); + RTC_DCHECK_LE(correlation_lags, static_cast<size_t>(99 * fs_mult + 1)); + + for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { + ChannelParameters& parameters = channel_parameters_[channel_ix]; + if (channel_ix > 0) { + // When channel_ix == 0, audio_history contains the correct audio. For the + // other cases, we will have to copy the correct channel into + // audio_history. + (*sync_buffer_)[channel_ix].CopyTo(signal_length, audio_history_position, + audio_history.get()); + } + + // Calculate suitable scaling. + int16_t signal_max = WebRtcSpl_MaxAbsValueW16( + &audio_history[signal_length - correlation_length - start_index - + correlation_lags], + correlation_length + start_index + correlation_lags - 1); + int correlation_scale = + (31 - WebRtcSpl_NormW32(signal_max * signal_max)) + + (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31; + correlation_scale = std::max(0, correlation_scale); + + // Calculate the correlation, store in `correlation_vector2`. + WebRtcSpl_CrossCorrelation( + correlation_vector2, + &(audio_history[signal_length - correlation_length]), + &(audio_history[signal_length - correlation_length - start_index]), + correlation_length, correlation_lags, correlation_scale, -1); + + // Find maximizing index. 
+ best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); + int32_t max_correlation = correlation_vector2[best_index]; + // Compensate index with start offset. + best_index = best_index + start_index; + + // Calculate energies. + int32_t energy1 = WebRtcSpl_DotProductWithScale( + &(audio_history[signal_length - correlation_length]), + &(audio_history[signal_length - correlation_length]), + correlation_length, correlation_scale); + int32_t energy2 = WebRtcSpl_DotProductWithScale( + &(audio_history[signal_length - correlation_length - best_index]), + &(audio_history[signal_length - correlation_length - best_index]), + correlation_length, correlation_scale); + + // Calculate the correlation coefficient between the two portions of the + // signal. + int32_t corr_coefficient; + if ((energy1 > 0) && (energy2 > 0)) { + int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0); + int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0); + // Make sure total scaling is even (to simplify scale factor after sqrt). + if ((energy1_scale + energy2_scale) & 1) { + // If sum is odd, add 1 to make it even. + energy1_scale += 1; + } + int32_t scaled_energy1 = energy1 >> energy1_scale; + int32_t scaled_energy2 = energy2 >> energy2_scale; + int16_t sqrt_energy_product = static_cast<int16_t>( + WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2)); + // Calculate max_correlation / sqrt(energy1 * energy2) in Q14. + int cc_shift = 14 - (energy1_scale + energy2_scale) / 2; + max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift); + corr_coefficient = + WebRtcSpl_DivW32W16(max_correlation, sqrt_energy_product); + // Cap at 1.0 in Q14. + corr_coefficient = std::min(16384, corr_coefficient); + } else { + corr_coefficient = 0; + } + + // Extract the two vectors expand_vector0 and expand_vector1 from + // `audio_history`. 
+ size_t expansion_length = max_lag_ + overlap_length_; + const int16_t* vector1 = &(audio_history[signal_length - expansion_length]); + const int16_t* vector2 = vector1 - distortion_lag; + // Normalize the second vector to the same energy as the first. + energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length, + correlation_scale); + energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length, + correlation_scale); + // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0, + // i.e., energy1 / energy2 is within 0.25 - 4. + int16_t amplitude_ratio; + if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) { + // Energy constraint fulfilled. Use both vectors and scale them + // accordingly. + int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0); + int32_t scaled_energy1 = scaled_energy2 - 13; + // Calculate scaled_energy1 / scaled_energy2 in Q13. + int32_t energy_ratio = + WebRtcSpl_DivW32W16(WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1), + static_cast<int16_t>(energy2 >> scaled_energy2)); + // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26). + amplitude_ratio = + static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13)); + // Copy the two vectors and give them the same energy. + parameters.expand_vector0.Clear(); + parameters.expand_vector0.PushBack(vector1, expansion_length); + parameters.expand_vector1.Clear(); + if (parameters.expand_vector1.Size() < expansion_length) { + parameters.expand_vector1.Extend(expansion_length - + parameters.expand_vector1.Size()); + } + std::unique_ptr<int16_t[]> temp_1(new int16_t[expansion_length]); + WebRtcSpl_AffineTransformVector( + temp_1.get(), const_cast<int16_t*>(vector2), amplitude_ratio, 4096, + 13, expansion_length); + parameters.expand_vector1.OverwriteAt(temp_1.get(), expansion_length, 0); + } else { + // Energy change constraint not fulfilled. Only use last vector. 
+ parameters.expand_vector0.Clear(); + parameters.expand_vector0.PushBack(vector1, expansion_length); + // Copy from expand_vector0 to expand_vector1. + parameters.expand_vector0.CopyTo(¶meters.expand_vector1); + // Set the energy_ratio since it is used by muting slope. + if ((energy1 / 4 < energy2) || (energy2 == 0)) { + amplitude_ratio = 4096; // 0.5 in Q13. + } else { + amplitude_ratio = 16384; // 2.0 in Q13. + } + } + + // Set the 3 lag values. + if (distortion_lag == correlation_lag) { + expand_lags_[0] = distortion_lag; + expand_lags_[1] = distortion_lag; + expand_lags_[2] = distortion_lag; + } else { + // `distortion_lag` and `correlation_lag` are not equal; use different + // combinations of the two. + // First lag is `distortion_lag` only. + expand_lags_[0] = distortion_lag; + // Second lag is the average of the two. + expand_lags_[1] = (distortion_lag + correlation_lag) / 2; + // Third lag is the average again, but rounding towards `correlation_lag`. + if (distortion_lag > correlation_lag) { + expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; + } else { + expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; + } + } + + // Calculate the LPC and the gain of the filters. + + // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. + size_t temp_index = + signal_length - fs_mult_lpc_analysis_len - kUnvoicedLpcOrder; + // Copy signal to temporary vector to be able to pad with leading zeros. 
+ int16_t* temp_signal = + new int16_t[fs_mult_lpc_analysis_len + kUnvoicedLpcOrder]; + memset(temp_signal, 0, + sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); + memcpy(&temp_signal[kUnvoicedLpcOrder], + &audio_history[temp_index + kUnvoicedLpcOrder], + sizeof(int16_t) * fs_mult_lpc_analysis_len); + CrossCorrelationWithAutoShift( + &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder], + fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation); + delete[] temp_signal; + + // Verify that variance is positive. + if (auto_correlation[0] > 0) { + // Estimate AR filter parameters using Levinson-Durbin algorithm; + // kUnvoicedLpcOrder + 1 filter coefficients. + int16_t stability = + WebRtcSpl_LevinsonDurbin(auto_correlation, parameters.ar_filter, + reflection_coeff, kUnvoicedLpcOrder); + + // Keep filter parameters only if filter is stable. + if (stability != 1) { + // Set first coefficient to 4096 (1.0 in Q12). + parameters.ar_filter[0] = 4096; + // Set remaining `kUnvoicedLpcOrder` coefficients to zero. + WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder); + } + } + + if (channel_ix == 0) { + // Extract a noise segment. + size_t noise_length; + if (distortion_lag < 40) { + noise_length = 2 * distortion_lag + 30; + } else { + noise_length = distortion_lag + 30; + } + if (noise_length <= RandomVector::kRandomTableSize) { + memcpy(random_vector, RandomVector::kRandomTable, + sizeof(int16_t) * noise_length); + } else { + // Only applies to SWB where length could be larger than + // `kRandomTableSize`. 
+ memcpy(random_vector, RandomVector::kRandomTable, + sizeof(int16_t) * RandomVector::kRandomTableSize); + RTC_DCHECK_LE(noise_length, kMaxSampleRate / 8000 * 120 + 30); + random_vector_->IncreaseSeedIncrement(2); + random_vector_->Generate( + noise_length - RandomVector::kRandomTableSize, + &random_vector[RandomVector::kRandomTableSize]); + } + } + + // Set up state vector and calculate scale factor for unvoiced filtering. + memcpy(parameters.ar_filter_state, + &(audio_history[signal_length - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + memcpy(unvoiced_vector - kUnvoicedLpcOrder, + &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128], + unvoiced_vector, parameters.ar_filter, + kUnvoicedLpcOrder + 1, 128); + const int unvoiced_max_abs = [&] { + const int16_t max_abs = WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128); + // Since WebRtcSpl_MaxAbsValueW16 returns 2^15 - 1 when the input contains + // -2^15, we have to conservatively bump the return value by 1 + // if it is 2^15 - 1. + return max_abs == WEBRTC_SPL_WORD16_MAX ? max_abs + 1 : max_abs; + }(); + // Pick the smallest n such that 2^n > unvoiced_max_abs; then the maximum + // value of the dot product is less than 2^7 * 2^(2*n) = 2^(2*n + 7), so to + // prevent overflows we want 2n + 7 <= 31, which means we should shift by + // 2n + 7 - 31 bits, if this value is greater than zero. + int unvoiced_prescale = + std::max(0, 2 * WebRtcSpl_GetSizeInBits(unvoiced_max_abs) - 24); + + int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale( + unvoiced_vector, unvoiced_vector, 128, unvoiced_prescale); + + // Normalize `unvoiced_energy` to 28 or 29 bits to preserve sqrt() accuracy. + int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3; + // Make sure we do an odd number of shifts since we already have 7 shifts + // from dividing with 128 earlier. 
This will make the total scale factor + // even, which is suitable for the sqrt. + unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1); + unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale); + int16_t unvoiced_gain = + static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy)); + parameters.ar_gain_scale = + 13 + (unvoiced_scale + 7 - unvoiced_prescale) / 2; + parameters.ar_gain = unvoiced_gain; + + // Calculate voice_mix_factor from corr_coefficient. + // Let x = corr_coefficient. Then, we compute: + // if (x > 0.48) + // voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096; + // else + // voice_mix_factor = 0; + if (corr_coefficient > 7875) { + int16_t x1, x2, x3; + // `corr_coefficient` is in Q14. + x1 = static_cast<int16_t>(corr_coefficient); + x2 = (x1 * x1) >> 14; // Shift 14 to keep result in Q14. + x3 = (x1 * x2) >> 14; + static const int kCoefficients[4] = {-5179, 19931, -16422, 5776}; + int32_t temp_sum = kCoefficients[0] * 16384; + temp_sum += kCoefficients[1] * x1; + temp_sum += kCoefficients[2] * x2; + temp_sum += kCoefficients[3] * x3; + parameters.voice_mix_factor = + static_cast<int16_t>(std::min(temp_sum / 4096, 16384)); + parameters.voice_mix_factor = + std::max(parameters.voice_mix_factor, static_cast<int16_t>(0)); + } else { + parameters.voice_mix_factor = 0; + } + + // Calculate muting slope. Reuse value from earlier scaling of + // `expand_vector0` and `expand_vector1`. + int16_t slope = amplitude_ratio; + if (slope > 12288) { + // slope > 1.5. + // Calculate (1 - (1 / slope)) / distortion_lag = + // (slope - 1) / (distortion_lag * slope). + // `slope` is in Q13, so 1 corresponds to 8192. Shift up to Q25 before + // the division. + // Shift the denominator from Q13 to Q5 before the division. The result of + // the division will then be in Q20. 
+ int16_t denom = + rtc::saturated_cast<int16_t>((distortion_lag * slope) >> 8); + int temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12, denom); + if (slope > 14746) { + // slope > 1.8. + // Divide by 2, with proper rounding. + parameters.mute_slope = (temp_ratio + 1) / 2; + } else { + // Divide by 8, with proper rounding. + parameters.mute_slope = (temp_ratio + 4) / 8; + } + parameters.onset = true; + } else { + // Calculate (1 - slope) / distortion_lag. + // Shift `slope` by 7 to Q20 before the division. The result is in Q20. + parameters.mute_slope = WebRtcSpl_DivW32W16( + (8192 - slope) * 128, static_cast<int16_t>(distortion_lag)); + if (parameters.voice_mix_factor <= 13107) { + // Make sure the mute factor decreases from 1.0 to 0.9 in no more than + // 6.25 ms. + // mute_slope >= 0.005 / fs_mult in Q20. + parameters.mute_slope = std::max(5243 / fs_mult, parameters.mute_slope); + } else if (slope > 8028) { + parameters.mute_slope = 0; + } + parameters.onset = false; + } + } +} + +Expand::ChannelParameters::ChannelParameters() + : mute_factor(16384), + ar_gain(0), + ar_gain_scale(0), + voice_mix_factor(0), + current_voice_mix_factor(0), + onset(false), + mute_slope(0) { + memset(ar_filter, 0, sizeof(ar_filter)); + memset(ar_filter_state, 0, sizeof(ar_filter_state)); +} + +void Expand::Correlation(const int16_t* input, + size_t input_length, + int16_t* output) const { + // Set parameters depending on sample rate. + const int16_t* filter_coefficients; + size_t num_coefficients; + int16_t downsampling_factor; + if (fs_hz_ == 8000) { + num_coefficients = 3; + downsampling_factor = 2; + filter_coefficients = DspHelper::kDownsample8kHzTbl; + } else if (fs_hz_ == 16000) { + num_coefficients = 5; + downsampling_factor = 4; + filter_coefficients = DspHelper::kDownsample16kHzTbl; + } else if (fs_hz_ == 32000) { + num_coefficients = 7; + downsampling_factor = 8; + filter_coefficients = DspHelper::kDownsample32kHzTbl; + } else { // fs_hz_ == 48000. 
+ num_coefficients = 7; + downsampling_factor = 12; + filter_coefficients = DspHelper::kDownsample48kHzTbl; + } + + // Correlate from lag 10 to lag 60 in downsampled domain. + // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.) + static const size_t kCorrelationStartLag = 10; + static const size_t kNumCorrelationLags = 54; + static const size_t kCorrelationLength = 60; + // Downsample to 4 kHz sample rate. + static const size_t kDownsampledLength = + kCorrelationStartLag + kNumCorrelationLags + kCorrelationLength; + int16_t downsampled_input[kDownsampledLength]; + static const size_t kFilterDelay = 0; + WebRtcSpl_DownsampleFast( + input + input_length - kDownsampledLength * downsampling_factor, + kDownsampledLength * downsampling_factor, downsampled_input, + kDownsampledLength, filter_coefficients, num_coefficients, + downsampling_factor, kFilterDelay); + + // Normalize `downsampled_input` to using all 16 bits. + int16_t max_value = + WebRtcSpl_MaxAbsValueW16(downsampled_input, kDownsampledLength); + int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); + WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, + downsampled_input, norm_shift); + + int32_t correlation[kNumCorrelationLags]; + CrossCorrelationWithAutoShift( + &downsampled_input[kDownsampledLength - kCorrelationLength], + &downsampled_input[kDownsampledLength - kCorrelationLength - + kCorrelationStartLag], + kCorrelationLength, kNumCorrelationLags, -1, correlation); + + // Normalize and move data from 32-bit to 16-bit vector. + int32_t max_correlation = + WebRtcSpl_MaxAbsValueW32(correlation, kNumCorrelationLags); + int16_t norm_shift2 = static_cast<int16_t>( + std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); + WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, + norm_shift2); +} + +void Expand::UpdateLagIndex() { + current_lag_index_ = current_lag_index_ + lag_index_direction_; + // Change direction if needed. 
+ if (current_lag_index_ <= 0) { + lag_index_direction_ = 1; + } + if (current_lag_index_ >= kNumLags - 1) { + lag_index_direction_ = -1; + } +} + +Expand* ExpandFactory::Create(BackgroundNoise* background_noise, + SyncBuffer* sync_buffer, + RandomVector* random_vector, + StatisticsCalculator* statistics, + int fs, + size_t num_channels) const { + return new Expand(background_noise, sync_buffer, random_vector, statistics, + fs, num_channels); +} + +void Expand::GenerateRandomVector(int16_t seed_increment, + size_t length, + int16_t* random_vector) { + // TODO(turajs): According to hlundin The loop should not be needed. Should be + // just as good to generate all of the vector in one call. + size_t samples_generated = 0; + const size_t kMaxRandSamples = RandomVector::kRandomTableSize; + while (samples_generated < length) { + size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); + random_vector_->IncreaseSeedIncrement(seed_increment); + random_vector_->Generate(rand_length, &random_vector[samples_generated]); + samples_generated += rand_length; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/expand.h new file mode 100644 index 0000000000..2e64583ec2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
// This class handles extrapolation of audio data from the sync_buffer to
// produce packet-loss concealment.
// TODO(hlundin): Refactor this class to divide the long methods into shorter
// ones.
class Expand {
 public:
  // None of the pointers are owned by this object (they are stored as plain
  // pointers). `fs` is the sample rate in Hz; the implementation distinguishes
  // 8000, 16000 and 32000 and treats any other value as 48000.
  Expand(BackgroundNoise* background_noise,
         SyncBuffer* sync_buffer,
         RandomVector* random_vector,
         StatisticsCalculator* statistics,
         int fs,
         size_t num_channels);

  virtual ~Expand();

  Expand(const Expand&) = delete;
  Expand& operator=(const Expand&) = delete;

  // Resets the object.
  virtual void Reset();

  // The main method to produce concealment data. The data is appended to the
  // end of `output`.
  virtual int Process(AudioMultiVector* output);

  // Prepare the object to do extra expansion during normal operation following
  // a period of expands.
  virtual void SetParametersForNormalAfterExpand();

  // Prepare the object to do extra expansion during merge operation following
  // a period of expands.
  virtual void SetParametersForMergeAfterExpand();

  // Returns the mute factor for `channel`. `channel` must be smaller than the
  // number of channels (checked with RTC_DCHECK_LT only).
  int16_t MuteFactor(size_t channel) const {
    RTC_DCHECK_LT(channel, num_channels_);
    return channel_parameters_[channel].mute_factor;
  }

  // Returns true if expansion has been faded down to zero amplitude (for all
  // channels); false otherwise.
  bool Muted() const;

  // Accessors and mutators.
  virtual size_t overlap_length() const;
  // The larger of the distortion lag and the correlation lag selected by the
  // signal analysis.
  size_t max_lag() const { return max_lag_; }

 protected:
  static const int kMaxConsecutiveExpands = 200;
  // Fills `random_vector` with `length` random samples. The samples are
  // generated in chunks of at most RandomVector::kRandomTableSize, and the
  // seed increment of `random_vector_` is increased by `seed_increment` before
  // each chunk.
  void GenerateRandomVector(int16_t seed_increment,
                            size_t length,
                            int16_t* random_vector);

  // Initializes member variables at the beginning of an expand period.
  void InitializeForAnExpandPeriod();

  bool TooManyExpands();

  // Analyzes the signal history in `sync_buffer_`, and set up all parameters
  // necessary to produce concealment data.
  void AnalyzeSignal(int16_t* random_vector);

  RandomVector* const random_vector_;
  SyncBuffer* const sync_buffer_;
  bool first_expand_;
  const int fs_hz_;
  const size_t num_channels_;
  int consecutive_expands_;

 private:
  // Order of the AR (LPC) filter estimated for the unvoiced part;
  // `ar_filter` holds kUnvoicedLpcOrder + 1 coefficients.
  static const size_t kUnvoicedLpcOrder = 6;
  // Number of correlation peaks for which a distortion measure is evaluated.
  static const size_t kNumCorrelationCandidates = 3;
  static const size_t kDistortionLength = 20;
  static const size_t kLpcAnalysisLength = 160;
  // Used to bound the length of the extracted noise segment
  // (kMaxSampleRate / 8000 * 120 + 30 samples).
  static const size_t kMaxSampleRate = 48000;
  // Number of entries in `expand_lags_`.
  static const int kNumLags = 3;

  // Per-channel concealment parameters.
  struct ChannelParameters {
    // Sets `mute_factor` to 16384, zeroes `ar_filter` and `ar_filter_state`,
    // and sets all other scalar members to 0 / false.
    ChannelParameters();
    int16_t mute_factor;
    // AR filter coefficients from Levinson-Durbin. Replaced by
    // {4096 (1.0 in Q12), 0, ..., 0} when the estimated filter is not stable.
    int16_t ar_filter[kUnvoicedLpcOrder + 1];
    // Filter state; loaded with the last kUnvoicedLpcOrder samples of the
    // analyzed signal history.
    int16_t ar_filter_state[kUnvoicedLpcOrder];
    // Square root (floor) of the scaled energy of the unvoiced residual, and
    // the scale that goes with it.
    int16_t ar_gain;
    int16_t ar_gain_scale;
    // Derived from the correlation coefficient with a cubic polynomial; 0 when
    // the coefficient is not above 7875 (about 0.48 in Q14).
    int16_t voice_mix_factor;         /* Q14 */
    int16_t current_voice_mix_factor; /* Q14 */
    // The most recent signal segment, and a segment taken one distortion lag
    // earlier that has been scaled towards the same energy (or a copy of
    // `expand_vector0` when the energy ratio is outside 0.25 - 4).
    AudioVector expand_vector0;
    AudioVector expand_vector1;
    // Set to true when the amplitude ratio (Q13) exceeds 12288, i.e. 1.5.
    bool onset;
    int mute_slope; /* Q20 */
  };

  // Calculate the auto-correlation of `input`, with length `input_length`
  // samples. The correlation is calculated from a downsampled version of
  // `input`, and is written to `output`.
  void Correlation(const int16_t* input,
                   size_t input_length,
                   int16_t* output) const;

  // Advances `current_lag_index_` by `lag_index_direction_`, and flips the
  // direction when the index reaches 0 or kNumLags - 1.
  void UpdateLagIndex();

  BackgroundNoise* const background_noise_;
  StatisticsCalculator* const statistics_;
  const size_t overlap_length_;
  size_t max_lag_;
  // The distortion lag, the average of the distortion and correlation lags,
  // and the same average rounded towards the correlation lag. All three are
  // equal when the two lags coincide.
  size_t expand_lags_[kNumLags];
  int lag_index_direction_;
  int current_lag_index_;
  bool stop_muting_;
  size_t expand_duration_samples_;
  // One entry per channel.
  std::unique_ptr<ChannelParameters[]> channel_parameters_;
};
+ */ + +#include "modules/audio_coding/neteq/expand_uma_logger.h" + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { +std::unique_ptr<TickTimer::Countdown> GetNewCountdown( + const TickTimer& tick_timer, + int logging_period_s) { + return tick_timer.GetNewCountdown((logging_period_s * 1000) / + tick_timer.ms_per_tick()); +} +} // namespace + +ExpandUmaLogger::ExpandUmaLogger(absl::string_view uma_name, + int logging_period_s, + const TickTimer* tick_timer) + : uma_name_(uma_name), + logging_period_s_(logging_period_s), + tick_timer_(*tick_timer), + timer_(GetNewCountdown(tick_timer_, logging_period_s_)) { + RTC_DCHECK(tick_timer); + RTC_DCHECK_GT(logging_period_s_, 0); +} + +ExpandUmaLogger::~ExpandUmaLogger() = default; + +void ExpandUmaLogger::UpdateSampleCounter(uint64_t samples, + int sample_rate_hz) { + if ((last_logged_value_ && *last_logged_value_ > samples) || + sample_rate_hz_ != sample_rate_hz) { + // Sanity checks. The incremental counter moved backwards, or sample rate + // changed. + last_logged_value_.reset(); + } + last_value_ = samples; + sample_rate_hz_ = sample_rate_hz; + if (!last_logged_value_) { + last_logged_value_ = absl::optional<uint64_t>(samples); + } + + if (!timer_->Finished()) { + // Not yet time to log. + return; + } + + RTC_DCHECK(last_logged_value_); + RTC_DCHECK_GE(last_value_, *last_logged_value_); + const uint64_t diff = last_value_ - *last_logged_value_; + last_logged_value_ = absl::optional<uint64_t>(last_value_); + // Calculate rate in percent. 
+ RTC_DCHECK_GT(sample_rate_hz, 0); + const int rate = (100 * diff) / (sample_rate_hz * logging_period_s_); + RTC_DCHECK_GE(rate, 0); + RTC_DCHECK_LE(rate, 100); + RTC_HISTOGRAM_PERCENTAGE_SPARSE(uma_name_, rate); + timer_ = GetNewCountdown(tick_timer_, logging_period_s_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h new file mode 100644 index 0000000000..cc5c20a886 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/expand_uma_logger.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_ +#define MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_ + +#include <stdint.h> + +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/neteq/tick_timer.h" + +namespace webrtc { + +// This class is used to periodically log values to a UMA histogram. The caller +// is expected to update this class with an incremental sample counter which +// counts expand samples. At the end of each logging period, the class will +// calculate the fraction of samples that were expand samples during that period +// and report that in percent. The logging period must be strictly positive. +// Does not take ownership of tick_timer and the pointer must refer to a valid +// object that outlives the one constructed. 
class ExpandUmaLogger {
 public:
  // `uma_name` is the histogram name that rates are reported to.
  // `logging_period_s` must be strictly positive and `tick_timer` must be
  // non-null (both are RTC_DCHECKed). A first countdown of `logging_period_s`
  // seconds is started on `tick_timer` at construction.
  ExpandUmaLogger(absl::string_view uma_name,
                  int logging_period_s,
                  const TickTimer* tick_timer);

  ~ExpandUmaLogger();

  ExpandUmaLogger(const ExpandUmaLogger&) = delete;
  ExpandUmaLogger& operator=(const ExpandUmaLogger&) = delete;

  // In this call, value should be an incremental sample counter. The sample
  // rate must be strictly positive.
  // If the counter has moved backwards, or `sample_rate_hz` differs from the
  // previous call, the reference value is re-based to `value`. Once the
  // countdown has finished, the increase since the last logged value is
  // reported as 100 * diff / (sample_rate_hz * logging period) and a new
  // countdown is started.
  void UpdateSampleCounter(uint64_t value, int sample_rate_hz);

 private:
  const std::string uma_name_;
  const int logging_period_s_;
  // Not owned.
  const TickTimer& tick_timer_;
  // Countdown for the current logging period; replaced after every report.
  std::unique_ptr<TickTimer::Countdown> timer_;
  // Counter value at the time of the last report (or at the last re-base).
  // Empty until the first call to UpdateSampleCounter().
  absl::optional<uint64_t> last_logged_value_;
  // The values passed in the most recent call to UpdateSampleCounter().
  uint64_t last_value_ = 0;
  int sample_rate_hz_ = 0;
};
+ +#include "modules/audio_coding/neteq/expand.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(Expand, CreateAndDestroy) { + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); +} + +TEST(Expand, CreateUsingFactory) { + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + ExpandFactory expand_factory; + Expand* expand = expand_factory.Create(&bgn, &sync_buffer, &random_vector, + &statistics, fs, channels); + EXPECT_TRUE(expand != NULL); + delete expand; +} + +namespace { +class FakeStatisticsCalculator : public StatisticsCalculator { + public: + void LogDelayedPacketOutageEvent(int num_samples, int fs_hz) override { + last_outage_duration_samples_ = num_samples; + } + + int last_outage_duration_samples() const { + return last_outage_duration_samples_; + } + + private: + int last_outage_duration_samples_ = 0; +}; + +// This is the same size that is given to the SyncBuffer object in NetEq. 
// Test fixture that owns an Expand object together with everything it needs:
// a single-channel sync buffer of kNetEqSyncBufferLengthMs at 32 kHz, which
// SetUp() fills with audio read from the 32 kHz test file.
class ExpandTest : public ::testing::Test {
 protected:
  // Members are initialized in declaration order. `input_file_` is declared
  // before `test_sample_rate_hz_`, so it is given the literal 32000 instead of
  // the (not yet initialized) member; the remaining members are declared after
  // the values they are constructed from.
  ExpandTest()
      : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
                    32000),
        test_sample_rate_hz_(32000),
        num_channels_(1),
        background_noise_(num_channels_),
        sync_buffer_(num_channels_,
                     kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
        expand_(&background_noise_,
                &sync_buffer_,
                &random_vector_,
                &statistics_,
                test_sample_rate_hz_,
                num_channels_) {
    input_file_.set_output_rate_hz(test_sample_rate_hz_);
  }

  void SetUp() override {
    // Fast-forward the input file until there is speech (about 1.1 second into
    // the file).
    const int speech_start_samples =
        static_cast<int>(test_sample_rate_hz_ * 1.1f);
    ASSERT_TRUE(input_file_.Seek(speech_start_samples));

    // Pre-load the sync buffer with speech data.
    std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
    ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
    sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
    // Only channel 0 is filled above; this guards against the channel count
    // being raised without the pre-loading being extended to match.
    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
  }

  test::ResampleInputAudioFile input_file_;
  int test_sample_rate_hz_;
  size_t num_channels_;
  BackgroundNoise background_noise_;
  SyncBuffer sync_buffer_;
  RandomVector random_vector_;
  // Records the duration passed to LogDelayedPacketOutageEvent().
  FakeStatisticsCalculator statistics_;
  // Must be declared after every object whose address it is given.
  Expand expand_;
};
// This test is similar to the DelayedPacketOutage test above, but with the
// difference that Expand::Reset() is called part-way through: right after the
// sixth call to Expand::Process() (loop index 5). The running sample count is
// cleared at the same point, so only the four calls made after the reset are
// expected to contribute to the reported outage.
TEST_F(ExpandTest, CheckOutageStatsAfterReset) {
  AudioMultiVector output(num_channels_);
  size_t sum_output_len_samples = 0;
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    sum_output_len_samples += output.Size();
    if (i == 5) {
      // Restart both the expand period and the expected sample count.
      expand_.Reset();
      sum_output_len_samples = 0;
    }
    // No outage is reported while the expand period is still ongoing.
    EXPECT_EQ(0, statistics_.last_outage_duration_samples());
  }
  expand_.SetParametersForNormalAfterExpand();
  // The reported outage is compared in samples (no unit conversion): it must
  // equal the number of samples accumulated since the reset.
  EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples),
            statistics_.last_outage_duration_samples());
}
Its main goal is to ensure a smooth +playout of incoming audio packets from the network with a low amount of audio +artifacts (alterations to the original content of the packets) while at the same +time keeping the delay as low as possible. + +## API + +At a high level, the NetEq API has two main functions: +[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72) +and +[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72). + +### InsertPacket + +[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72) +delivers an RTP packet from the network to NetEq where the following happens: + +1. The packet is discarded if it is too late for playout (for example if it was + reordered). Otherwise it is put into the packet buffer where it is stored + until it is time for playout. If the buffer is full, discard all the + existing packets (this should be rare). +2. The interarrival time between packets is analyzed and statistics are updated, + which are used to derive a new target playout delay. The interarrival time is + measured in the number of GetAudio ‘ticks’ and thus clock drift between the + sender and receiver can be accounted for. + +### GetAudio + +[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72) +pulls 10 ms of audio from NetEq for playout. A much simplified decision logic is +as follows: + +1. If there is 10 ms of audio in the sync buffer then return that. +2. If the next packet is available (based on RTP timestamp) in the packet + buffer then decode it and append the result to the sync buffer. + 1.
Compare the current delay estimate (filtered buffer level) with the + target delay and time stretch (accelerate or decelerate) the contents of + the sync buffer if the buffer level is too high or too low. + 2. Return 10 ms of audio from the sync buffer. +3. If the last decoded packet was a discontinuous transmission (DTX) packet + then generate comfort noise. +4. If there is no available packet for decoding due to the next packet having + not arrived or been lost then generate packet loss concealment by + extrapolating the remaining audio in the sync buffer or by asking the + decoder to produce it. + +In summary, the output is the result of one of the following operations: + +* Normal: audio decoded from a packet. +* Acceleration: accelerated playout of a decoded packet. +* Preemptive expand: decelerated playout of a decoded packet. +* Expand: packet loss concealment generated by NetEq or the decoder. +* Merge: audio stitched together from packet loss concealment to decoded data + in case of a loss. +* Comfort noise (CNG): comfort noise generated by NetEq or the decoder between + talk spurts due to discontinuous transmission of packets (DTX). + +## Statistics + +There are a number of functions that can be used to query the internal state of +NetEq, statistics about the type of audio output and latency metrics such as how +long packets have waited in the buffer. + +* [`NetworkStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=273;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72): + instantaneous values or stats averaged over the duration since the last call to + this function. +* [`GetLifetimeStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=280;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72): + cumulative stats that persist over the lifetime of the class.
+* [`GetOperationsAndState`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=284;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72): + information about the internal state of NetEq (only intended to be used + for testing and debugging). + +## Tests and tools + +* [`neteq_rtpplay`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc;drc=cee751abff598fc19506f77de08bea7c61b9dcca): + Simulate NetEq behavior based on either an RTP dump, a PCAP file or an RTC + event log. A replacement audio file can also be used instead of the original + payload. Outputs aggregated statistics and optionally an audio file to + listen to. +* [`neteq_speed_test`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc;drc=2ab97f6f8e27b47c0d9beeb8b6ca5387bda9f55c): + Measure performance of NetEq, used on perf bots. +* Unit tests including bit exactness tests where an RTP file is used as an input + to NetEq, the output is concatenated and a checksum is calculated and + compared against a reference. + +## Other responsibilities + +* Dual-tone multi-frequency signaling (DTMF): receive telephone events and + produce dual tone waveforms. +* Forward error correction (RED or codec inband FEC): split inserted packets + and prioritize the payloads. +* NACK (negative acknowledgement): keep track of lost packets and generate a + list of packets to NACK. +* Audio/video sync: NetEq can be instructed to increase the latency in order + to keep audio and video in sync. diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc new file mode 100644 index 0000000000..e4b7f10379 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2019 The WebRTC project authors.
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/histogram.h" + +#include <algorithm> +#include <cstdlib> +#include <numeric> + +#include "absl/types/optional.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +Histogram::Histogram(size_t num_buckets, + int forget_factor, + absl::optional<double> start_forget_weight) + : buckets_(num_buckets, 0), + forget_factor_(0), + base_forget_factor_(forget_factor), + add_count_(0), + start_forget_weight_(start_forget_weight) { + RTC_DCHECK_LT(base_forget_factor_, 1 << 15); +} + +Histogram::~Histogram() {} + +// Each element in the vector is first multiplied by the forgetting factor +// `forget_factor_`. Then the vector element indicated by `iat_packets` is then +// increased (additive) by 1 - `forget_factor_`. This way, the probability of +// `value` is slightly increased, while the sum of the histogram remains +// constant (=1). +// Due to inaccuracies in the fixed-point arithmetic, the histogram may no +// longer sum up to 1 (in Q30) after the update. To correct this, a correction +// term is added or subtracted from the first element (or elements) of the +// vector. +// The forgetting factor `forget_factor_` is also updated. When the DelayManager +// is reset, the factor is set to 0 to facilitate rapid convergence in the +// beginning. With each update of the histogram, the factor is increased towards +// the steady-state value `base_forget_factor_`. 
// Records one observation in bucket `value`. All buckets are first scaled by
// `forget_factor_` (Q15), the bucket at index `value` is then raised by
// 1 - `forget_factor_`, and any fixed-point rounding residue is spread over
// the leading buckets so that the histogram again sums to 1 in Q30. Finally
// `add_count_` is incremented and `forget_factor_` is updated.
void Histogram::Add(int value) {
  // `value` is used as an index into `buckets_` below, so it must be in range.
  RTC_DCHECK(value >= 0);
  RTC_DCHECK(value < static_cast<int>(buckets_.size()));
  int vector_sum = 0;  // Sum up the vector elements as they are processed.
  // Multiply each element in `buckets_` with `forget_factor_`.
  for (int& bucket : buckets_) {
    // The product is formed in 64 bits and shifted right by 15 before being
    // stored back into the bucket.
    bucket = (static_cast<int64_t>(bucket) * forget_factor_) >> 15;
    vector_sum += bucket;
  }

  // Increase the probability for the currently observed value by
  // 1 - `forget_factor_`. The factor is in Q15, `buckets_` in Q30.
  // Thus, left-shift 15 steps to obtain result in Q30.
  buckets_[value] += (32768 - forget_factor_) << 15;
  vector_sum += (32768 - forget_factor_) << 15;  // Add to vector sum.

  // `buckets_` should sum up to 1 (in Q30), but it may not due to
  // fixed-point rounding errors.
  vector_sum -= 1 << 30;  // Should be zero. Compensate if not.
  if (vector_sum != 0) {
    // Modify a few values early in `buckets_`. `flip_sign` points in the
    // direction that brings `vector_sum` back towards zero.
    int flip_sign = vector_sum > 0 ? -1 : 1;
    for (int& bucket : buckets_) {
      // Add/subtract 1/16 of the element, but not more than `vector_sum`.
      int correction = flip_sign * std::min(std::abs(vector_sum), bucket >> 4);
      bucket += correction;
      vector_sum += correction;
      // Stop as soon as the whole residue has been absorbed.
      if (std::abs(vector_sum) == 0) {
        break;
      }
    }
  }
  RTC_DCHECK(vector_sum == 0);  // Verify that the above is correct.

  ++add_count_;

  // Update `forget_factor_` (changes only during the first seconds after a
  // reset). The factor converges to `base_forget_factor_`.
  if (start_forget_weight_) {
    if (forget_factor_ != base_forget_factor_) {
      int old_forget_factor = forget_factor_;
      // Q15 value of 1 - start_forget_weight / (add_count_ + 1), computed in
      // floating point and converted to int.
      int forget_factor =
          (1 << 15) * (1 - start_forget_weight_.value() / (add_count_ + 1));
      // Clamp the new factor to the range [0, base_forget_factor_].
      forget_factor_ =
          std::max(0, std::min(base_forget_factor_, forget_factor));
      // The histogram is updated recursively by forgetting the old histogram
      // with `forget_factor_` and adding a new sample multiplied by
      // `1 - forget_factor_`. We need to make sure that the effective weight
      // on the new sample is no smaller than those on the old samples, i.e.,
      // to satisfy the following DCHECK.
      RTC_DCHECK_GE((1 << 15) - forget_factor_,
                    ((1 << 15) - old_forget_factor) * forget_factor_ >> 15);
    }
  } else {
    // Without a start weight, step towards `base_forget_factor_` by a quarter
    // of the remaining difference; the +3 rounds a non-negative difference
    // upwards.
    forget_factor_ += (base_forget_factor_ - forget_factor_ + 3) >> 2;
  }
}
+ add_count_ = 0; +} + +int Histogram::NumBuckets() const { + return buckets_.size(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h new file mode 100644 index 0000000000..265a10e00a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_ +#define MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_ + +#include <string.h> // Provide access to size_t. + +#include <vector> + +#include "absl/types/optional.h" + +namespace webrtc { + +class Histogram { + public: + // Creates histogram with capacity `num_buckets` and `forget_factor` in Q15. + Histogram(size_t num_buckets, + int forget_factor, + absl::optional<double> start_forget_weight = absl::nullopt); + + virtual ~Histogram(); + + // Resets the histogram to the default start distribution. + virtual void Reset(); + + // Add entry in bucket `index`. + virtual void Add(int index); + + // Calculates the quantile at `probability` (in Q30) of the histogram + // distribution. + virtual int Quantile(int probability); + + // Returns the number of buckets in the histogram. + virtual int NumBuckets() const; + + // Returns the probability for each bucket in Q30. + const std::vector<int>& buckets() const { return buckets_; } + + // Accessors only intended for testing purposes. 
+ int base_forget_factor_for_testing() const { return base_forget_factor_; } + int forget_factor_for_testing() const { return forget_factor_; } + absl::optional<double> start_forget_weight_for_testing() const { + return start_forget_weight_; + } + + private: + std::vector<int> buckets_; + int forget_factor_; // Q15 + const int base_forget_factor_; + int add_count_; + const absl::optional<double> start_forget_weight_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc new file mode 100644 index 0000000000..e30a2956dc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/histogram_unittest.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/histogram.h" + +#include <cmath> + +#include "test/gtest.h" + +namespace webrtc { + +TEST(HistogramTest, Initialization) { + Histogram histogram(65, 32440); + histogram.Reset(); + const auto& buckets = histogram.buckets(); + double sum = 0.0; + for (size_t i = 0; i < buckets.size(); i++) { + EXPECT_NEAR(ldexp(std::pow(0.5, static_cast<int>(i + 1)), 30), buckets[i], + 65537); + // Tolerance 65537 in Q30 corresponds to a delta of approximately 0.00006. + sum += buckets[i]; + } + EXPECT_EQ(1 << 30, static_cast<int>(sum)); // Should be 1 in Q30. 
+} + +TEST(HistogramTest, Add) { + Histogram histogram(10, 32440); + histogram.Reset(); + const std::vector<int> before = histogram.buckets(); + const int index = 5; + histogram.Add(index); + const std::vector<int> after = histogram.buckets(); + EXPECT_GT(after[index], before[index]); + int sum = 0; + for (int bucket : after) { + sum += bucket; + } + EXPECT_EQ(1 << 30, sum); +} + +TEST(HistogramTest, ForgetFactor) { + Histogram histogram(10, 32440); + histogram.Reset(); + const std::vector<int> before = histogram.buckets(); + const int index = 4; + histogram.Add(index); + const std::vector<int> after = histogram.buckets(); + for (int i = 0; i < histogram.NumBuckets(); ++i) { + if (i != index) { + EXPECT_LT(after[i], before[i]); + } + } +} + +TEST(HistogramTest, ReachSteadyStateForgetFactor) { + static constexpr int kSteadyStateForgetFactor = (1 << 15) * 0.9993; + Histogram histogram(100, kSteadyStateForgetFactor, 1.0); + histogram.Reset(); + int n = (1 << 15) / ((1 << 15) - kSteadyStateForgetFactor); + for (int i = 0; i < n; ++i) { + histogram.Add(0); + } + EXPECT_EQ(histogram.forget_factor_for_testing(), kSteadyStateForgetFactor); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc b/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc new file mode 100644 index 0000000000..0aec6d2597 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge.cc @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/merge.h" + +#include <string.h> // memmove, memcpy, memset, size_t + +#include <algorithm> // min, max +#include <memory> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/cross_correlation.h" +#include "modules/audio_coding/neteq/dsp_helper.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +Merge::Merge(int fs_hz, + size_t num_channels, + Expand* expand, + SyncBuffer* sync_buffer) + : fs_hz_(fs_hz), + num_channels_(num_channels), + fs_mult_(fs_hz_ / 8000), + timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)), + expand_(expand), + sync_buffer_(sync_buffer), + expanded_(num_channels_) { + RTC_DCHECK_GT(num_channels_, 0); +} + +Merge::~Merge() = default; + +size_t Merge::Process(int16_t* input, + size_t input_length, + AudioMultiVector* output) { + // TODO(hlundin): Change to an enumerator and skip assert. + RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 || + fs_hz_ == 48000); + RTC_DCHECK_LE(fs_hz_, kMaxSampleRate); // Should not be possible. + if (input_length == 0) { + return 0; + } + + size_t old_length; + size_t expand_period; + // Get expansion data to overlap and mix with. + size_t expanded_length = GetExpandedSignal(&old_length, &expand_period); + + // Transfer input signal to an AudioMultiVector. 
+ AudioMultiVector input_vector(num_channels_); + input_vector.PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + size_t input_length_per_channel = input_vector.Size(); + RTC_DCHECK_EQ(input_length_per_channel, input_length / num_channels_); + + size_t best_correlation_index = 0; + size_t output_length = 0; + + std::unique_ptr<int16_t[]> input_channel( + new int16_t[input_length_per_channel]); + std::unique_ptr<int16_t[]> expanded_channel(new int16_t[expanded_length]); + for (size_t channel = 0; channel < num_channels_; ++channel) { + input_vector[channel].CopyTo(input_length_per_channel, 0, + input_channel.get()); + expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get()); + + const int16_t new_mute_factor = std::min<int16_t>( + 16384, SignalScaling(input_channel.get(), input_length_per_channel, + expanded_channel.get())); + + if (channel == 0) { + // Downsample, correlate, and find strongest correlation period for the + // reference (i.e., first) channel only. + // Downsample to 4kHz sample rate. + Downsample(input_channel.get(), input_length_per_channel, + expanded_channel.get(), expanded_length); + + // Calculate the lag of the strongest correlation period. + best_correlation_index = CorrelateAndPeakSearch( + old_length, input_length_per_channel, expand_period); + } + + temp_data_.resize(input_length_per_channel + best_correlation_index); + int16_t* decoded_output = temp_data_.data() + best_correlation_index; + + // Mute the new decoded data if needed (and unmute it linearly). + // This is the overlapping part of expanded_signal. 
+ size_t interpolation_length = + std::min(kMaxCorrelationLength * fs_mult_, + expanded_length - best_correlation_index); + interpolation_length = + std::min(interpolation_length, input_length_per_channel); + + RTC_DCHECK_LE(new_mute_factor, 16384); + int16_t mute_factor = + std::max(expand_->MuteFactor(channel), new_mute_factor); + RTC_DCHECK_GE(mute_factor, 0); + + if (mute_factor < 16384) { + // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB, + // and so on, or as fast as it takes to come back to full gain within the + // frame length. + const int back_to_fullscale_inc = static_cast<int>( + ((16384 - mute_factor) << 6) / input_length_per_channel); + const int increment = std::max(4194 / fs_mult_, back_to_fullscale_inc); + mute_factor = static_cast<int16_t>(DspHelper::RampSignal( + input_channel.get(), interpolation_length, mute_factor, increment)); + DspHelper::UnmuteSignal(&input_channel[interpolation_length], + input_length_per_channel - interpolation_length, + &mute_factor, increment, + &decoded_output[interpolation_length]); + } else { + // No muting needed. + memmove( + &decoded_output[interpolation_length], + &input_channel[interpolation_length], + sizeof(int16_t) * (input_length_per_channel - interpolation_length)); + } + + // Do overlap and mix linearly. + int16_t increment = + static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14. + int16_t local_mute_factor = 16384 - increment; + memmove(temp_data_.data(), expanded_channel.get(), + sizeof(int16_t) * best_correlation_index); + DspHelper::CrossFade(&expanded_channel[best_correlation_index], + input_channel.get(), interpolation_length, + &local_mute_factor, increment, decoded_output); + + output_length = best_correlation_index + input_length_per_channel; + if (channel == 0) { + RTC_DCHECK(output->Empty()); // Output should be empty at this point. 
+ output->AssertSize(output_length); + } else { + RTC_DCHECK_EQ(output->Size(), output_length); + } + (*output)[channel].OverwriteAt(temp_data_.data(), output_length, 0); + } + + // Copy back the first part of the data to `sync_buffer_` and remove it from + // `output`. + sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index()); + output->PopFront(old_length); + + // Return new added length. `old_length` samples were borrowed from + // `sync_buffer_`. + RTC_DCHECK_GE(output_length, old_length); + return output_length - old_length; +} + +size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) { + // Check how much data that is left since earlier. + *old_length = sync_buffer_->FutureLength(); + // Should never be less than overlap_length. + RTC_DCHECK_GE(*old_length, expand_->overlap_length()); + // Generate data to merge the overlap with using expand. + expand_->SetParametersForMergeAfterExpand(); + + if (*old_length >= 210 * kMaxSampleRate / 8000) { + // TODO(hlundin): Write test case for this. + // The number of samples available in the sync buffer is more than what fits + // in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples, + // but shift them towards the end of the buffer. This is ok, since all of + // the buffer will be expand data anyway, so as long as the beginning is + // left untouched, we're fine. + size_t length_diff = *old_length - 210 * kMaxSampleRate / 8000; + sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index()); + *old_length = 210 * kMaxSampleRate / 8000; + // This is the truncated length. + } + // This assert should always be true thanks to the if statement above. + RTC_DCHECK_GE(210 * kMaxSampleRate / 8000, *old_length); + + AudioMultiVector expanded_temp(num_channels_); + expand_->Process(&expanded_temp); + *expand_period = expanded_temp.Size(); // Samples per channel. + + expanded_.Clear(); + // Copy what is left since earlier into the expanded vector. 
+ expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index()); + RTC_DCHECK_EQ(expanded_.Size(), *old_length); + RTC_DCHECK_GT(expanded_temp.Size(), 0); + // Do "ugly" copy and paste from the expanded in order to generate more data + // to correlate (but not interpolate) with. + const size_t required_length = static_cast<size_t>((120 + 80 + 2) * fs_mult_); + if (expanded_.Size() < required_length) { + while (expanded_.Size() < required_length) { + // Append one more pitch period each time. + expanded_.PushBack(expanded_temp); + } + // Trim the length to exactly `required_length`. + expanded_.PopBack(expanded_.Size() - required_length); + } + RTC_DCHECK_GE(expanded_.Size(), required_length); + return required_length; +} + +int16_t Merge::SignalScaling(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal) const { + // Adjust muting factor if new vector is more or less of the BGN energy. + const auto mod_input_length = rtc::SafeMin<size_t>( + 64 * rtc::dchecked_cast<size_t>(fs_mult_), input_length); + + // Missing input, do no muting + if (mod_input_length == 0) { + return 16384; + } + + const int16_t expanded_max = + WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); + int32_t factor = + (expanded_max * expanded_max) / (std::numeric_limits<int32_t>::max() / + static_cast<int32_t>(mod_input_length)); + const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); + int32_t energy_expanded = WebRtcSpl_DotProductWithScale( + expanded_signal, expanded_signal, mod_input_length, expanded_shift); + + // Calculate energy of input signal. + const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); + factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() / + static_cast<int32_t>(mod_input_length)); + const int input_shift = factor == 0 ? 
0 : 31 - WebRtcSpl_NormW32(factor); + int32_t energy_input = WebRtcSpl_DotProductWithScale( + input, input, mod_input_length, input_shift); + + // Align to the same Q-domain. + if (input_shift > expanded_shift) { + energy_expanded = energy_expanded >> (input_shift - expanded_shift); + } else { + energy_input = energy_input >> (expanded_shift - input_shift); + } + + // Calculate muting factor to use for new frame. + int16_t mute_factor; + if (energy_input > energy_expanded) { + // Normalize `energy_input` to 14 bits. + int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17; + energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift); + // Put `energy_expanded` in a domain 14 higher, so that + // energy_expanded / energy_input is in Q14. + energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14); + // Calculate sqrt(energy_expanded / energy_input) in Q14. + mute_factor = static_cast<int16_t>( + WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14)); + } else { + // Set to 1 (in Q14) when `expanded` has higher energy than `input`. + mute_factor = 16384; + } + + return mute_factor; +} + +// TODO(hlundin): There are some parameter values in this method that seem +// strange. Compare with Expand::Correlation. +void Merge::Downsample(const int16_t* input, + size_t input_length, + const int16_t* expanded_signal, + size_t expanded_length) { + const int16_t* filter_coefficients; + size_t num_coefficients; + int decimation_factor = fs_hz_ / 4000; + static const size_t kCompensateDelay = 0; + size_t length_limit = static_cast<size_t>(fs_hz_ / 100); // 10 ms in samples. 
+ if (fs_hz_ == 8000) { + filter_coefficients = DspHelper::kDownsample8kHzTbl; + num_coefficients = 3; + } else if (fs_hz_ == 16000) { + filter_coefficients = DspHelper::kDownsample16kHzTbl; + num_coefficients = 5; + } else if (fs_hz_ == 32000) { + filter_coefficients = DspHelper::kDownsample32kHzTbl; + num_coefficients = 7; + } else { // fs_hz_ == 48000 + filter_coefficients = DspHelper::kDownsample48kHzTbl; + num_coefficients = 7; + } + size_t signal_offset = num_coefficients - 1; + WebRtcSpl_DownsampleFast( + &expanded_signal[signal_offset], expanded_length - signal_offset, + expanded_downsampled_, kExpandDownsampLength, filter_coefficients, + num_coefficients, decimation_factor, kCompensateDelay); + if (input_length <= length_limit) { + // Not quite long enough, so we have to cheat a bit. + // If the input is shorter than the offset, we consider the input to be 0 + // length. This will cause us to skip the downsampling since it makes no + // sense anyway, and input_downsampled_ will be filled with zeros. This is + // clearly a pathological case, and the signal quality will suffer, but + // there is not much we can do. + const size_t temp_len = + input_length > signal_offset ? input_length - signal_offset : 0; + // TODO(hlundin): Should `downsamp_temp_len` be corrected for round-off + // errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor? 
+ size_t downsamp_temp_len = temp_len / decimation_factor; + if (downsamp_temp_len > 0) { + WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len, + input_downsampled_, downsamp_temp_len, + filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + } + memset(&input_downsampled_[downsamp_temp_len], 0, + sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len)); + } else { + WebRtcSpl_DownsampleFast( + &input[signal_offset], input_length - signal_offset, input_downsampled_, + kInputDownsampLength, filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + } +} + +size_t Merge::CorrelateAndPeakSearch(size_t start_position, + size_t input_length, + size_t expand_period) const { + // Calculate correlation without any normalization. + const size_t max_corr_length = kMaxCorrelationLength; + size_t stop_position_downsamp = + std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1); + + int32_t correlation[kMaxCorrelationLength]; + CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_, + kInputDownsampLength, stop_position_downsamp, 1, + correlation); + + // Normalize correlation to 14 bits and copy to a 16-bit array. + const size_t pad_length = expand_->overlap_length() - 1; + const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength; + std::unique_ptr<int16_t[]> correlation16( + new int16_t[correlation_buffer_size]); + memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t)); + int16_t* correlation_ptr = &correlation16[pad_length]; + int32_t max_correlation = + WebRtcSpl_MaxAbsValueW32(correlation, stop_position_downsamp); + int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation)); + WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp, + correlation, norm_shift); + + // Calculate allowed starting point for peak finding. 
+ // The peak location bestIndex must fulfill two criteria: + // (1) w16_bestIndex + input_length < + // timestamps_per_call_ + expand_->overlap_length(); + // (2) w16_bestIndex + input_length < start_position. + size_t start_index = timestamps_per_call_ + expand_->overlap_length(); + start_index = std::max(start_position, start_index); + start_index = (input_length > start_index) ? 0 : (start_index - input_length); + // Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.) + size_t start_index_downsamp = start_index / (fs_mult_ * 2); + + // Calculate a modified `stop_position_downsamp` to account for the increased + // start index `start_index_downsamp` and the effective array length. + size_t modified_stop_pos = + std::min(stop_position_downsamp, + kMaxCorrelationLength + pad_length - start_index_downsamp); + size_t best_correlation_index; + int16_t best_correlation; + static const size_t kNumCorrelationCandidates = 1; + DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp], + modified_stop_pos, kNumCorrelationCandidates, + fs_mult_, &best_correlation_index, + &best_correlation); + // Compensate for modified start index. + best_correlation_index += start_index; + + // Ensure that underrun does not occur for 10ms case => we have to get at + // least 10ms + overlap . (This should never happen thanks to the above + // modification of peak-finding starting point.) + while (((best_correlation_index + input_length) < + (timestamps_per_call_ + expand_->overlap_length())) || + ((best_correlation_index + input_length) < start_position)) { + RTC_DCHECK_NOTREACHED(); // Should never happen. + best_correlation_index += expand_period; // Jump one lag ahead. + } + return best_correlation_index; +} + +size_t Merge::RequiredFutureSamples() { + return fs_hz_ / 100 * num_channels_; // 10 ms. 
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge.h b/third_party/libwebrtc/modules/audio_coding/neteq/merge.h
new file mode 100644
index 0000000000..2f27106bfe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_
+#define MODULES_AUDIO_CODING_NETEQ_MERGE_H_
+
+// Include what is used directly below (size_t, int16_t, std::vector) instead
+// of relying on transitive includes.
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "modules/audio_coding/neteq/audio_multi_vector.h"
+
+namespace webrtc {
+
+// Forward declarations.
+class Expand;
+class SyncBuffer;
+
+// This class handles the transition from expansion to normal operation.
+// When a packet is not available for decoding when needed, the expand operation
+// is called to generate extrapolation data. If the missing packet arrives,
+// i.e., it was just delayed, it can be decoded and appended directly to the
+// end of the expanded data (thanks to how the Expand class operates). However,
+// if a later packet arrives instead, the loss is a fact, and the new data must
+// be stitched together with the end of the expanded data. This stitching is
+// what the Merge class does.
+class Merge {
+ public:
+  // `expand` and `sync_buffer` are stored as raw pointers; this header does
+  // not show them being owned here.
+  Merge(int fs_hz,
+        size_t num_channels,
+        Expand* expand,
+        SyncBuffer* sync_buffer);
+  virtual ~Merge();
+
+  Merge(const Merge&) = delete;
+  Merge& operator=(const Merge&) = delete;
+
+  // The main method to produce the audio data. The decoded data is supplied in
+  // `input`, having `input_length` samples in total for all channels
+  // (interleaved). The result is written to `output`. The number of channels
+  // allocated in `output` defines the number of channels that will be used when
+  // de-interleaving `input`.
+  virtual size_t Process(int16_t* input,
+                         size_t input_length,
+                         AudioMultiVector* output);
+
+  // Number of future samples needed, summed over all channels (the
+  // implementation returns 10 ms worth: fs_hz_ / 100 * num_channels_).
+  virtual size_t RequiredFutureSamples();
+
+ protected:
+  const int fs_hz_;
+  const size_t num_channels_;
+
+ private:
+  static const int kMaxSampleRate = 48000;
+  // Sizes of the downsampled scratch buffers and of the correlation vector.
+  static const size_t kExpandDownsampLength = 100;
+  static const size_t kInputDownsampLength = 40;
+  static const size_t kMaxCorrelationLength = 60;
+
+  // Calls `expand_` to get more expansion data to merge with. The data is
+  // written to `expanded_signal_`. Returns the length of the expanded data,
+  // while `expand_period` will be the number of samples in one expansion period
+  // (typically one pitch period). The value of `old_length` will be the number
+  // of samples that were taken from the `sync_buffer_`.
+  size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
+
+  // Analyzes `input` and `expanded_signal` and returns muting factor (Q14) to
+  // be used on the new data.
+  int16_t SignalScaling(const int16_t* input,
+                        size_t input_length,
+                        const int16_t* expanded_signal) const;
+
+  // Downsamples `input` (`input_length` samples) and `expanded_signal` to
+  // 4 kHz sample rate. The downsampled signals are written to
+  // `input_downsampled_` and `expanded_downsampled_`, respectively.
+  void Downsample(const int16_t* input,
+                  size_t input_length,
+                  const int16_t* expanded_signal,
+                  size_t expanded_length);
+
+  // Calculates cross-correlation between `input_downsampled_` and
+  // `expanded_downsampled_`, and finds the correlation maximum. The maximizing
+  // lag is returned.
+  size_t CorrelateAndPeakSearch(size_t start_position,
+                                size_t input_length,
+                                size_t expand_period) const;
+
+  const int fs_mult_;  // fs_hz_ / 8000.
+  const size_t timestamps_per_call_;
+  Expand* expand_;
+  SyncBuffer* sync_buffer_;
+  // Outputs of Downsample(): the 4 kHz versions of the expanded and the new
+  // signal.
+  int16_t expanded_downsampled_[kExpandDownsampLength];
+  int16_t input_downsampled_[kInputDownsampLength];
+  AudioMultiVector expanded_;
+  std::vector<int16_t> temp_data_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MERGE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc
new file mode 100644
index 0000000000..d5a55eb056
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/merge_unittest.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for Merge class.
+
+#include "modules/audio_coding/neteq/merge.h"
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "modules/audio_coding/neteq/background_noise.h"
+#include "modules/audio_coding/neteq/expand.h"
+#include "modules/audio_coding/neteq/random_vector.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "modules/audio_coding/neteq/sync_buffer.h"
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+// Smoke test: a Merge object can be constructed and destroyed together with
+// its collaborators.
+TEST(Merge, CreateAndDestroy) {
+  int fs = 8000;
+  size_t channels = 1;
+  BackgroundNoise bgn(channels);
+  SyncBuffer sync_buffer(1, 1000);
+  RandomVector random_vector;
+  StatisticsCalculator statistics;
+  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
+  Merge merge(fs, channels, &expand, &sync_buffer);
+}
+
+namespace {
+// This is the same size that is given to the SyncBuffer object in NetEq.
+const size_t kNetEqSyncBufferLengthMs = 720;
+}  // namespace
+
+// Fixture that wires a Merge object to real Expand/SyncBuffer instances at
+// 8 kHz mono. The test parameter is the decoded input length in samples.
+class MergeTest : public testing::TestWithParam<size_t> {
+ protected:
+  MergeTest()
+      : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
+                    32000),
+        test_sample_rate_hz_(8000),
+        num_channels_(1),
+        background_noise_(num_channels_),
+        sync_buffer_(num_channels_,
+                     kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
+        expand_(&background_noise_,
+                &sync_buffer_,
+                &random_vector_,
+                &statistics_,
+                test_sample_rate_hz_,
+                num_channels_),
+        merge_(test_sample_rate_hz_, num_channels_, &expand_, &sync_buffer_) {
+    input_file_.set_output_rate_hz(test_sample_rate_hz_);
+  }
+
+  void SetUp() override {
+    // Fast-forward the input file until there is speech (about 1.1 second into
+    // the file).
+    const int speech_start_samples =
+        static_cast<int>(test_sample_rate_hz_ * 1.1f);
+    ASSERT_TRUE(input_file_.Seek(speech_start_samples));
+
+    // Pre-load the sync buffer with speech data.
+    std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
+    ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
+    sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
+    // Move index such that the sync buffer appears to have 5 ms left to play.
+    sync_buffer_.set_next_index(sync_buffer_.next_index() -
+                                test_sample_rate_hz_ * 5 / 1000);
+    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
+    ASSERT_GT(sync_buffer_.FutureLength(), 0u);
+  }
+
+  // Members are declared in the order the constructor's initializer list
+  // depends on: the objects handed to expand_ and mer_ by pointer come first.
+  test::ResampleInputAudioFile input_file_;
+  int test_sample_rate_hz_;
+  size_t num_channels_;
+  BackgroundNoise background_noise_;
+  SyncBuffer sync_buffer_;
+  RandomVector random_vector_;
+  StatisticsCalculator statistics_;
+  Expand expand_;
+  Merge merge_;
+};
+
+TEST_P(MergeTest, Process) {
+  AudioMultiVector output(num_channels_);
+  // Start by calling Expand once, to prime the state.
+  EXPECT_EQ(0, expand_.Process(&output));
+  EXPECT_GT(output.Size(), 0u);
+  output.Clear();
+  // Now call Merge, but with a very short decoded input. Try different
+  // lengths of the input.
+  const size_t input_len = GetParam();
+  std::vector<int16_t> input(input_len, 17);
+  merge_.Process(input.data(), input_len, &output);
+  EXPECT_GT(output.Size(), 0u);
+}
+
+// Instantiate with values for the input length that are interesting in
+// Merge::Downsample. Why are these values interesting?
+// - In 8000 Hz sample rate, signal_offset in Merge::Downsample will be 2, so
+//   the values 1, 2, 3 are just around that value.
+// - Also in 8000 Hz, the variable length_limit in the same method will be 80,
+//   so values 80 and 81 will be on either side of the branch point
+//   "input_length <= length_limit".
+// - Finally, 160 is simply 20 ms in 8000 Hz, which is a common packet size.
+INSTANTIATE_TEST_SUITE_P(DifferentInputLengths,
+                         MergeTest,
+                         testing::Values(1, 2, 3, 80, 81, 160));
+// TODO(hlundin): Write more tests.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
new file mode 100644
index 0000000000..503f6ac6bd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
+
+#include "modules/audio_coding/neteq/buffer_level_filter.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for BufferLevelFilter.
+// NOTE(review): neither MOCK_METHOD below carries an `override` spec, so the
+// compiler does not check that they match virtual methods of the base class
+// -- confirm against buffer_level_filter.h.
+class MockBufferLevelFilter : public BufferLevelFilter {
+ public:
+  MOCK_METHOD(void,
+              Update,
+              (size_t buffer_size_samples, int time_stretched_samples));
+  MOCK_METHOD(int, filtered_current_level, (), (const));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h
new file mode 100644
index 0000000000..2394120e99
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
+
+#include <string>
+
+#include "modules/audio_coding/neteq/decoder_database.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for DecoderDatabase. Every mocked method is declared with
+// `override`, so it replaces the corresponding virtual of the base class.
+class MockDecoderDatabase : public DecoderDatabase {
+ public:
+  // Forwards `factory` (null by default) to the base class and passes
+  // absl::nullopt as its second constructor argument.
+  explicit MockDecoderDatabase(
+      rtc::scoped_refptr<AudioDecoderFactory> factory = nullptr)
+      : DecoderDatabase(factory, absl::nullopt) {}
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockDecoderDatabase() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(bool, Empty, (), (const, override));
+  MOCK_METHOD(int, Size, (), (const, override));
+  MOCK_METHOD(int,
+              RegisterPayload,
+              (int rtp_payload_type, const SdpAudioFormat& audio_format),
+              (override));
+  MOCK_METHOD(int, Remove, (uint8_t rtp_payload_type), (override));
+  MOCK_METHOD(void, RemoveAll, (), (override));
+  MOCK_METHOD(const DecoderInfo*,
+              GetDecoderInfo,
+              (uint8_t rtp_payload_type),
+              (const, override));
+  MOCK_METHOD(int,
+              SetActiveDecoder,
+              (uint8_t rtp_payload_type, bool* new_decoder),
+              (override));
+  MOCK_METHOD(AudioDecoder*, GetActiveDecoder, (), (const, override));
+  MOCK_METHOD(int, SetActiveCngDecoder, (uint8_t rtp_payload_type), (override));
+  MOCK_METHOD(ComfortNoiseDecoder*, GetActiveCngDecoder, (), (const, override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
new file mode 100644
index 0000000000..d783f8743b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
+
+#include "api/neteq/tick_timer.h"
+#include "modules/audio_coding/neteq/delay_manager.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for DelayManager that mocks only TargetDelayMs().
+// NOTE(review): the MOCK_METHOD has no `override` spec, so the compiler does
+// not check that it matches a virtual in DelayManager -- confirm.
+class MockDelayManager : public DelayManager {
+ public:
+  // Passes both arguments straight through to the DelayManager constructor.
+  MockDelayManager(const MockDelayManager::Config& config,
+                   const TickTimer* tick_timer)
+      : DelayManager(config, tick_timer) {}
+  MOCK_METHOD(int, TargetDelayMs, (), (const));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h
new file mode 100644
index 0000000000..c60c56d36b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_buffer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
+
+#include "modules/audio_coding/neteq/dtmf_buffer.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for DtmfBuffer. Every mocked method is declared with
+// `override`, so it replaces the corresponding virtual of the base class.
+class MockDtmfBuffer : public DtmfBuffer {
+ public:
+  // `explicit` keeps an int from converting implicitly into a mock object.
+  // `fs` is forwarded unchanged to the DtmfBuffer constructor.
+  explicit MockDtmfBuffer(int fs) : DtmfBuffer(fs) {}
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockDtmfBuffer() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(void, Flush, (), (override));
+  MOCK_METHOD(int, InsertEvent, (const DtmfEvent& event), (override));
+  MOCK_METHOD(bool,
+              GetEvent,
+              (uint32_t current_timestamp, DtmfEvent* event),
+              (override));
+  MOCK_METHOD(size_t, Length, (), (const, override));
+  MOCK_METHOD(bool, Empty, (), (const, override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h
new file mode 100644
index 0000000000..60de167c29
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
+
+#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for DtmfToneGenerator. Every mocked method is declared
+// with `override`, so it replaces the corresponding virtual of the base class.
+class MockDtmfToneGenerator : public DtmfToneGenerator {
+ public:
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockDtmfToneGenerator() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(int, Init, (int fs, int event, int attenuation), (override));
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(int,
+              Generate,
+              (size_t num_samples, AudioMultiVector* output),
+              (override));
+  MOCK_METHOD(bool, initialized, (), (const, override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h
new file mode 100644
index 0000000000..9d66779021
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_expand.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
+
+#include "modules/audio_coding/neteq/expand.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for Expand. Every mocked method is declared with
+// `override`, so it replaces the corresponding virtual of the base class.
+class MockExpand : public Expand {
+ public:
+  // Forwards all six arguments unchanged to the Expand constructor.
+  MockExpand(BackgroundNoise* background_noise,
+             SyncBuffer* sync_buffer,
+             RandomVector* random_vector,
+             StatisticsCalculator* statistics,
+             int fs,
+             size_t num_channels)
+      : Expand(background_noise,
+               sync_buffer,
+               random_vector,
+               statistics,
+               fs,
+               num_channels) {}
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockExpand() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(int, Process, (AudioMultiVector * output), (override));
+  MOCK_METHOD(void, SetParametersForNormalAfterExpand, (), (override));
+  MOCK_METHOD(void, SetParametersForMergeAfterExpand, (), (override));
+  MOCK_METHOD(size_t, overlap_length, (), (const, override));
+};
+
+}  // namespace webrtc
+
+namespace webrtc {
+
+// gMock test double for ExpandFactory; Create() is the only mocked method.
+class MockExpandFactory : public ExpandFactory {
+ public:
+  MOCK_METHOD(Expand*,
+              Create,
+              (BackgroundNoise * background_noise,
+               SyncBuffer* sync_buffer,
+               RandomVector* random_vector,
+               StatisticsCalculator* statistics,
+               int fs,
+               size_t num_channels),
+              (const, override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h
new file mode 100644
index 0000000000..03abbc1d4b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_histogram.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_
+
+#include "modules/audio_coding/neteq/histogram.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for Histogram that mocks Add() and Quantile(); both are
+// declared with `override`.
+class MockHistogram : public Histogram {
+ public:
+  // Forwards both arguments unchanged to the Histogram constructor.
+  MockHistogram(size_t num_buckets, int forget_factor)
+      : Histogram(num_buckets, forget_factor) {}
+  // NOTE(review): unlike the other mocks in this directory, this destructor
+  // is spelled `virtual ... {}` rather than `override`; whether the base
+  // destructor is virtual is not visible from here.
+  virtual ~MockHistogram() {}
+
+  MOCK_METHOD(void, Add, (int), (override));
+  MOCK_METHOD(int, Quantile, (int), (override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h
new file mode 100644
index 0000000000..6d88e09216
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_neteq_controller.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_
+
+#include "api/neteq/neteq_controller.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for NetEqController. Every mocked method is declared
+// with `override`, so it replaces the corresponding virtual of the base class.
+class MockNetEqController : public NetEqController {
+ public:
+  MockNetEqController() = default;
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockNetEqController() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(void, SoftReset, (), (override));
+  MOCK_METHOD(NetEq::Operation,
+              GetDecision,
+              (const NetEqStatus& neteq_status, bool* reset_decoder),
+              (override));
+  MOCK_METHOD(void, RegisterEmptyPacket, (), (override));
+  MOCK_METHOD(void,
+              SetSampleRate,
+              (int fs_hz, size_t output_size_samples),
+              (override));
+  MOCK_METHOD(bool, SetMaximumDelay, (int delay_ms), (override));
+  MOCK_METHOD(bool, SetMinimumDelay, (int delay_ms), (override));
+  MOCK_METHOD(bool, SetBaseMinimumDelay, (int delay_ms), (override));
+  MOCK_METHOD(int, GetBaseMinimumDelay, (), (const, override));
+  MOCK_METHOD(bool, CngRfc3389On, (), (const, override));
+  MOCK_METHOD(bool, CngOff, (), (const, override));
+  MOCK_METHOD(void, SetCngOff, (), (override));
+  MOCK_METHOD(void, ExpandDecision, (NetEq::Operation operation), (override));
+  MOCK_METHOD(void, AddSampleMemory, (int32_t value), (override));
+  MOCK_METHOD(int, TargetLevelMs, (), (const, override));
+  MOCK_METHOD(absl::optional<int>,
+              PacketArrived,
+              (int fs_hz,
+               bool should_update_stats,
+               const PacketArrivedInfo& info),
+              (override));
+  MOCK_METHOD(void, NotifyMutedState, (), (override));
+  MOCK_METHOD(bool, PeakFound, (), (const, override));
+  MOCK_METHOD(int, GetFilteredBufferLevel, (), (const, override));
+  MOCK_METHOD(void, set_sample_memory, (int32_t value), (override));
+  MOCK_METHOD(size_t, noise_fast_forward, (), (const, override));
+  MOCK_METHOD(size_t, packet_length_samples, (), (const, override));
+  MOCK_METHOD(void, set_packet_length_samples, (size_t value), (override));
+  MOCK_METHOD(void, set_prev_time_scale, (bool value), (override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h
new file mode 100644
index 0000000000..48357ea466
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_buffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
+
+#include "modules/audio_coding/neteq/packet_buffer.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for PacketBuffer. Every mocked method is declared with
+// `override`, so it replaces the corresponding virtual of the base class.
+class MockPacketBuffer : public PacketBuffer {
+ public:
+  // Forwards both arguments unchanged to the PacketBuffer constructor.
+  MockPacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer)
+      : PacketBuffer(max_number_of_packets, tick_timer) {}
+  // The destructor calls the mocked Die(), so a test can set an expectation
+  // on Die() to observe destruction.
+  ~MockPacketBuffer() override { Die(); }
+  MOCK_METHOD(void, Die, ());
+  MOCK_METHOD(void, Flush, (StatisticsCalculator * stats), (override));
+  MOCK_METHOD(void,
+              PartialFlush,
+              (int target_level_ms,
+               size_t sample_rate,
+               size_t last_decoded_length,
+               StatisticsCalculator* stats),
+              (override));
+  MOCK_METHOD(bool, Empty, (), (const, override));
+  MOCK_METHOD(int,
+              InsertPacket,
+              (Packet && packet,
+               StatisticsCalculator* stats,
+               size_t last_decoded_length,
+               size_t sample_rate,
+               int target_level_ms,
+               const DecoderDatabase& decoder_database),
+              (override));
+  MOCK_METHOD(int,
+              InsertPacketList,
+              (PacketList * packet_list,
+               const DecoderDatabase& decoder_database,
+               absl::optional<uint8_t>* current_rtp_payload_type,
+               absl::optional<uint8_t>* current_cng_rtp_payload_type,
+               StatisticsCalculator* stats,
+               size_t last_decoded_length,
+               size_t sample_rate,
+               int target_level_ms),
+              (override));
+  MOCK_METHOD(int,
+              NextTimestamp,
+              (uint32_t * next_timestamp),
+              (const, override));
+  MOCK_METHOD(int,
+              NextHigherTimestamp,
+              (uint32_t timestamp, uint32_t* next_timestamp),
+              (const, override));
+  MOCK_METHOD(const Packet*, PeekNextPacket, (), (const, override));
+  MOCK_METHOD(absl::optional<Packet>, GetNextPacket, (), (override));
+  MOCK_METHOD(int,
+              DiscardNextPacket,
+              (StatisticsCalculator * stats),
+              (override));
+  MOCK_METHOD(void,
+              DiscardOldPackets,
+              (uint32_t timestamp_limit,
+               uint32_t horizon_samples,
+               StatisticsCalculator* stats),
+              (override));
+  MOCK_METHOD(void,
+              DiscardAllOldPackets,
+              (uint32_t timestamp_limit, StatisticsCalculator* stats),
+              (override));
+  MOCK_METHOD(size_t, NumPacketsInBuffer, (), (const, override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h
new file mode 100644
index 0000000000..9daf571a80
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_red_payload_splitter.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
+
+#include "modules/audio_coding/neteq/red_payload_splitter.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for RedPayloadSplitter that mocks SplitRed() and
+// CheckRedPayloads(); both are declared with `override`.
+class MockRedPayloadSplitter : public RedPayloadSplitter {
+ public:
+  MOCK_METHOD(bool, SplitRed, (PacketList * packet_list), (override));
+  MOCK_METHOD(void,
+              CheckRedPayloads,
+              (PacketList * packet_list,
+               const DecoderDatabase& decoder_database),
+              (override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h
new file mode 100644
index 0000000000..f8812478d6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
+#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
+
+#include "modules/audio_coding/neteq/statistics_calculator.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+// gMock test double for StatisticsCalculator. Only the three methods below
+// are mocked (each with `override`); everything else is inherited unchanged.
+class MockStatisticsCalculator : public StatisticsCalculator {
+ public:
+  MOCK_METHOD(void, PacketsDiscarded, (size_t num_packets), (override));
+  MOCK_METHOD(void,
+              SecondaryPacketsDiscarded,
+              (size_t num_packets),
+              (override));
+  MOCK_METHOD(void, RelativePacketArrivalDelay, (size_t delay_ms), (override));
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc
new file mode 100644
index 0000000000..04cc5b52e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.cc
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/nack_tracker.h"
+
+#include <cstdint>
+#include <utility>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/struct_parameters_parser.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kDefaultSampleRateKhz = 48;
+constexpr int kMaxPacketSizeMs = 120;
+constexpr char kNackTrackerConfigFieldTrial[] =
+    "WebRTC-Audio-NetEqNackTrackerConfig";
+
+}  // namespace
+
+// Overrides the default config values with whatever the
+// "WebRTC-Audio-NetEqNackTrackerConfig" field trial string provides, then
+// logs the resulting configuration.
+NackTracker::Config::Config() {
+  auto parser = StructParametersParser::Create(
+      "packet_loss_forget_factor", &packet_loss_forget_factor,
+      "ms_per_loss_percent", &ms_per_loss_percent, "never_nack_multiple_times",
+      &never_nack_multiple_times, "require_valid_rtt", &require_valid_rtt,
+      "max_loss_rate", &max_loss_rate);
+  parser->Parse(
+      webrtc::field_trial::FindFullName(kNackTrackerConfigFieldTrial));
+  RTC_LOG(LS_INFO) << "Nack tracker config:"
+                      " packet_loss_forget_factor="
+                   << packet_loss_forget_factor
+                   << " ms_per_loss_percent=" << ms_per_loss_percent
+                   << " never_nack_multiple_times=" << never_nack_multiple_times
+                   << " require_valid_rtt=" << require_valid_rtt
+                   << " max_loss_rate=" << max_loss_rate;
+}
+
+NackTracker::NackTracker()
+    : sequence_num_last_received_rtp_(0),
+      timestamp_last_received_rtp_(0),
+      any_rtp_received_(false),
+      sequence_num_last_decoded_rtp_(0),
+      timestamp_last_decoded_rtp_(0),
+      any_rtp_decoded_(false),
+      sample_rate_khz_(kDefaultSampleRateKhz),
+      max_nack_list_size_(kNackListSizeLimit) {}
+
+NackTracker::~NackTracker() = default;
+
+// Stores the sample rate in kHz (integer division of `sample_rate_hz`).
+void NackTracker::UpdateSampleRate(int sample_rate_hz) {
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  sample_rate_khz_ = sample_rate_hz / 1000;
+}
+
+// Records a newly received packet: removes it from the NACK list, and if it
+// is newer than the last received one, updates the loss-rate estimate, adds
+// the skipped sequence numbers to the list and trims the list.
+void NackTracker::UpdateLastReceivedPacket(uint16_t sequence_number,
+                                           uint32_t timestamp) {
+  // Just record the value of sequence number and timestamp if this is the
+  // first packet.
+  if (!any_rtp_received_) {
+    sequence_num_last_received_rtp_ = sequence_number;
+    timestamp_last_received_rtp_ = timestamp;
+    any_rtp_received_ = true;
+    // If no packet is decoded, to have a reasonable estimate of time-to-play
+    // use the given values.
+    if (!any_rtp_decoded_) {
+      sequence_num_last_decoded_rtp_ = sequence_number;
+      timestamp_last_decoded_rtp_ = timestamp;
+    }
+    return;
+  }
+
+  if (sequence_number == sequence_num_last_received_rtp_)
+    return;
+
+  // Received RTP should not be in the list.
+  nack_list_.erase(sequence_number);
+
+  // If this is an old sequence number, no more action is required, return.
+  if (IsNewerSequenceNumber(sequence_num_last_received_rtp_, sequence_number))
+    return;
+
+  // Number of packets skipped between the previous and the current packet.
+  // The subtraction is evaluated after integer promotion, so it must be
+  // reduced modulo 2^16 explicitly; otherwise a sequence-number wrap-around
+  // (e.g. 65535 -> 1) would yield a negative count.
+  const uint16_t num_lost_packets = static_cast<uint16_t>(
+      sequence_number - sequence_num_last_received_rtp_ - 1);
+  UpdatePacketLossRate(num_lost_packets);
+
+  UpdateList(sequence_number, timestamp);
+
+  sequence_num_last_received_rtp_ = sequence_number;
+  timestamp_last_received_rtp_ = timestamp;
+  LimitNackListSize();
+}
+
+// Estimates the packet size in samples from the timestamp and sequence-number
+// increase since the last received packet. Returns absl::nullopt if the
+// sequence number did not advance, or if the estimate is zero or larger than
+// kMaxPacketSizeMs worth of samples.
+absl::optional<int> NackTracker::GetSamplesPerPacket(
+    uint16_t sequence_number_current_received_rtp,
+    uint32_t timestamp_current_received_rtp) const {
+  uint32_t timestamp_increase =
+      timestamp_current_received_rtp - timestamp_last_received_rtp_;
+  uint16_t sequence_num_increase =
+      sequence_number_current_received_rtp - sequence_num_last_received_rtp_;
+  if (sequence_num_increase == 0) {
+    // Avoid dividing by zero; no packet size can be derived from a repeat.
+    return absl::nullopt;
+  }
+
+  int samples_per_packet = timestamp_increase / sequence_num_increase;
+  if (samples_per_packet == 0 ||
+      samples_per_packet > kMaxPacketSizeMs * sample_rate_khz_) {
+    // Not a valid samples per packet.
+    return absl::nullopt;
+  }
+  return samples_per_packet;
+}
+
+// Adds every sequence number strictly between the last received packet and
+// the current one to the NACK list, with an estimated timestamp and
+// time-to-play. Does nothing if there is no gap or no valid packet-size
+// estimate.
+void NackTracker::UpdateList(uint16_t sequence_number_current_received_rtp,
+                             uint32_t timestamp_current_received_rtp) {
+  if (!IsNewerSequenceNumber(sequence_number_current_received_rtp,
+                             sequence_num_last_received_rtp_ + 1)) {
+    return;
+  }
+  RTC_DCHECK(!any_rtp_decoded_ ||
+             IsNewerSequenceNumber(sequence_number_current_received_rtp,
+                                   sequence_num_last_decoded_rtp_));
+
+  absl::optional<int> samples_per_packet = GetSamplesPerPacket(
+      sequence_number_current_received_rtp, timestamp_current_received_rtp);
+  if (!samples_per_packet) {
+    return;
+  }
+
+  for (uint16_t n = sequence_num_last_received_rtp_ + 1;
+       IsNewerSequenceNumber(sequence_number_current_received_rtp, n); ++n) {
+    uint32_t timestamp = EstimateTimestamp(n, *samples_per_packet);
+    NackElement nack_element(TimeToPlay(timestamp), timestamp);
+    nack_list_.insert(nack_list_.end(), std::make_pair(n, nack_element));
+  }
+}
+
+// Extrapolates the timestamp of `sequence_num` from the last received packet,
+// assuming `samples_per_packet` samples per sequence-number step.
+uint32_t NackTracker::EstimateTimestamp(uint16_t sequence_num,
+                                        int samples_per_packet) {
+  uint16_t sequence_num_diff = sequence_num - sequence_num_last_received_rtp_;
+  return sequence_num_diff * samples_per_packet + timestamp_last_received_rtp_;
+}
+
+// Drops leading entries whose time-to-play is 10 ms or less, then reduces the
+// time-to-play of every remaining entry by 10 ms.
+void NackTracker::UpdateEstimatedPlayoutTimeBy10ms() {
+  while (!nack_list_.empty() &&
+         nack_list_.begin()->second.time_to_play_ms <= 10)
+    nack_list_.erase(nack_list_.begin());
+
+  for (auto& entry : nack_list_)
+    entry.second.time_to_play_ms -= 10;
+}
+
+// Records the most recently decoded packet and refreshes the time-to-play of
+// the NACK list entries. A repeated sequence number is treated as 10 ms of
+// elapsed playout.
+void NackTracker::UpdateLastDecodedPacket(uint16_t sequence_number,
+                                          uint32_t timestamp) {
+  if (IsNewerSequenceNumber(sequence_number, sequence_num_last_decoded_rtp_) ||
+      !any_rtp_decoded_) {
+    sequence_num_last_decoded_rtp_ = sequence_number;
+    timestamp_last_decoded_rtp_ = timestamp;
+    // Packets in the list with sequence numbers up to and including the
+    // sequence number of the decoded RTP should be removed from the lists
+    // (hence upper_bound). They will be discarded by the jitter buffer if
+    // they arrive.
+    nack_list_.erase(nack_list_.begin(),
+                     nack_list_.upper_bound(sequence_num_last_decoded_rtp_));
+
+    // Update estimated time-to-play.
+    for (auto& entry : nack_list_)
+      entry.second.time_to_play_ms = TimeToPlay(entry.second.estimated_timestamp);
+  } else {
+    RTC_DCHECK_EQ(sequence_number, sequence_num_last_decoded_rtp_);
+
+    // Same sequence number as before. 10 ms is elapsed, update estimations for
+    // time-to-play.
+    UpdateEstimatedPlayoutTimeBy10ms();
+
+    // Update timestamp for better estimate of time-to-play, for packets which
+    // are added to NACK list later on.
+    timestamp_last_decoded_rtp_ += sample_rate_khz_ * 10;
+  }
+  any_rtp_decoded_ = true;
+}
+
+// Returns a copy of the current NACK list.
+NackTracker::NackList NackTracker::GetNackList() const {
+  return nack_list_;
+}
+
+// Clears the NACK list and restores the received/decoded bookkeeping and the
+// sample rate to their initial values. `max_nack_list_size_` is left as is.
+void NackTracker::Reset() {
+  nack_list_.clear();
+
+  sequence_num_last_received_rtp_ = 0;
+  timestamp_last_received_rtp_ = 0;
+  any_rtp_received_ = false;
+  sequence_num_last_decoded_rtp_ = 0;
+  timestamp_last_decoded_rtp_ = 0;
+  any_rtp_decoded_ = false;
+  sample_rate_khz_ = kDefaultSampleRateKhz;
+}
+
+void NackTracker::SetMaxNackListSize(size_t max_nack_list_size) {
+  RTC_CHECK_GT(max_nack_list_size, 0);
+  // Ugly hack to get around the problem of passing static consts by reference.
+ const size_t kNackListSizeLimitLocal = NackTracker::kNackListSizeLimit; + RTC_CHECK_LE(max_nack_list_size, kNackListSizeLimitLocal); + + max_nack_list_size_ = max_nack_list_size; + LimitNackListSize(); +} + +void NackTracker::LimitNackListSize() { + uint16_t limit = sequence_num_last_received_rtp_ - + static_cast<uint16_t>(max_nack_list_size_) - 1; + nack_list_.erase(nack_list_.begin(), nack_list_.upper_bound(limit)); +} + +int64_t NackTracker::TimeToPlay(uint32_t timestamp) const { + uint32_t timestamp_increase = timestamp - timestamp_last_decoded_rtp_; + return timestamp_increase / sample_rate_khz_; +} + +// We don't erase elements with time-to-play shorter than round-trip-time. +std::vector<uint16_t> NackTracker::GetNackList(int64_t round_trip_time_ms) { + RTC_DCHECK_GE(round_trip_time_ms, 0); + std::vector<uint16_t> sequence_numbers; + if (round_trip_time_ms == 0) { + if (config_.require_valid_rtt) { + return sequence_numbers; + } else { + round_trip_time_ms = config_.default_rtt_ms; + } + } + if (packet_loss_rate_ > + static_cast<uint32_t>(config_.max_loss_rate * (1 << 30))) { + return sequence_numbers; + } + // The estimated packet loss is between 0 and 1, so we need to multiply by 100 + // here. + int max_wait_ms = + 100.0 * config_.ms_per_loss_percent * packet_loss_rate_ / (1 << 30); + for (NackList::const_iterator it = nack_list_.begin(); it != nack_list_.end(); + ++it) { + int64_t time_since_packet_ms = + (timestamp_last_received_rtp_ - it->second.estimated_timestamp) / + sample_rate_khz_; + if (it->second.time_to_play_ms > round_trip_time_ms || + time_since_packet_ms + round_trip_time_ms < max_wait_ms) + sequence_numbers.push_back(it->first); + } + if (config_.never_nack_multiple_times) { + nack_list_.clear(); + } + return sequence_numbers; +} + +void NackTracker::UpdatePacketLossRate(int packets_lost) { + const uint64_t alpha_q30 = (1 << 30) * config_.packet_loss_forget_factor; + // Exponential filter. 
+ packet_loss_rate_ = (alpha_q30 * packet_loss_rate_) >> 30; + for (int i = 0; i < packets_lost; ++i) { + packet_loss_rate_ = + ((alpha_q30 * packet_loss_rate_) >> 30) + ((1 << 30) - alpha_q30); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h new file mode 100644 index 0000000000..14ba2166d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_ +#define MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <vector> + +#include "absl/types/optional.h" +#include "modules/include/module_common_types_public.h" +#include "rtc_base/gtest_prod_util.h" + +// +// The NackTracker class keeps track of the lost packets, an estimate of +// time-to-play for each packet is also given. +// +// Every time a packet is pushed into NetEq, LastReceivedPacket() has to be +// called to update the NACK list. +// +// Every time 10ms audio is pulled from NetEq LastDecodedPacket() should be +// called, and time-to-play is updated at that moment. +// +// If packet N is received, any packet prior to N which has not arrived is +// considered lost, and should be labeled as "missing" (the size of +// the list might be limited and older packet eliminated from the list). +// +// The NackTracker class has to know about the sample rate of the packets to +// compute time-to-play. 
So sample rate should be set as soon as the first +// packet is received. If there is a change in the receive codec (sender changes +// codec) then NackTracker should be reset. This is because NetEQ would flush +// its buffer and re-transmission is meaningless for old packets. Therefore, in +// that case, after reset the sampling rate has to be updated. +// +// Thread Safety +// ============= +// Please note that this class is not thread safe. The class must be protected +// if different APIs are called from different threads. +// +namespace webrtc { + +class NackTracker { + public: + // A limit for the size of the NACK list. + static const size_t kNackListSizeLimit = 500; // 10 seconds for 20 ms frame + // packets. + NackTracker(); + ~NackTracker(); + + // Set a maximum for the size of the NACK list. If the last received packet + // has sequence number of N, then NACK list will not contain any element + // with sequence number earlier than N - `max_nack_list_size`. + // + // The largest maximum size is defined by `kNackListSizeLimit` + void SetMaxNackListSize(size_t max_nack_list_size); + + // Set the sampling rate. + // + // If associated sampling rate of the received packets is changed, call this + // function to update sampling rate. Note that if there is any change in + // received codec then NetEq will flush its buffer and NACK has to be reset. + // After Reset() is called sampling rate has to be set. + void UpdateSampleRate(int sample_rate_hz); + + // Update the sequence number and the timestamp of the last decoded RTP. This + // API should be called every time 10 ms audio is pulled from NetEq. + void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp); + + // Update the sequence number and the timestamp of the last received RTP. This + // API should be called every time a packet is pushed into ACM. 
+ void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp); + + // Get a list of "missing" packets which have expected time-to-play larger + // than the given round-trip-time (in milliseconds). + // Note: Late packets are not included. + // Calling this method multiple times may give different results, since the + // internal nack list may get flushed if never_nack_multiple_times_ is true. + std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms); + + // Reset to default values. The NACK list is cleared. + // `max_nack_list_size_` preserves its value. + void Reset(); + + // Returns the estimated packet loss rate in Q30, for testing only. + uint32_t GetPacketLossRateForTest() { return packet_loss_rate_; } + + private: + // This test need to access the private method GetNackList(). + FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay); + + // Options that can be configured via field trial. + struct Config { + Config(); + + // The exponential decay factor used to estimate the packet loss rate. + double packet_loss_forget_factor = 0.996; + // How many additional ms we are willing to wait (at most) for nacked + // packets for each additional percentage of packet loss. + int ms_per_loss_percent = 20; + // If true, never nack packets more than once. + bool never_nack_multiple_times = false; + // Only nack if the RTT is valid. + bool require_valid_rtt = false; + // Default RTT to use unless `require_valid_rtt` is set. + int default_rtt_ms = 100; + // Do not nack if the loss rate is above this value. + double max_loss_rate = 1.0; + }; + + struct NackElement { + NackElement(int64_t initial_time_to_play_ms, uint32_t initial_timestamp) + : time_to_play_ms(initial_time_to_play_ms), + estimated_timestamp(initial_timestamp) {} + + // Estimated time (ms) left for this packet to be decoded. This estimate is + // updated every time jitter buffer decodes a packet. 
+ int64_t time_to_play_ms; + + // A guess about the timestamp of the missing packet, it is used for + // estimation of `time_to_play_ms`. The estimate might be slightly wrong if + // there has been frame-size change since the last received packet and the + // missing packet. However, the risk of this is low, and in case of such + // errors, there will be a minor misestimation in time-to-play of missing + // packets. This will have a very minor effect on NACK performance. + uint32_t estimated_timestamp; + }; + + class NackListCompare { + public: + bool operator()(uint16_t sequence_number_old, + uint16_t sequence_number_new) const { + return IsNewerSequenceNumber(sequence_number_new, sequence_number_old); + } + }; + + typedef std::map<uint16_t, NackElement, NackListCompare> NackList; + + // This API is used only for testing to assess whether time-to-play is + // computed correctly. + NackList GetNackList() const; + + // This function subtracts 10 ms of time-to-play for all packets in NACK list. + // This is called when 10 ms elapsed with no new RTP packet decoded. + void UpdateEstimatedPlayoutTimeBy10ms(); + + // Returns a valid number of samples per packet given the current received + // sequence number and timestamp or nullopt of none could be computed. + absl::optional<int> GetSamplesPerPacket( + uint16_t sequence_number_current_received_rtp, + uint32_t timestamp_current_received_rtp) const; + + // Given the `sequence_number_current_received_rtp` of currently received RTP + // update the list. Packets that are older than the received packet are added + // to the nack list. + void UpdateList(uint16_t sequence_number_current_received_rtp, + uint32_t timestamp_current_received_rtp); + + // Packets which have sequence number older that + // `sequence_num_last_received_rtp_` - `max_nack_list_size_` are removed + // from the NACK list. + void LimitNackListSize(); + + // Estimate timestamp of a missing packet given its sequence number. 
+ uint32_t EstimateTimestamp(uint16_t sequence_number, int samples_per_packet); + + // Compute time-to-play given a timestamp. + int64_t TimeToPlay(uint32_t timestamp) const; + + // Updates the estimated packet lost rate. + void UpdatePacketLossRate(int packets_lost); + + const Config config_; + + // Valid if a packet is received. + uint16_t sequence_num_last_received_rtp_; + uint32_t timestamp_last_received_rtp_; + bool any_rtp_received_; // If any packet received. + + // Valid if a packet is decoded. + uint16_t sequence_num_last_decoded_rtp_; + uint32_t timestamp_last_decoded_rtp_; + bool any_rtp_decoded_; // If any packet decoded. + + int sample_rate_khz_; // Sample rate in kHz. + + // A list of missing packets to be retransmitted. Components of the list + // contain the sequence number of missing packets and the estimated time that + // each pack is going to be played out. + NackList nack_list_; + + // NACK list will not keep track of missing packets prior to + // `sequence_num_last_received_rtp_` - `max_nack_list_size_`. + size_t max_nack_list_size_; + + // Current estimate of the packet loss rate in Q30. + uint32_t packet_loss_rate_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc new file mode 100644 index 0000000000..bcc5120ff3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/nack_tracker_unittest.cc @@ -0,0 +1,565 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/nack_tracker.h" + +#include <stdint.h> + +#include <algorithm> +#include <memory> + +#include "modules/audio_coding/include/audio_coding_module_typedefs.h" +#include "test/field_trial.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kSampleRateHz = 16000; +const int kPacketSizeMs = 30; +const uint32_t kTimestampIncrement = 480; // 30 ms. +const int64_t kShortRoundTripTimeMs = 1; + +bool IsNackListCorrect(const std::vector<uint16_t>& nack_list, + const uint16_t* lost_sequence_numbers, + size_t num_lost_packets) { + if (nack_list.size() != num_lost_packets) + return false; + + if (num_lost_packets == 0) + return true; + + for (size_t k = 0; k < nack_list.size(); ++k) { + int seq_num = nack_list[k]; + bool seq_num_matched = false; + for (size_t n = 0; n < num_lost_packets; ++n) { + if (seq_num == lost_sequence_numbers[n]) { + seq_num_matched = true; + break; + } + } + if (!seq_num_matched) + return false; + } + return true; +} + +} // namespace + +TEST(NackTrackerTest, EmptyListWhenNoPacketLoss) { + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + int seq_num = 1; + uint32_t timestamp = 0; + + std::vector<uint16_t> nack_list; + for (int n = 0; n < 100; n++) { + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + seq_num++; + timestamp += kTimestampIncrement; + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + } +} + +TEST(NackTrackerTest, LatePacketsMovedToNackThenNackListDoesNotChange) { + const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9}; + static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) / + sizeof(kSequenceNumberLostPackets[0]); + + for (int k = 0; k < 2; k++) { // Two iteration with/without wrap around. 
+ NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + uint16_t sequence_num_lost_packets[kNumAllLostPackets]; + for (int n = 0; n < kNumAllLostPackets; n++) { + sequence_num_lost_packets[n] = + kSequenceNumberLostPackets[n] + + k * 65531; // Have wrap around in sequence numbers for |k == 1|. + } + uint16_t seq_num = sequence_num_lost_packets[0] - 1; + + uint32_t timestamp = 0; + std::vector<uint16_t> nack_list; + + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + + seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1; + timestamp += kTimestampIncrement * (kNumAllLostPackets + 1); + int num_lost_packets = std::max(0, kNumAllLostPackets); + + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets, + num_lost_packets)); + seq_num++; + timestamp += kTimestampIncrement; + num_lost_packets++; + + for (int n = 0; n < 100; ++n) { + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets, + kNumAllLostPackets)); + seq_num++; + timestamp += kTimestampIncrement; + } + } +} + +TEST(NackTrackerTest, ArrivedPacketsAreRemovedFromNackList) { + const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9}; + static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) / + sizeof(kSequenceNumberLostPackets[0]); + + for (int k = 0; k < 2; ++k) { // Two iteration with/without wrap around. + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + uint16_t sequence_num_lost_packets[kNumAllLostPackets]; + for (int n = 0; n < kNumAllLostPackets; ++n) { + sequence_num_lost_packets[n] = kSequenceNumberLostPackets[n] + + k * 65531; // Wrap around for |k == 1|. 
+ } + + uint16_t seq_num = sequence_num_lost_packets[0] - 1; + uint32_t timestamp = 0; + + nack.UpdateLastReceivedPacket(seq_num, timestamp); + std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + + size_t index_retransmitted_rtp = 0; + uint32_t timestamp_retransmitted_rtp = timestamp + kTimestampIncrement; + + seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1; + timestamp += kTimestampIncrement * (kNumAllLostPackets + 1); + size_t num_lost_packets = kNumAllLostPackets; + for (int n = 0; n < kNumAllLostPackets; ++n) { + // Number of lost packets does not change for the first + // |kNackThreshold + 1| packets, one is added to the list and one is + // removed. Thereafter, the list shrinks every iteration. + if (n >= 1) + num_lost_packets--; + + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect( + nack_list, &sequence_num_lost_packets[index_retransmitted_rtp], + num_lost_packets)); + seq_num++; + timestamp += kTimestampIncrement; + + // Retransmission of a lost RTP. + nack.UpdateLastReceivedPacket( + sequence_num_lost_packets[index_retransmitted_rtp], + timestamp_retransmitted_rtp); + index_retransmitted_rtp++; + timestamp_retransmitted_rtp += kTimestampIncrement; + + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect( + nack_list, &sequence_num_lost_packets[index_retransmitted_rtp], + num_lost_packets - 1)); // One less lost packet in the list. + } + ASSERT_TRUE(nack_list.empty()); + } +} + +// Assess if estimation of timestamps and time-to-play is correct. Introduce all +// combinations that timestamps and sequence numbers might have wrap around. 
+TEST(NackTrackerTest, EstimateTimestampAndTimeToPlay) { + const uint16_t kLostPackets[] = {2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15}; + static const int kNumAllLostPackets = + sizeof(kLostPackets) / sizeof(kLostPackets[0]); + + for (int k = 0; k < 4; ++k) { + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + // Sequence number wrap around if `k` is 2 or 3; + int seq_num_offset = (k < 2) ? 0 : 65531; + + // Timestamp wrap around if `k` is 1 or 3. + uint32_t timestamp_offset = + (k & 0x1) ? static_cast<uint32_t>(0xffffffff) - 6 : 0; + + uint32_t timestamp_lost_packets[kNumAllLostPackets]; + uint16_t seq_num_lost_packets[kNumAllLostPackets]; + for (int n = 0; n < kNumAllLostPackets; ++n) { + timestamp_lost_packets[n] = + timestamp_offset + kLostPackets[n] * kTimestampIncrement; + seq_num_lost_packets[n] = seq_num_offset + kLostPackets[n]; + } + + // We and to push two packets before lost burst starts. + uint16_t seq_num = seq_num_lost_packets[0] - 2; + uint32_t timestamp = timestamp_lost_packets[0] - 2 * kTimestampIncrement; + + const uint16_t first_seq_num = seq_num; + const uint32_t first_timestamp = timestamp; + + // Two consecutive packets to have a correct estimate of timestamp increase. + nack.UpdateLastReceivedPacket(seq_num, timestamp); + seq_num++; + timestamp += kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + // A packet after the last one which is supposed to be lost. + seq_num = seq_num_lost_packets[kNumAllLostPackets - 1] + 1; + timestamp = + timestamp_lost_packets[kNumAllLostPackets - 1] + kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + NackTracker::NackList nack_list = nack.GetNackList(); + EXPECT_EQ(static_cast<size_t>(kNumAllLostPackets), nack_list.size()); + + // Pretend the first packet is decoded. 
+ nack.UpdateLastDecodedPacket(first_seq_num, first_timestamp); + nack_list = nack.GetNackList(); + + NackTracker::NackList::iterator it = nack_list.begin(); + while (it != nack_list.end()) { + seq_num = it->first - seq_num_offset; + int index = seq_num - kLostPackets[0]; + EXPECT_EQ(timestamp_lost_packets[index], it->second.estimated_timestamp); + EXPECT_EQ((index + 2) * kPacketSizeMs, it->second.time_to_play_ms); + ++it; + } + + // Pretend 10 ms is passed, and we had pulled audio from NetEq, it still + // reports the same sequence number as decoded, time-to-play should be + // updated by 10 ms. + nack.UpdateLastDecodedPacket(first_seq_num, first_timestamp); + nack_list = nack.GetNackList(); + it = nack_list.begin(); + while (it != nack_list.end()) { + seq_num = it->first - seq_num_offset; + int index = seq_num - kLostPackets[0]; + EXPECT_EQ((index + 2) * kPacketSizeMs - 10, it->second.time_to_play_ms); + ++it; + } + } +} + +TEST(NackTrackerTest, + MissingPacketsPriorToLastDecodedRtpShouldNotBeInNackList) { + for (int m = 0; m < 2; ++m) { + uint16_t seq_num_offset = (m == 0) ? 0 : 65531; // Wrap around if `m` is 1. + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + // Two consecutive packets to have a correct estimate of timestamp increase. + uint16_t seq_num = 0; + nack.UpdateLastReceivedPacket(seq_num_offset + seq_num, + seq_num * kTimestampIncrement); + seq_num++; + nack.UpdateLastReceivedPacket(seq_num_offset + seq_num, + seq_num * kTimestampIncrement); + + // Skip 10 packets (larger than NACK threshold). + const int kNumLostPackets = 10; + seq_num += kNumLostPackets + 1; + nack.UpdateLastReceivedPacket(seq_num_offset + seq_num, + seq_num * kTimestampIncrement); + + const size_t kExpectedListSize = kNumLostPackets; + std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_EQ(kExpectedListSize, nack_list.size()); + + for (int k = 0; k < 2; ++k) { + // Decoding of the first and the second arrived packets. 
+ for (int n = 0; n < kPacketSizeMs / 10; ++n) { + nack.UpdateLastDecodedPacket(seq_num_offset + k, + k * kTimestampIncrement); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_EQ(kExpectedListSize, nack_list.size()); + } + } + + // Decoding of the last received packet. + nack.UpdateLastDecodedPacket(seq_num + seq_num_offset, + seq_num * kTimestampIncrement); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + + // Make sure list of late packets is also empty. To check that, push few + // packets, if the late list is not empty its content will pop up in NACK + // list. + for (int n = 0; n < 10; ++n) { + seq_num++; + nack.UpdateLastReceivedPacket(seq_num_offset + seq_num, + seq_num * kTimestampIncrement); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + } + } +} + +TEST(NackTrackerTest, Reset) { + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + // Two consecutive packets to have a correct estimate of timestamp increase. + uint16_t seq_num = 0; + nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement); + seq_num++; + nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement); + + // Skip 10 packets (larger than NACK threshold). + const int kNumLostPackets = 10; + seq_num += kNumLostPackets + 1; + nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement); + + const size_t kExpectedListSize = kNumLostPackets; + std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_EQ(kExpectedListSize, nack_list.size()); + + nack.Reset(); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); +} + +TEST(NackTrackerTest, ListSizeAppliedFromBeginning) { + const size_t kNackListSize = 10; + for (int m = 0; m < 2; ++m) { + uint16_t seq_num_offset = (m == 0) ? 0 : 65525; // Wrap around if `m` is 1. 
+ NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + + uint16_t seq_num = seq_num_offset; + uint32_t timestamp = 0x12345678; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + // Packet lost more than NACK-list size limit. + uint16_t num_lost_packets = kNackListSize + 5; + + seq_num += num_lost_packets + 1; + timestamp += (num_lost_packets + 1) * kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_EQ(kNackListSize, nack_list.size()); + } +} + +TEST(NackTrackerTest, ChangeOfListSizeAppliedAndOldElementsRemoved) { + const size_t kNackListSize = 10; + for (int m = 0; m < 2; ++m) { + uint16_t seq_num_offset = (m == 0) ? 0 : 65525; // Wrap around if `m` is 1. + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + + uint16_t seq_num = seq_num_offset; + uint32_t timestamp = 0x87654321; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + // Packet lost more than NACK-list size limit. + uint16_t num_lost_packets = kNackListSize + 5; + + std::unique_ptr<uint16_t[]> seq_num_lost(new uint16_t[num_lost_packets]); + for (int n = 0; n < num_lost_packets; ++n) { + seq_num_lost[n] = ++seq_num; + } + + ++seq_num; + timestamp += (num_lost_packets + 1) * kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + size_t expected_size = num_lost_packets; + + std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_EQ(expected_size, nack_list.size()); + + nack.SetMaxNackListSize(kNackListSize); + expected_size = kNackListSize; + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect( + nack_list, &seq_num_lost[num_lost_packets - kNackListSize], + expected_size)); + + // NACK list should shrink. 
+ for (size_t n = 1; n < kNackListSize; ++n) { + ++seq_num; + timestamp += kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + --expected_size; + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(IsNackListCorrect( + nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n], + expected_size)); + } + + // After this packet, NACK list should be empty. + ++seq_num; + timestamp += kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + nack_list = nack.GetNackList(kShortRoundTripTimeMs); + EXPECT_TRUE(nack_list.empty()); + } +} + +TEST(NackTrackerTest, RoudTripTimeIsApplied) { + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + // Packet lost more than NACK-list size limit. + uint16_t kNumLostPackets = 5; + + seq_num += (1 + kNumLostPackets); + timestamp += (1 + kNumLostPackets) * kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + // Expected time-to-play are: + // kPacketSizeMs - 10, 2*kPacketSizeMs - 10, 3*kPacketSizeMs - 10, ... + // + // sequence number: 1, 2, 3, 4, 5 + // time-to-play: 20, 50, 80, 110, 140 + // + std::vector<uint16_t> nack_list = nack.GetNackList(100); + ASSERT_EQ(2u, nack_list.size()); + EXPECT_EQ(4, nack_list[0]); + EXPECT_EQ(5, nack_list[1]); +} + +// Set never_nack_multiple_times to true with a field trial and verify that +// packets are not nacked multiple times. 
+TEST(NackTrackerTest, DoNotNackMultipleTimes) { + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqNackTrackerConfig/" + "packet_loss_forget_factor:0.996,ms_per_loss_percent:20," + "never_nack_multiple_times:true/"); + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + uint16_t kNumLostPackets = 3; + + seq_num += (1 + kNumLostPackets); + timestamp += (1 + kNumLostPackets) * kTimestampIncrement; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + + std::vector<uint16_t> nack_list = nack.GetNackList(10); + ASSERT_EQ(3u, nack_list.size()); + EXPECT_EQ(1, nack_list[0]); + EXPECT_EQ(2, nack_list[1]); + EXPECT_EQ(3, nack_list[2]); + // When we get the nack list again, it should be empty. + std::vector<uint16_t> nack_list2 = nack.GetNackList(10); + EXPECT_TRUE(nack_list2.empty()); +} + +// Test if estimated packet loss rate is correct. +TEST(NackTrackerTest, PacketLossRateCorrect) { + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + auto add_packet = [&nack, &seq_num, &timestamp](bool received) { + if (received) { + nack.UpdateLastReceivedPacket(seq_num, timestamp); + } + seq_num++; + timestamp += kTimestampIncrement; + }; + // Add some packets, but every fourth packet is lost. + for (int i = 0; i < 300; i++) { + add_packet(true); + add_packet(true); + add_packet(true); + add_packet(false); + } + // 1 << 28 is 0.25 in Q30. We expect the packet loss estimate to be within + // 0.01 of that. 
+ EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 28, (1 << 30) / 100); +} + +TEST(NackTrackerTest, DoNotNackAfterDtx) { + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + nack.UpdateLastReceivedPacket(seq_num, timestamp); + EXPECT_TRUE(nack.GetNackList(0).empty()); + constexpr int kDtxPeriod = 400; + nack.UpdateLastReceivedPacket(seq_num + 2, + timestamp + kDtxPeriod * kSampleRateHz / 1000); + EXPECT_TRUE(nack.GetNackList(0).empty()); +} + +TEST(NackTrackerTest, DoNotNackIfLossRateIsTooHigh) { + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqNackTrackerConfig/max_loss_rate:0.4/"); + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + auto add_packet = [&nack, &seq_num, &timestamp](bool received) { + if (received) { + nack.UpdateLastReceivedPacket(seq_num, timestamp); + } + seq_num++; + timestamp += kTimestampIncrement; + }; + for (int i = 0; i < 500; i++) { + add_packet(true); + add_packet(false); + } + // Expect 50% loss rate which is higher than the configured maximum 40%. 
+ EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 29, (1 << 30) / 100); + EXPECT_TRUE(nack.GetNackList(0).empty()); +} + +TEST(NackTrackerTest, OnlyNackIfRttIsValid) { + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqNackTrackerConfig/require_valid_rtt:true/"); + const int kNackListSize = 200; + NackTracker nack; + nack.UpdateSampleRate(kSampleRateHz); + nack.SetMaxNackListSize(kNackListSize); + uint16_t seq_num = 0; + uint32_t timestamp = 0x87654321; + auto add_packet = [&nack, &seq_num, &timestamp](bool received) { + if (received) { + nack.UpdateLastReceivedPacket(seq_num, timestamp); + } + seq_num++; + timestamp += kTimestampIncrement; + }; + add_packet(true); + add_packet(false); + add_packet(true); + EXPECT_TRUE(nack.GetNackList(0).empty()); + EXPECT_FALSE(nack.GetNackList(10).empty()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc new file mode 100644 index 0000000000..cf310d1efb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Test to verify correct operation when using the decoder-internal PLC. 
+ +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" +#include "modules/audio_coding/neteq/tools/audio_checksum.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/encode_neteq_input.h" +#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/audio_decoder_proxy_factory.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr int kSampleRateHz = 32000; +constexpr int kRunTimeMs = 10000; + +// This class implements a fake decoder. The decoder will read audio from a file +// and present as output, both for regular decoding and for PLC. +class AudioDecoderPlc : public AudioDecoder { + public: + AudioDecoderPlc(std::unique_ptr<InputAudioFile> input, int sample_rate_hz) + : input_(std::move(input)), sample_rate_hz_(sample_rate_hz) {} + + void Reset() override {} + int SampleRateHz() const override { return sample_rate_hz_; } + size_t Channels() const override { return 1; } + int DecodeInternal(const uint8_t* /*encoded*/, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + RTC_CHECK_GE(encoded_len / 2, 10 * sample_rate_hz_ / 1000); + RTC_CHECK_LE(encoded_len / 2, 2 * 10 * sample_rate_hz_ / 1000); + RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_); + RTC_CHECK(decoded); + RTC_CHECK(speech_type); + RTC_CHECK(input_->Read(encoded_len / 2, decoded)); + *speech_type = kSpeech; + last_was_plc_ = false; + return encoded_len / 2; + } + + void GeneratePlc(size_t requested_samples_per_channel, + rtc::BufferT<int16_t>* concealment_audio) override { + // Instead of generating random data for 
GeneratePlc we use the same data as + // the input, so we can check that we produce the same result independently + // of the losses. + RTC_DCHECK_EQ(requested_samples_per_channel, 10 * sample_rate_hz_ / 1000); + + // Must keep a local copy of this since DecodeInternal sets it to false. + const bool last_was_plc = last_was_plc_; + + std::vector<int16_t> decoded(5760); + SpeechType speech_type; + int dec_len = DecodeInternal(nullptr, 2 * 10 * sample_rate_hz_ / 1000, + sample_rate_hz_, decoded.data(), &speech_type); + concealment_audio->AppendData(decoded.data(), dec_len); + concealed_samples_ += rtc::checked_cast<size_t>(dec_len); + + if (!last_was_plc) { + ++concealment_events_; + } + last_was_plc_ = true; + } + + size_t concealed_samples() { return concealed_samples_; } + size_t concealment_events() { return concealment_events_; } + + private: + const std::unique_ptr<InputAudioFile> input_; + const int sample_rate_hz_; + size_t concealed_samples_ = 0; + size_t concealment_events_ = 0; + bool last_was_plc_ = false; +}; + +// An input sample generator which generates only zero-samples. 
+class ZeroSampleGenerator : public EncodeNetEqInput::Generator { + public: + rtc::ArrayView<const int16_t> Generate(size_t num_samples) override { + vec.resize(num_samples, 0); + rtc::ArrayView<const int16_t> view(vec); + RTC_DCHECK_EQ(view.size(), num_samples); + return view; + } + + private: + std::vector<int16_t> vec; +}; + +// A NetEqInput which connects to another NetEqInput, but drops a number of +// consecutive packets on the way +class LossyInput : public NetEqInput { + public: + LossyInput(int loss_cadence, + int burst_length, + std::unique_ptr<NetEqInput> input) + : loss_cadence_(loss_cadence), + burst_length_(burst_length), + input_(std::move(input)) {} + + absl::optional<int64_t> NextPacketTime() const override { + return input_->NextPacketTime(); + } + + absl::optional<int64_t> NextOutputEventTime() const override { + return input_->NextOutputEventTime(); + } + + std::unique_ptr<PacketData> PopPacket() override { + if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) { + // Pop `burst_length_` packets to create the loss. 
+ auto packet_to_return = input_->PopPacket(); + for (int i = 0; i < burst_length_; i++) { + input_->PopPacket(); + } + return packet_to_return; + } + return input_->PopPacket(); + } + + void AdvanceOutputEvent() override { return input_->AdvanceOutputEvent(); } + + bool ended() const override { return input_->ended(); } + + absl::optional<RTPHeader> NextHeader() const override { + return input_->NextHeader(); + } + + private: + const int loss_cadence_; + const int burst_length_; + int count_ = 0; + const std::unique_ptr<NetEqInput> input_; +}; + +class AudioChecksumWithOutput : public AudioChecksum { + public: + explicit AudioChecksumWithOutput(std::string* output_str) + : output_str_(*output_str) {} + ~AudioChecksumWithOutput() { output_str_ = Finish(); } + + private: + std::string& output_str_; +}; + +struct TestStatistics { + NetEqNetworkStatistics network; + NetEqLifetimeStatistics lifetime; +}; + +TestStatistics RunTest(int loss_cadence, + int burst_length, + std::string* checksum) { + NetEq::Config config; + config.for_test_no_time_stretching = true; + + // The input is mostly useless. It sends zero-samples to a PCM16b encoder, + // but the actual encoded samples will never be used by the decoder in the + // test. See below about the decoder. + auto generator = std::make_unique<ZeroSampleGenerator>(); + constexpr int kPayloadType = 100; + AudioEncoderPcm16B::Config encoder_config; + encoder_config.sample_rate_hz = kSampleRateHz; + encoder_config.payload_type = kPayloadType; + auto encoder = std::make_unique<AudioEncoderPcm16B>(encoder_config); + auto input = std::make_unique<EncodeNetEqInput>( + std::move(generator), std::move(encoder), kRunTimeMs); + // Wrap the input in a loss function. + auto lossy_input = std::make_unique<LossyInput>(loss_cadence, burst_length, + std::move(input)); + + // Setting up decoders. + NetEqTest::DecoderMap decoders; + // Using a fake decoder which simply reads the output audio from a file. 
+ auto input_file = std::make_unique<InputAudioFile>( + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm")); + AudioDecoderPlc dec(std::move(input_file), kSampleRateHz); + // Masquerading as a PCM16b decoder. + decoders.emplace(kPayloadType, SdpAudioFormat("l16", 32000, 1)); + + // Output is simply a checksum calculator. + auto output = std::make_unique<AudioChecksumWithOutput>(checksum); + + // No callback objects. + NetEqTest::Callbacks callbacks; + + NetEqTest neteq_test( + config, /*decoder_factory=*/ + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&dec), + /*codecs=*/decoders, /*text_log=*/nullptr, /*neteq_factory=*/nullptr, + /*input=*/std::move(lossy_input), std::move(output), callbacks); + EXPECT_LE(kRunTimeMs, neteq_test.Run()); + + auto lifetime_stats = neteq_test.LifetimeStats(); + EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples); + EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events); + return {neteq_test.SimulationStats(), neteq_test.LifetimeStats()}; +} +} // namespace + +// Check that some basic metrics are produced in the right direction. In +// particular, expand_rate should only increase if there are losses present. Our +// dummy decoder is designed such as the checksum should always be the same +// regardless of the losses given that calls are executed in the right order. +TEST(NetEqDecoderPlc, BasicMetrics) { + std::string checksum; + + // Drop 1 packet every 10 packets. 
+ auto stats = RunTest(10, 1, &checksum); + + std::string checksum_no_loss; + auto stats_no_loss = RunTest(0, 0, &checksum_no_loss); + + EXPECT_EQ(checksum, checksum_no_loss); + + EXPECT_EQ(stats.network.preemptive_rate, + stats_no_loss.network.preemptive_rate); + EXPECT_EQ(stats.network.accelerate_rate, + stats_no_loss.network.accelerate_rate); + EXPECT_EQ(0, stats_no_loss.network.expand_rate); + EXPECT_GT(stats.network.expand_rate, 0); +} + +// Checks that interruptions are not counted in small losses but they are +// correctly counted in long interruptions. +TEST(NetEqDecoderPlc, CountInterruptions) { + std::string checksum; + std::string checksum_2; + std::string checksum_3; + + // Half of the packets lost but in short interruptions. + auto stats_no_interruptions = RunTest(1, 1, &checksum); + // One lost of 500 ms (250 packets). + auto stats_one_interruption = RunTest(200, 250, &checksum_2); + // Two losses of 250ms each (125 packets). + auto stats_two_interruptions = RunTest(125, 125, &checksum_3); + + EXPECT_EQ(checksum, checksum_2); + EXPECT_EQ(checksum, checksum_3); + EXPECT_GT(stats_no_interruptions.network.expand_rate, 0); + EXPECT_EQ(stats_no_interruptions.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_no_interruptions.lifetime.interruption_count, 0); + + EXPECT_GT(stats_one_interruption.network.expand_rate, 0); + EXPECT_EQ(stats_one_interruption.lifetime.total_interruption_duration_ms, + 5000); + EXPECT_EQ(stats_one_interruption.lifetime.interruption_count, 1); + + EXPECT_GT(stats_two_interruptions.network.expand_rate, 0); + EXPECT_EQ(stats_two_interruptions.lifetime.total_interruption_duration_ms, + 5000); + EXPECT_EQ(stats_two_interruptions.lifetime.interruption_count, 2); +} + +// Checks that small losses do not produce interruptions. 
+TEST(NetEqDecoderPlc, NoInterruptionsInSmallLosses) { + std::string checksum_1; + std::string checksum_4; + + auto stats_1 = RunTest(300, 1, &checksum_1); + auto stats_4 = RunTest(300, 4, &checksum_4); + + EXPECT_EQ(checksum_1, checksum_4); + + EXPECT_EQ(stats_1.lifetime.interruption_count, 0); + EXPECT_EQ(stats_1.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_1.lifetime.concealed_samples, 640u); // 20ms of concealment. + EXPECT_EQ(stats_1.lifetime.concealment_events, 1u); // in just one event. + + EXPECT_EQ(stats_4.lifetime.interruption_count, 0); + EXPECT_EQ(stats_4.lifetime.total_interruption_duration_ms, 0); + EXPECT_EQ(stats_4.lifetime.concealed_samples, 2560u); // 80ms of concealment. + EXPECT_EQ(stats_4.lifetime.concealment_events, 1u); // in just one event. +} + +// Checks that interruptions of different sizes report correct duration. +TEST(NetEqDecoderPlc, InterruptionsReportCorrectSize) { + std::string checksum; + + for (int burst_length = 5; burst_length < 10; burst_length++) { + auto stats = RunTest(300, burst_length, &checksum); + auto duration = stats.lifetime.total_interruption_duration_ms; + if (burst_length < 8) { + EXPECT_EQ(duration, 0); + } else { + EXPECT_EQ(duration, burst_length * 20); + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc new file mode 100644 index 0000000000..6a6367d045 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -0,0 +1,2141 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/neteq_impl.h" + +#include <algorithm> +#include <cstdint> +#include <cstring> +#include <list> +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "api/audio_codecs/audio_decoder.h" +#include "api/neteq/tick_timer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" +#include "modules/audio_coding/neteq/accelerate.h" +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/comfort_noise.h" +#include "modules/audio_coding/neteq/decision_logic.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/dtmf_buffer.h" +#include "modules/audio_coding/neteq/dtmf_tone_generator.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/merge.h" +#include "modules/audio_coding/neteq/nack_tracker.h" +#include "modules/audio_coding/neteq/normal.h" +#include "modules/audio_coding/neteq/packet.h" +#include "modules/audio_coding/neteq/packet_buffer.h" +#include "modules/audio_coding/neteq/post_decode_vad.h" +#include "modules/audio_coding/neteq/preemptive_expand.h" +#include "modules/audio_coding/neteq/red_payload_splitter.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "modules/audio_coding/neteq/time_stretch.h" +#include "modules/audio_coding/neteq/timestamp_scaler.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/sanitizer.h" +#include "rtc_base/strings/audio_format_to_string.h" +#include "rtc_base/trace_event.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +namespace { + +std::unique_ptr<NetEqController> 
CreateNetEqController( + const NetEqControllerFactory& controller_factory, + int base_min_delay, + int max_packets_in_buffer, + bool allow_time_stretching, + TickTimer* tick_timer, + webrtc::Clock* clock) { + NetEqController::Config config; + config.base_min_delay_ms = base_min_delay; + config.max_packets_in_buffer = max_packets_in_buffer; + config.allow_time_stretching = allow_time_stretching; + config.tick_timer = tick_timer; + config.clock = clock; + return controller_factory.CreateNetEqController(config); +} + +} // namespace + +NetEqImpl::Dependencies::Dependencies( + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + const NetEqControllerFactory& controller_factory) + : clock(clock), + tick_timer(new TickTimer), + stats(new StatisticsCalculator), + decoder_database( + new DecoderDatabase(decoder_factory, config.codec_pair_id)), + dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)), + dtmf_tone_generator(new DtmfToneGenerator), + packet_buffer( + new PacketBuffer(config.max_packets_in_buffer, tick_timer.get())), + neteq_controller( + CreateNetEqController(controller_factory, + config.min_delay_ms, + config.max_packets_in_buffer, + !config.for_test_no_time_stretching, + tick_timer.get(), + clock)), + red_payload_splitter(new RedPayloadSplitter), + timestamp_scaler(new TimestampScaler(*decoder_database)), + accelerate_factory(new AccelerateFactory), + expand_factory(new ExpandFactory), + preemptive_expand_factory(new PreemptiveExpandFactory) {} + +NetEqImpl::Dependencies::~Dependencies() = default; + +NetEqImpl::NetEqImpl(const NetEq::Config& config, + Dependencies&& deps, + bool create_components) + : clock_(deps.clock), + tick_timer_(std::move(deps.tick_timer)), + decoder_database_(std::move(deps.decoder_database)), + dtmf_buffer_(std::move(deps.dtmf_buffer)), + dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)), + packet_buffer_(std::move(deps.packet_buffer)), + 
red_payload_splitter_(std::move(deps.red_payload_splitter)), + timestamp_scaler_(std::move(deps.timestamp_scaler)), + vad_(new PostDecodeVad()), + expand_factory_(std::move(deps.expand_factory)), + accelerate_factory_(std::move(deps.accelerate_factory)), + preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)), + stats_(std::move(deps.stats)), + controller_(std::move(deps.neteq_controller)), + last_mode_(Mode::kNormal), + decoded_buffer_length_(kMaxFrameSize), + decoded_buffer_(new int16_t[decoded_buffer_length_]), + playout_timestamp_(0), + new_codec_(false), + timestamp_(0), + reset_decoder_(false), + first_packet_(true), + enable_fast_accelerate_(config.enable_fast_accelerate), + nack_enabled_(false), + enable_muted_state_(config.enable_muted_state), + expand_uma_logger_("WebRTC.Audio.ExpandRatePercent", + 10, // Report once every 10 s. + tick_timer_.get()), + speech_expand_uma_logger_("WebRTC.Audio.SpeechExpandRatePercent", + 10, // Report once every 10 s. + tick_timer_.get()), + no_time_stretching_(config.for_test_no_time_stretching) { + RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString(); + int fs = config.sample_rate_hz; + if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { + RTC_LOG(LS_ERROR) << "Sample rate " << fs + << " Hz not supported. " + "Changing to 8000 Hz."; + fs = 8000; + } + controller_->SetMaximumDelay(config.max_delay_ms); + fs_hz_ = fs; + fs_mult_ = fs / 8000; + last_output_sample_rate_hz_ = fs; + output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_); + controller_->SetSampleRate(fs_hz_, output_size_samples_); + decoder_frame_length_ = 2 * output_size_samples_; // 20 ms. + if (create_components) { + SetSampleRateAndChannels(fs, 1); // Default is 1 channel. 
+ } + RTC_DCHECK(!vad_->enabled()); + if (config.enable_post_decode_vad) { + vad_->Enable(); + } +} + +NetEqImpl::~NetEqImpl() = default; + +int NetEqImpl::InsertPacket(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> payload) { + rtc::MsanCheckInitialized(payload); + TRACE_EVENT0("webrtc", "NetEqImpl::InsertPacket"); + MutexLock lock(&mutex_); + if (InsertPacketInternal(rtp_header, payload) != 0) { + return kFail; + } + return kOK; +} + +void NetEqImpl::InsertEmptyPacket(const RTPHeader& rtp_header) { + MutexLock lock(&mutex_); + if (nack_enabled_) { + nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber, + rtp_header.timestamp); + } + controller_->RegisterEmptyPacket(); +} + +namespace { +void SetAudioFrameActivityAndType(bool vad_enabled, + NetEqImpl::OutputType type, + AudioFrame::VADActivity last_vad_activity, + AudioFrame* audio_frame) { + switch (type) { + case NetEqImpl::OutputType::kNormalSpeech: { + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadActive; + break; + } + case NetEqImpl::OutputType::kVadPassive: { + // This should only be reached if the VAD is enabled. 
+ RTC_DCHECK(vad_enabled); + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case NetEqImpl::OutputType::kCNG: { + audio_frame->speech_type_ = AudioFrame::kCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case NetEqImpl::OutputType::kPLC: { + audio_frame->speech_type_ = AudioFrame::kPLC; + audio_frame->vad_activity_ = last_vad_activity; + break; + } + case NetEqImpl::OutputType::kPLCCNG: { + audio_frame->speech_type_ = AudioFrame::kPLCCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case NetEqImpl::OutputType::kCodecPLC: { + audio_frame->speech_type_ = AudioFrame::kCodecPLC; + audio_frame->vad_activity_ = last_vad_activity; + break; + } + default: + RTC_DCHECK_NOTREACHED(); + } + if (!vad_enabled) { + // Always set kVadUnknown when receive VAD is inactive. + audio_frame->vad_activity_ = AudioFrame::kVadUnknown; + } +} +} // namespace + +int NetEqImpl::GetAudio(AudioFrame* audio_frame, + bool* muted, + int* current_sample_rate_hz, + absl::optional<Operation> action_override) { + TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio"); + MutexLock lock(&mutex_); + if (GetAudioInternal(audio_frame, muted, action_override) != 0) { + return kFail; + } + RTC_DCHECK_EQ( + audio_frame->sample_rate_hz_, + rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100)); + RTC_DCHECK_EQ(*muted, audio_frame->muted()); + SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(), + last_vad_activity_, audio_frame); + last_vad_activity_ = audio_frame->vad_activity_; + last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; + RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || + last_output_sample_rate_hz_ == 16000 || + last_output_sample_rate_hz_ == 32000 || + last_output_sample_rate_hz_ == 48000) + << "Unexpected sample rate " << last_output_sample_rate_hz_; + + if (current_sample_rate_hz) { + *current_sample_rate_hz = last_output_sample_rate_hz_; + 
} + + return kOK; +} + +void NetEqImpl::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) { + MutexLock lock(&mutex_); + const std::vector<int> changed_payload_types = + decoder_database_->SetCodecs(codecs); + for (const int pt : changed_payload_types) { + packet_buffer_->DiscardPacketsWithPayloadType(pt, stats_.get()); + } +} + +bool NetEqImpl::RegisterPayloadType(int rtp_payload_type, + const SdpAudioFormat& audio_format) { + RTC_LOG(LS_VERBOSE) << "NetEqImpl::RegisterPayloadType: payload type " + << rtp_payload_type << ", codec " + << rtc::ToString(audio_format); + MutexLock lock(&mutex_); + return decoder_database_->RegisterPayload(rtp_payload_type, audio_format) == + DecoderDatabase::kOK; +} + +int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) { + MutexLock lock(&mutex_); + int ret = decoder_database_->Remove(rtp_payload_type); + if (ret == DecoderDatabase::kOK || ret == DecoderDatabase::kDecoderNotFound) { + packet_buffer_->DiscardPacketsWithPayloadType(rtp_payload_type, + stats_.get()); + return kOK; + } + return kFail; +} + +void NetEqImpl::RemoveAllPayloadTypes() { + MutexLock lock(&mutex_); + decoder_database_->RemoveAll(); +} + +bool NetEqImpl::SetMinimumDelay(int delay_ms) { + MutexLock lock(&mutex_); + if (delay_ms >= 0 && delay_ms <= 10000) { + RTC_DCHECK(controller_.get()); + return controller_->SetMinimumDelay(delay_ms); + } + return false; +} + +bool NetEqImpl::SetMaximumDelay(int delay_ms) { + MutexLock lock(&mutex_); + if (delay_ms >= 0 && delay_ms <= 10000) { + RTC_DCHECK(controller_.get()); + return controller_->SetMaximumDelay(delay_ms); + } + return false; +} + +bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) { + MutexLock lock(&mutex_); + if (delay_ms >= 0 && delay_ms <= 10000) { + return controller_->SetBaseMinimumDelay(delay_ms); + } + return false; +} + +int NetEqImpl::GetBaseMinimumDelayMs() const { + MutexLock lock(&mutex_); + return controller_->GetBaseMinimumDelay(); +} + +int NetEqImpl::TargetDelayMs() const { + 
MutexLock lock(&mutex_); + RTC_DCHECK(controller_.get()); + return controller_->TargetLevelMs(); +} + +int NetEqImpl::FilteredCurrentDelayMs() const { + MutexLock lock(&mutex_); + // Sum up the filtered packet buffer level with the future length of the sync + // buffer. + const int delay_samples = + controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength(); + // The division below will truncate. The return value is in ms. + return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000); +} + +int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { + MutexLock lock(&mutex_); + RTC_DCHECK(decoder_database_.get()); + *stats = CurrentNetworkStatisticsInternal(); + stats_->GetNetworkStatistics(decoder_frame_length_, stats); + return 0; +} + +NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatistics() const { + MutexLock lock(&mutex_); + return CurrentNetworkStatisticsInternal(); +} + +NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatisticsInternal() const { + RTC_DCHECK(decoder_database_.get()); + NetEqNetworkStatistics stats; + const size_t total_samples_in_buffers = + packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) + + sync_buffer_->FutureLength(); + + RTC_DCHECK(controller_.get()); + stats.preferred_buffer_size_ms = controller_->TargetLevelMs(); + stats.jitter_peaks_found = controller_->PeakFound(); + RTC_DCHECK_GT(fs_hz_, 0); + stats.current_buffer_size_ms = + static_cast<uint16_t>(total_samples_in_buffers * 1000 / fs_hz_); + return stats; +} + +NetEqLifetimeStatistics NetEqImpl::GetLifetimeStatistics() const { + MutexLock lock(&mutex_); + return stats_->GetLifetimeStatistics(); +} + +NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const { + MutexLock lock(&mutex_); + auto result = stats_->GetOperationsAndState(); + result.current_buffer_size_ms = + (packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) + + sync_buffer_->FutureLength()) * + 1000 / fs_hz_; + result.current_frame_size_ms = decoder_frame_length_ * 1000 / 
fs_hz_; + result.next_packet_available = packet_buffer_->PeekNextPacket() && + packet_buffer_->PeekNextPacket()->timestamp == + sync_buffer_->end_timestamp(); + return result; +} + +void NetEqImpl::EnableVad() { + MutexLock lock(&mutex_); + RTC_DCHECK(vad_.get()); + vad_->Enable(); +} + +void NetEqImpl::DisableVad() { + MutexLock lock(&mutex_); + RTC_DCHECK(vad_.get()); + vad_->Disable(); +} + +absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const { + MutexLock lock(&mutex_); + if (first_packet_ || last_mode_ == Mode::kRfc3389Cng || + last_mode_ == Mode::kCodecInternalCng) { + // We don't have a valid RTP timestamp until we have decoded our first + // RTP packet. Also, the RTP timestamp is not accurate while playing CNG, + // which is indicated by returning an empty value. + return absl::nullopt; + } + return timestamp_scaler_->ToExternal(playout_timestamp_); +} + +int NetEqImpl::last_output_sample_rate_hz() const { + MutexLock lock(&mutex_); + return last_output_sample_rate_hz_; +} + +absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat( + int payload_type) const { + MutexLock lock(&mutex_); + const DecoderDatabase::DecoderInfo* const di = + decoder_database_->GetDecoderInfo(payload_type); + if (di) { + const AudioDecoder* const decoder = di->GetDecoder(); + // TODO(kwiberg): Why the special case for RED? + return DecoderFormat{ + /*sample_rate_hz=*/di->IsRed() ? 8000 : di->SampleRateHz(), + /*num_channels=*/ + decoder ? rtc::dchecked_cast<int>(decoder->Channels()) : 1, + /*sdp_format=*/di->GetFormat()}; + } else { + // Payload type not registered. + return absl::nullopt; + } +} + +void NetEqImpl::FlushBuffers() { + MutexLock lock(&mutex_); + RTC_LOG(LS_VERBOSE) << "FlushBuffers"; + packet_buffer_->Flush(stats_.get()); + RTC_DCHECK(sync_buffer_.get()); + RTC_DCHECK(expand_.get()); + sync_buffer_->Flush(); + sync_buffer_->set_next_index(sync_buffer_->next_index() - + expand_->overlap_length()); + // Set to wait for new codec. 
+ first_packet_ = true; +} + +void NetEqImpl::EnableNack(size_t max_nack_list_size) { + MutexLock lock(&mutex_); + if (!nack_enabled_) { + nack_ = std::make_unique<NackTracker>(); + nack_enabled_ = true; + nack_->UpdateSampleRate(fs_hz_); + } + nack_->SetMaxNackListSize(max_nack_list_size); +} + +void NetEqImpl::DisableNack() { + MutexLock lock(&mutex_); + nack_.reset(); + nack_enabled_ = false; +} + +std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const { + MutexLock lock(&mutex_); + if (!nack_enabled_) { + return std::vector<uint16_t>(); + } + RTC_DCHECK(nack_.get()); + return nack_->GetNackList(round_trip_time_ms); +} + +int NetEqImpl::SyncBufferSizeMs() const { + MutexLock lock(&mutex_); + return rtc::dchecked_cast<int>(sync_buffer_->FutureLength() / + rtc::CheckedDivExact(fs_hz_, 1000)); +} + +const SyncBuffer* NetEqImpl::sync_buffer_for_test() const { + MutexLock lock(&mutex_); + return sync_buffer_.get(); +} + +NetEq::Operation NetEqImpl::last_operation_for_test() const { + MutexLock lock(&mutex_); + return last_operation_; +} + +// Methods below this line are private. + +int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> payload) { + if (payload.empty()) { + RTC_LOG_F(LS_ERROR) << "payload is empty"; + return kInvalidPointer; + } + + Timestamp receive_time = clock_->CurrentTime(); + stats_->ReceivedPacket(); + + PacketList packet_list; + // Insert packet in a packet list. + packet_list.push_back([&rtp_header, &payload, &receive_time] { + // Convert to Packet. + Packet packet; + packet.payload_type = rtp_header.payloadType; + packet.sequence_number = rtp_header.sequenceNumber; + packet.timestamp = rtp_header.timestamp; + packet.payload.SetData(payload.data(), payload.size()); + packet.packet_info = RtpPacketInfo(rtp_header, receive_time); + // Waiting time will be set upon inserting the packet in the buffer. 
+ RTC_DCHECK(!packet.waiting_time); + return packet; + }()); + + bool update_sample_rate_and_channels = first_packet_; + + if (update_sample_rate_and_channels) { + // Reset timestamp scaling. + timestamp_scaler_->Reset(); + } + + if (!decoder_database_->IsRed(rtp_header.payloadType)) { + // Scale timestamp to internal domain (only for some codecs). + timestamp_scaler_->ToInternal(&packet_list); + } + + // Store these for later use, since the first packet may very well disappear + // before we need these values. + uint32_t main_timestamp = packet_list.front().timestamp; + uint8_t main_payload_type = packet_list.front().payload_type; + uint16_t main_sequence_number = packet_list.front().sequence_number; + + // Reinitialize NetEq if it's needed (changed SSRC or first call). + if (update_sample_rate_and_channels) { + // Note: `first_packet_` will be cleared further down in this method, once + // the packet has been successfully inserted into the packet buffer. + + // Flush the packet buffer and DTMF buffer. + packet_buffer_->Flush(stats_.get()); + dtmf_buffer_->Flush(); + + // Update audio buffer timestamp. + sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_); + + // Update codecs. + timestamp_ = main_timestamp; + } + + if (nack_enabled_) { + RTC_DCHECK(nack_); + if (update_sample_rate_and_channels) { + nack_->Reset(); + } + nack_->UpdateLastReceivedPacket(main_sequence_number, main_timestamp); + } + + // Check for RED payload type, and separate payloads into several packets. + if (decoder_database_->IsRed(rtp_header.payloadType)) { + if (!red_payload_splitter_->SplitRed(&packet_list)) { + return kRedundancySplitError; + } + // Only accept a few RED payloads of the same type as the main data, + // DTMF events and CNG. + red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); + if (packet_list.empty()) { + return kRedundancySplitError; + } + } + + // Check payload types. 
+ if (decoder_database_->CheckPayloadTypes(packet_list) == + DecoderDatabase::kDecoderNotFound) { + return kUnknownRtpPayloadType; + } + + RTC_DCHECK(!packet_list.empty()); + + // Update main_timestamp, if new packets appear in the list + // after RED splitting. + if (decoder_database_->IsRed(rtp_header.payloadType)) { + timestamp_scaler_->ToInternal(&packet_list); + main_timestamp = packet_list.front().timestamp; + main_payload_type = packet_list.front().payload_type; + main_sequence_number = packet_list.front().sequence_number; + } + + // Process DTMF payloads. Cycle through the list of packets, and pick out any + // DTMF payloads found. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + const Packet& current_packet = (*it); + RTC_DCHECK(!current_packet.payload.empty()); + if (decoder_database_->IsDtmf(current_packet.payload_type)) { + DtmfEvent event; + int ret = DtmfBuffer::ParseEvent(current_packet.timestamp, + current_packet.payload.data(), + current_packet.payload.size(), &event); + if (ret != DtmfBuffer::kOK) { + return kDtmfParsingError; + } + if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { + return kDtmfInsertError; + } + it = packet_list.erase(it); + } else { + ++it; + } + } + + PacketList parsed_packet_list; + bool is_dtx = false; + while (!packet_list.empty()) { + Packet& packet = packet_list.front(); + const DecoderDatabase::DecoderInfo* info = + decoder_database_->GetDecoderInfo(packet.payload_type); + if (!info) { + RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type"; + return kUnknownRtpPayloadType; + } + + if (info->IsComfortNoise()) { + // Carry comfort noise packets along. 
+ parsed_packet_list.splice(parsed_packet_list.end(), packet_list, + packet_list.begin()); + } else { + const auto sequence_number = packet.sequence_number; + const auto payload_type = packet.payload_type; + const Packet::Priority original_priority = packet.priority; + const auto& packet_info = packet.packet_info; + auto packet_from_result = [&](AudioDecoder::ParseResult& result) { + Packet new_packet; + new_packet.sequence_number = sequence_number; + new_packet.payload_type = payload_type; + new_packet.timestamp = result.timestamp; + new_packet.priority.codec_level = result.priority; + new_packet.priority.red_level = original_priority.red_level; + new_packet.packet_info = packet_info; + new_packet.frame = std::move(result.frame); + return new_packet; + }; + + std::vector<AudioDecoder::ParseResult> results = + info->GetDecoder()->ParsePayload(std::move(packet.payload), + packet.timestamp); + if (results.empty()) { + packet_list.pop_front(); + } else { + bool first = true; + for (auto& result : results) { + RTC_DCHECK(result.frame); + RTC_DCHECK_GE(result.priority, 0); + is_dtx = is_dtx || result.frame->IsDtxPacket(); + if (first) { + // Re-use the node and move it to parsed_packet_list. + packet_list.front() = packet_from_result(result); + parsed_packet_list.splice(parsed_packet_list.end(), packet_list, + packet_list.begin()); + first = false; + } else { + parsed_packet_list.push_back(packet_from_result(result)); + } + } + } + } + } + + // Calculate the number of primary (non-FEC/RED) packets. + const size_t number_of_primary_packets = std::count_if( + parsed_packet_list.begin(), parsed_packet_list.end(), + [](const Packet& in) { return in.priority.codec_level == 0; }); + if (number_of_primary_packets < parsed_packet_list.size()) { + stats_->SecondaryPacketsReceived(parsed_packet_list.size() - + number_of_primary_packets); + } + + // Insert packets in buffer. 
+ const int target_level_ms = controller_->TargetLevelMs(); + const int ret = packet_buffer_->InsertPacketList( + &parsed_packet_list, *decoder_database_, &current_rtp_payload_type_, + &current_cng_rtp_payload_type_, stats_.get(), decoder_frame_length_, + last_output_sample_rate_hz_, target_level_ms); + bool buffer_flush_occured = false; + if (ret == PacketBuffer::kFlushed) { + // Reset DSP timestamp etc. if packet buffer flushed. + new_codec_ = true; + update_sample_rate_and_channels = true; + buffer_flush_occured = true; + } else if (ret == PacketBuffer::kPartialFlush) { + // Forward sync buffer timestamp + timestamp_ = packet_buffer_->PeekNextPacket()->timestamp; + sync_buffer_->IncreaseEndTimestamp(timestamp_ - + sync_buffer_->end_timestamp()); + buffer_flush_occured = true; + } else if (ret != PacketBuffer::kOK) { + return kOtherError; + } + + if (first_packet_) { + first_packet_ = false; + // Update the codec on the next GetAudio call. + new_codec_ = true; + } + + if (current_rtp_payload_type_) { + RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_)) + << "Payload type " << static_cast<int>(*current_rtp_payload_type_) + << " is unknown where it shouldn't be"; + } + + if (update_sample_rate_and_channels && !packet_buffer_->Empty()) { + // We do not use `current_rtp_payload_type_` to set `payload_type`, but + // get the next RTP header from `packet_buffer_` to obtain the payload type. + // The reason for it is the following corner case. If NetEq receives a + // CNG packet with a sample rate different than the current CNG then it + // flushes its buffer, assuming send codec must have been changed. However, + // payload type of the hypothetically new send codec is not known.
+ const Packet* next_packet = packet_buffer_->PeekNextPacket(); + RTC_DCHECK(next_packet); + const int payload_type = next_packet->payload_type; + size_t channels = 1; + if (!decoder_database_->IsComfortNoise(payload_type)) { + AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type); + RTC_DCHECK(decoder); // Payloads are already checked to be valid. + channels = decoder->Channels(); + } + const DecoderDatabase::DecoderInfo* decoder_info = + decoder_database_->GetDecoderInfo(payload_type); + RTC_DCHECK(decoder_info); + if (decoder_info->SampleRateHz() != fs_hz_ || + channels != algorithm_buffer_->Channels()) { + SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels); + } + if (nack_enabled_) { + RTC_DCHECK(nack_); + // Update the sample rate even if the rate is not new, because of Reset(). + nack_->UpdateSampleRate(fs_hz_); + } + } + + const DecoderDatabase::DecoderInfo* dec_info = + decoder_database_->GetDecoderInfo(main_payload_type); + RTC_DCHECK(dec_info); // Already checked that the payload type is known. 
+ + NetEqController::PacketArrivedInfo info; + info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf(); + info.packet_length_samples = + number_of_primary_packets * decoder_frame_length_; + info.main_timestamp = main_timestamp; + info.main_sequence_number = main_sequence_number; + info.is_dtx = is_dtx; + info.buffer_flush = buffer_flush_occured; + + const bool should_update_stats = !new_codec_; + auto relative_delay = + controller_->PacketArrived(fs_hz_, should_update_stats, info); + if (relative_delay) { + stats_->RelativePacketArrivalDelay(relative_delay.value()); + } + return 0; +} + +int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, + bool* muted, + absl::optional<Operation> action_override) { + PacketList packet_list; + DtmfEvent dtmf_event; + Operation operation; + bool play_dtmf; + *muted = false; + last_decoded_packet_infos_.clear(); + tick_timer_->Increment(); + stats_->IncreaseCounter(output_size_samples_, fs_hz_); + const auto lifetime_stats = stats_->GetLifetimeStatistics(); + expand_uma_logger_.UpdateSampleCounter(lifetime_stats.concealed_samples, + fs_hz_); + speech_expand_uma_logger_.UpdateSampleCounter( + lifetime_stats.concealed_samples - + lifetime_stats.silent_concealed_samples, + fs_hz_); + + // Check for muted state. + if (enable_muted_state_ && expand_->Muted() && packet_buffer_->Empty()) { + RTC_DCHECK_EQ(last_mode_, Mode::kExpand); + audio_frame->Reset(); + RTC_DCHECK(audio_frame->muted()); // Reset() should mute the frame. + playout_timestamp_ += static_cast<uint32_t>(output_size_samples_); + audio_frame->sample_rate_hz_ = fs_hz_; + // Make sure the total number of samples fits in the AudioFrame. + if (output_size_samples_ * sync_buffer_->Channels() > + AudioFrame::kMaxDataSizeSamples) { + return kSampleUnderrun; + } + audio_frame->samples_per_channel_ = output_size_samples_; + audio_frame->timestamp_ = + first_packet_ + ? 
0 + : timestamp_scaler_->ToExternal(playout_timestamp_) - + static_cast<uint32_t>(audio_frame->samples_per_channel_); + audio_frame->num_channels_ = sync_buffer_->Channels(); + stats_->ExpandedNoiseSamples(output_size_samples_, false); + controller_->NotifyMutedState(); + *muted = true; + return 0; + } + int return_value = GetDecision(&operation, &packet_list, &dtmf_event, + &play_dtmf, action_override); + if (return_value != 0) { + last_mode_ = Mode::kError; + return return_value; + } + + AudioDecoder::SpeechType speech_type; + int length = 0; + const size_t start_num_packets = packet_list.size(); + int decode_return_value = + Decode(&packet_list, &operation, &length, &speech_type); + + RTC_DCHECK(vad_.get()); + bool sid_frame_available = + (operation == Operation::kRfc3389Cng && !packet_list.empty()); + vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type, + sid_frame_available, fs_hz_); + + // This is the criterion that we did decode some data through the speech + // decoder, and the operation resulted in comfort noise. + const bool codec_internal_sid_frame = + (speech_type == AudioDecoder::kComfortNoise && + start_num_packets > packet_list.size()); + + if (sid_frame_available || codec_internal_sid_frame) { + // Start a new stopwatch since we are decoding a new CNG packet. 
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + + algorithm_buffer_->Clear(); + switch (operation) { + case Operation::kNormal: { + DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf); + if (length > 0) { + stats_->DecodedOutputPlayed(); + } + break; + } + case Operation::kMerge: { + DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf); + break; + } + case Operation::kExpand: { + RTC_DCHECK_EQ(return_value, 0); + if (!current_rtp_payload_type_ || !DoCodecPlc()) { + return_value = DoExpand(play_dtmf); + } + RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(), + output_size_samples_); + break; + } + case Operation::kAccelerate: + case Operation::kFastAccelerate: { + const bool fast_accelerate = + enable_fast_accelerate_ && (operation == Operation::kFastAccelerate); + return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, + play_dtmf, fast_accelerate); + break; + } + case Operation::kPreemptiveExpand: { + return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, + speech_type, play_dtmf); + break; + } + case Operation::kRfc3389Cng: + case Operation::kRfc3389CngNoPacket: { + return_value = DoRfc3389Cng(&packet_list, play_dtmf); + break; + } + case Operation::kCodecInternalCng: { + // This handles the case when there is no transmission and the decoder + // should produce internal comfort noise. + // TODO(hlundin): Write test for codec-internal CNG. + DoCodecInternalCng(decoded_buffer_.get(), length); + break; + } + case Operation::kDtmf: { + // TODO(hlundin): Write test for this. + return_value = DoDtmf(dtmf_event, &play_dtmf); + break; + } + case Operation::kUndefined: { + RTC_LOG(LS_ERROR) << "Invalid operation kUndefined."; + RTC_DCHECK_NOTREACHED(); // This should not happen. + last_mode_ = Mode::kError; + return kInvalidOperation; + } + } // End of switch. 
+ last_operation_ = operation; + if (return_value < 0) { + return return_value; + } + + if (last_mode_ != Mode::kRfc3389Cng) { + comfort_noise_->Reset(); + } + + // We treat it as if all packets referenced to by `last_decoded_packet_infos_` + // were mashed together when creating the samples in `algorithm_buffer_`. + RtpPacketInfos packet_infos(last_decoded_packet_infos_); + + // Copy samples from `algorithm_buffer_` to `sync_buffer_`. + // + // TODO(bugs.webrtc.org/10757): + // We would in the future also like to pass `packet_infos` so that we can do + // sample-perfect tracking of that information across `sync_buffer_`. + sync_buffer_->PushBack(*algorithm_buffer_); + + // Extract data from `sync_buffer_` to `output`. + size_t num_output_samples_per_channel = output_size_samples_; + size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels(); + if (num_output_samples > AudioFrame::kMaxDataSizeSamples) { + RTC_LOG(LS_WARNING) << "Output array is too short. " + << AudioFrame::kMaxDataSizeSamples << " < " + << output_size_samples_ << " * " + << sync_buffer_->Channels(); + num_output_samples = AudioFrame::kMaxDataSizeSamples; + num_output_samples_per_channel = + AudioFrame::kMaxDataSizeSamples / sync_buffer_->Channels(); + } + sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel, + audio_frame); + audio_frame->sample_rate_hz_ = fs_hz_; + // TODO(bugs.webrtc.org/10757): + // We don't have the ability to properly track individual packets once their + // audio samples have entered `sync_buffer_`. So for now, treat it as if + // `packet_infos` from packets decoded by the current `GetAudioInternal()` + // call were all consumed assembling the current audio frame and the current + // audio frame only. + audio_frame->packet_infos_ = std::move(packet_infos); + if (sync_buffer_->FutureLength() < expand_->overlap_length()) { + // The sync buffer should always contain `overlap_length` samples, but now + // too many samples have been extracted. 
Reinstall the `overlap_length` + // lookahead by moving the index. + const size_t missing_lookahead_samples = + expand_->overlap_length() - sync_buffer_->FutureLength(); + RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples); + sync_buffer_->set_next_index(sync_buffer_->next_index() - + missing_lookahead_samples); + } + if (audio_frame->samples_per_channel_ != output_size_samples_) { + RTC_LOG(LS_ERROR) << "audio_frame->samples_per_channel_ (" + << audio_frame->samples_per_channel_ + << ") != output_size_samples_ (" << output_size_samples_ + << ")"; + // TODO(minyue): treatment of under-run, filling zeros + audio_frame->Mute(); + return kSampleUnderrun; + } + + // Should always have overlap samples left in the `sync_buffer_`. + RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length()); + + // TODO(yujo): For muted frames, this can be a copy rather than an addition. + if (play_dtmf) { + return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), + audio_frame->mutable_data()); + } + + // Update the background noise parameters if last operation wrote data + // straight from the decoder to the `sync_buffer_`. That is, none of the + // operations that modify the signal can be followed by a parameter update. + if ((last_mode_ == Mode::kNormal) || (last_mode_ == Mode::kAccelerateFail) || + (last_mode_ == Mode::kPreemptiveExpandFail) || + (last_mode_ == Mode::kRfc3389Cng) || + (last_mode_ == Mode::kCodecInternalCng)) { + background_noise_->Update(*sync_buffer_, *vad_.get()); + } + + if (operation == Operation::kDtmf) { + // DTMF data was written the end of `sync_buffer_`. + // Update index to end of DTMF data in `sync_buffer_`. + sync_buffer_->set_dtmf_index(sync_buffer_->Size()); + } + + if (last_mode_ != Mode::kExpand && last_mode_ != Mode::kCodecPlc) { + // If last operation was not expand, calculate the `playout_timestamp_` from + // the `sync_buffer_`. 
However, do not update the `playout_timestamp_` if it + // would be moved "backwards". + uint32_t temp_timestamp = + sync_buffer_->end_timestamp() - + static_cast<uint32_t>(sync_buffer_->FutureLength()); + if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) { + playout_timestamp_ = temp_timestamp; + } + } else { + // Use dead reckoning to estimate the `playout_timestamp_`. + playout_timestamp_ += static_cast<uint32_t>(output_size_samples_); + } + // Set the timestamp in the audio frame to zero before the first packet has + // been inserted. Otherwise, subtract the frame size in samples to get the + // timestamp of the first sample in the frame (playout_timestamp_ is the + // last + 1). + audio_frame->timestamp_ = + first_packet_ + ? 0 + : timestamp_scaler_->ToExternal(playout_timestamp_) - + static_cast<uint32_t>(audio_frame->samples_per_channel_); + + if (!(last_mode_ == Mode::kRfc3389Cng || + last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kExpand || + last_mode_ == Mode::kCodecPlc)) { + generated_noise_stopwatch_.reset(); + } + + if (decode_return_value) + return decode_return_value; + return return_value; +} + +int NetEqImpl::GetDecision(Operation* operation, + PacketList* packet_list, + DtmfEvent* dtmf_event, + bool* play_dtmf, + absl::optional<Operation> action_override) { + // Initialize output variables. + *play_dtmf = false; + *operation = Operation::kUndefined; + + RTC_DCHECK(sync_buffer_.get()); + uint32_t end_timestamp = sync_buffer_->end_timestamp(); + if (!new_codec_) { + const uint32_t five_seconds_samples = 5 * fs_hz_; + packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples, + stats_.get()); + } + const Packet* packet = packet_buffer_->PeekNextPacket(); + + RTC_DCHECK(!generated_noise_stopwatch_ || + generated_noise_stopwatch_->ElapsedTicks() >= 1); + uint64_t generated_noise_samples = + generated_noise_stopwatch_ ? 
(generated_noise_stopwatch_->ElapsedTicks() - + 1) * output_size_samples_ + + controller_->noise_fast_forward() + : 0; + + if (controller_->CngRfc3389On() || last_mode_ == Mode::kRfc3389Cng) { + // Because of timestamp peculiarities, we have to "manually" disallow using + // a CNG packet with the same timestamp as the one that was last played. + // This can happen when using redundancy and will cause the timing to shift. + while (packet && decoder_database_->IsComfortNoise(packet->payload_type) && + (end_timestamp >= packet->timestamp || + end_timestamp + generated_noise_samples > packet->timestamp)) { + // Don't use this packet, discard it. + if (packet_buffer_->DiscardNextPacket(stats_.get()) != + PacketBuffer::kOK) { + RTC_DCHECK_NOTREACHED(); // Must be ok by design. + } + // Check buffer again. + if (!new_codec_) { + packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_, + stats_.get()); + } + packet = packet_buffer_->PeekNextPacket(); + } + } + + RTC_DCHECK(expand_.get()); + const int samples_left = static_cast<int>(sync_buffer_->FutureLength() - + expand_->overlap_length()); + if (last_mode_ == Mode::kAccelerateSuccess || + last_mode_ == Mode::kAccelerateLowEnergy || + last_mode_ == Mode::kPreemptiveExpandSuccess || + last_mode_ == Mode::kPreemptiveExpandLowEnergy) { + // Subtract (samples_left + output_size_samples_) from sampleMemory. + controller_->AddSampleMemory( + -(samples_left + rtc::dchecked_cast<int>(output_size_samples_))); + } + + // Check if it is time to play a DTMF event. + if (dtmf_buffer_->GetEvent( + static_cast<uint32_t>(end_timestamp + generated_noise_samples), + dtmf_event)) { + *play_dtmf = true; + } + + // Get instruction. + RTC_DCHECK(sync_buffer_.get()); + RTC_DCHECK(expand_.get()); + generated_noise_samples = + generated_noise_stopwatch_ + ? 
generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ + + controller_->noise_fast_forward() + : 0; + NetEqController::NetEqStatus status; + status.packet_buffer_info.dtx_or_cng = + packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get()); + status.packet_buffer_info.num_samples = + packet_buffer_->NumSamplesInBuffer(decoder_frame_length_); + status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples( + decoder_frame_length_, last_output_sample_rate_hz_, true); + status.packet_buffer_info.span_samples_no_dtx = + packet_buffer_->GetSpanSamples(decoder_frame_length_, + last_output_sample_rate_hz_, false); + status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer(); + status.target_timestamp = sync_buffer_->end_timestamp(); + status.expand_mutefactor = expand_->MuteFactor(0); + status.last_packet_samples = decoder_frame_length_; + status.last_mode = last_mode_; + status.play_dtmf = *play_dtmf; + status.generated_noise_samples = generated_noise_samples; + status.sync_buffer_samples = sync_buffer_->FutureLength(); + if (packet) { + status.next_packet = { + packet->timestamp, packet->frame && packet->frame->IsDtxPacket(), + decoder_database_->IsComfortNoise(packet->payload_type)}; + } + *operation = controller_->GetDecision(status, &reset_decoder_); + + // Disallow time stretching if this packet is DTX, because such a decision may + // be based on earlier buffer level estimate, as we do not update buffer level + // during DTX. When we have a better way to update buffer level during DTX, + // this can be discarded. + if (packet && packet->frame && packet->frame->IsDtxPacket() && + (*operation == Operation::kMerge || + *operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate || + *operation == Operation::kPreemptiveExpand)) { + *operation = Operation::kNormal; + } + + if (action_override) { + // Use the provided action instead of the decision NetEq decided on. 
+ *operation = *action_override; + } + // Check if we already have enough samples in the `sync_buffer_`. If so, + // change decision to normal, unless the decision was merge, accelerate, or + // preemptive expand. + if (samples_left >= rtc::dchecked_cast<int>(output_size_samples_) && + *operation != Operation::kMerge && *operation != Operation::kAccelerate && + *operation != Operation::kFastAccelerate && + *operation != Operation::kPreemptiveExpand) { + *operation = Operation::kNormal; + return 0; + } + + controller_->ExpandDecision(*operation); + if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) { + // Getting out of the PLC expand mode, reporting interruptions. + // NetEq PLC reports this metrics in expand.cc + stats_->EndExpandEvent(fs_hz_); + } + + // Check conditions for reset. + if (new_codec_ || *operation == Operation::kUndefined) { + // The only valid reason to get kUndefined is that new_codec_ is set. + RTC_DCHECK(new_codec_); + if (*play_dtmf && !packet) { + timestamp_ = dtmf_event->timestamp; + } else { + if (!packet) { + RTC_LOG(LS_ERROR) << "Packet missing where it shouldn't."; + return -1; + } + timestamp_ = packet->timestamp; + if (*operation == Operation::kRfc3389CngNoPacket && + decoder_database_->IsComfortNoise(packet->payload_type)) { + // Change decision to CNG packet, since we do have a CNG packet, but it + // was considered too early to use. Now, use it anyway. + *operation = Operation::kRfc3389Cng; + } else if (*operation != Operation::kRfc3389Cng) { + *operation = Operation::kNormal; + } + } + // Adjust `sync_buffer_` timestamp before setting `end_timestamp` to the + // new value. 
+ sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp); + end_timestamp = timestamp_; + new_codec_ = false; + controller_->SoftReset(); + stats_->ResetMcu(); + } + + size_t required_samples = output_size_samples_; + const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_); + const size_t samples_20_ms = 2 * samples_10_ms; + const size_t samples_30_ms = 3 * samples_10_ms; + + switch (*operation) { + case Operation::kExpand: { + timestamp_ = end_timestamp; + return 0; + } + case Operation::kRfc3389CngNoPacket: + case Operation::kCodecInternalCng: { + return 0; + } + case Operation::kDtmf: { + // TODO(hlundin): Write test for this. + // Update timestamp. + timestamp_ = end_timestamp; + const uint64_t generated_noise_samples = + generated_noise_stopwatch_ + ? generated_noise_stopwatch_->ElapsedTicks() * + output_size_samples_ + + controller_->noise_fast_forward() + : 0; + if (generated_noise_samples > 0 && last_mode_ != Mode::kDtmf) { + // Make a jump in timestamp due to the recently played comfort noise. + uint32_t timestamp_jump = + static_cast<uint32_t>(generated_noise_samples); + sync_buffer_->IncreaseEndTimestamp(timestamp_jump); + timestamp_ += timestamp_jump; + } + return 0; + } + case Operation::kAccelerate: + case Operation::kFastAccelerate: { + // In order to do an accelerate we need at least 30 ms of audio data. + if (samples_left >= static_cast<int>(samples_30_ms)) { + // Already have enough data, so we do not need to extract any more. + controller_->set_sample_memory(samples_left); + controller_->set_prev_time_scale(true); + return 0; + } else if (samples_left >= static_cast<int>(samples_10_ms) && + decoder_frame_length_ >= samples_30_ms) { + // Avoid decoding more data as it might overflow the playout buffer. + *operation = Operation::kNormal; + return 0; + } else if (samples_left < static_cast<int>(samples_20_ms) && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. 
Do + // not perform accelerate yet, but wait until we only need to do one + // decoding. + required_samples = 2 * output_size_samples_; + *operation = Operation::kNormal; + } + // If none of the above is true, we have one of two possible situations: + // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or + // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms. + // In either case, we move on with the accelerate decision, and decode one + // frame now. + break; + } + case Operation::kPreemptiveExpand: { + // In order to do a preemptive expand we need at least 30 ms of decoded + // audio data. + if ((samples_left >= static_cast<int>(samples_30_ms)) || + (samples_left >= static_cast<int>(samples_10_ms) && + decoder_frame_length_ >= samples_30_ms)) { + // Already have enough data, so we do not need to extract any more. + // Or, avoid decoding more data as it might overflow the playout buffer. + // Still try preemptive expand, though. + controller_->set_sample_memory(samples_left); + controller_->set_prev_time_scale(true); + return 0; + } + if (samples_left < static_cast<int>(samples_20_ms) && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. + // Still try to perform preemptive expand. + required_samples = 2 * output_size_samples_; + } + // Move on with the preemptive expand decision. + break; + } + case Operation::kMerge: { + required_samples = + std::max(merge_->RequiredFutureSamples(), required_samples); + break; + } + default: { + // Do nothing. + } + } + + // Get packets from buffer. + int extracted_samples = 0; + if (packet) { + sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp); + + if (*operation != Operation::kRfc3389Cng) { + // We are about to decode and use a non-CNG packet. 
+ controller_->SetCngOff(); + } + + extracted_samples = ExtractPackets(required_samples, packet_list); + if (extracted_samples < 0) { + return kPacketBufferCorruption; + } + } + + if (*operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate || + *operation == Operation::kPreemptiveExpand) { + controller_->set_sample_memory(samples_left + extracted_samples); + controller_->set_prev_time_scale(true); + } + + if (*operation == Operation::kAccelerate || + *operation == Operation::kFastAccelerate) { + // Check that we have enough data (30ms) to do accelerate. + if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) { + // TODO(hlundin): Write test for this. + // Not enough, do normal operation instead. + *operation = Operation::kNormal; + } + } + + timestamp_ = sync_buffer_->end_timestamp(); + return 0; +} + +int NetEqImpl::Decode(PacketList* packet_list, + Operation* operation, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + *speech_type = AudioDecoder::kSpeech; + + // When packet_list is empty, we may be in kCodecInternalCng mode, and for + // that we use current active decoder. + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + + if (!packet_list->empty()) { + const Packet& packet = packet_list->front(); + uint8_t payload_type = packet.payload_type; + if (!decoder_database_->IsComfortNoise(payload_type)) { + decoder = decoder_database_->GetDecoder(payload_type); + RTC_DCHECK(decoder); + if (!decoder) { + RTC_LOG(LS_WARNING) + << "Unknown payload type " << static_cast<int>(payload_type); + packet_list->clear(); + return kDecoderNotFound; + } + bool decoder_changed; + decoder_database_->SetActiveDecoder(payload_type, &decoder_changed); + if (decoder_changed) { + // We have a new decoder. Re-init some values. 
+ const DecoderDatabase::DecoderInfo* decoder_info = + decoder_database_->GetDecoderInfo(payload_type); + RTC_DCHECK(decoder_info); + if (!decoder_info) { + RTC_LOG(LS_WARNING) + << "Unknown payload type " << static_cast<int>(payload_type); + packet_list->clear(); + return kDecoderNotFound; + } + // If sampling rate or number of channels has changed, we need to make + // a reset. + if (decoder_info->SampleRateHz() != fs_hz_ || + decoder->Channels() != algorithm_buffer_->Channels()) { + // TODO(tlegrand): Add unittest to cover this event. + SetSampleRateAndChannels(decoder_info->SampleRateHz(), + decoder->Channels()); + } + sync_buffer_->set_end_timestamp(timestamp_); + playout_timestamp_ = timestamp_; + } + } + } + + if (reset_decoder_) { + // TODO(hlundin): Write test for this. + if (decoder) + decoder->Reset(); + + // Reset comfort noise decoder. + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) + cng_decoder->Reset(); + + reset_decoder_ = false; + } + + *decoded_length = 0; + // Update codec-internal PLC state. + if ((*operation == Operation::kMerge) && decoder && decoder->HasDecodePlc()) { + decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]); + } + + int return_value; + if (*operation == Operation::kCodecInternalCng) { + RTC_DCHECK(packet_list->empty()); + return_value = DecodeCng(decoder, decoded_length, speech_type); + } else { + return_value = DecodeLoop(packet_list, *operation, decoder, decoded_length, + speech_type); + } + + if (*decoded_length < 0) { + // Error returned from the decoder. + *decoded_length = 0; + sync_buffer_->IncreaseEndTimestamp( + static_cast<uint32_t>(decoder_frame_length_)); + int error_code = 0; + if (decoder) + error_code = decoder->ErrorCode(); + if (error_code != 0) { + // Got some error code from the decoder. + return_value = kDecoderErrorCode; + RTC_LOG(LS_WARNING) << "Decoder returned error code: " << error_code; + } else { + // Decoder does not implement error codes. 
Return generic error. + return_value = kOtherDecoderError; + RTC_LOG(LS_WARNING) << "Decoder error (no error code)"; + } + *operation = Operation::kExpand; // Do expansion to get data instead. + } + if (*speech_type != AudioDecoder::kComfortNoise) { + // Don't increment timestamp if codec returned CNG speech type + // since in this case, the we will increment the CNGplayedTS counter. + // Increase with number of samples per channel. + RTC_DCHECK(*decoded_length == 0 || + (decoder && decoder->Channels() == sync_buffer_->Channels())); + sync_buffer_->IncreaseEndTimestamp( + *decoded_length / static_cast<int>(sync_buffer_->Channels())); + } + return return_value; +} + +int NetEqImpl::DecodeCng(AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + if (!decoder) { + // This happens when active decoder is not defined. + *decoded_length = -1; + return 0; + } + + while (*decoded_length < rtc::dchecked_cast<int>(output_size_samples_)) { + const int length = decoder->Decode( + nullptr, 0, fs_hz_, + (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), + &decoded_buffer_[*decoded_length], speech_type); + if (length > 0) { + *decoded_length += length; + } else { + // Error. + RTC_LOG(LS_WARNING) << "Failed to decode CNG"; + *decoded_length = -1; + break; + } + if (*decoded_length > static_cast<int>(decoded_buffer_length_)) { + // Guard against overflow. + RTC_LOG(LS_WARNING) << "Decoded too much CNG."; + return kDecodedTooMuch; + } + } + stats_->GeneratedNoiseSamples(*decoded_length); + return 0; +} + +int NetEqImpl::DecodeLoop(PacketList* packet_list, + const Operation& operation, + AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + RTC_DCHECK(last_decoded_packet_infos_.empty()); + + // Do decoding. + while (!packet_list->empty() && !decoder_database_->IsComfortNoise( + packet_list->front().payload_type)) { + RTC_DCHECK(decoder); // At this point, we must have a decoder object. 
+ // The number of channels in the `sync_buffer_` should be the same as the + // number decoder channels. + RTC_DCHECK_EQ(sync_buffer_->Channels(), decoder->Channels()); + RTC_DCHECK_GE(decoded_buffer_length_, kMaxFrameSize * decoder->Channels()); + RTC_DCHECK(operation == Operation::kNormal || + operation == Operation::kAccelerate || + operation == Operation::kFastAccelerate || + operation == Operation::kMerge || + operation == Operation::kPreemptiveExpand); + + auto opt_result = packet_list->front().frame->Decode( + rtc::ArrayView<int16_t>(&decoded_buffer_[*decoded_length], + decoded_buffer_length_ - *decoded_length)); + last_decoded_packet_infos_.push_back( + std::move(packet_list->front().packet_info)); + packet_list->pop_front(); + if (opt_result) { + const auto& result = *opt_result; + *speech_type = result.speech_type; + if (result.num_decoded_samples > 0) { + *decoded_length += rtc::dchecked_cast<int>(result.num_decoded_samples); + // Update `decoder_frame_length_` with number of samples per channel. + decoder_frame_length_ = + result.num_decoded_samples / decoder->Channels(); + } + } else { + // Error. + // TODO(ossu): What to put here? + RTC_LOG(LS_WARNING) << "Decode error"; + *decoded_length = -1; + last_decoded_packet_infos_.clear(); + packet_list->clear(); + break; + } + if (*decoded_length > rtc::dchecked_cast<int>(decoded_buffer_length_)) { + // Guard against overflow. + RTC_LOG(LS_WARNING) << "Decoded too much."; + packet_list->clear(); + return kDecodedTooMuch; + } + } // End of decode loop. + + // If the list is not empty at this point, either a decoding error terminated + // the while-loop, or list must hold exactly one CNG packet. 
+ RTC_DCHECK( + packet_list->empty() || *decoded_length < 0 || + (packet_list->size() == 1 && + decoder_database_->IsComfortNoise(packet_list->front().payload_type))); + return 0; +} + +void NetEqImpl::DoNormal(const int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + RTC_DCHECK(normal_.get()); + normal_->Process(decoded_buffer, decoded_length, last_mode_, + algorithm_buffer_.get()); + if (decoded_length != 0) { + last_mode_ = Mode::kNormal; + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if ((speech_type == AudioDecoder::kComfortNoise) || + ((last_mode_ == Mode::kCodecInternalCng) && (decoded_length == 0))) { + // TODO(hlundin): Remove second part of || statement above. + last_mode_ = Mode::kCodecInternalCng; + } + + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +void NetEqImpl::DoMerge(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + RTC_DCHECK(merge_.get()); + size_t new_length = + merge_->Process(decoded_buffer, decoded_length, algorithm_buffer_.get()); + // Correction can be negative. + int expand_length_correction = + rtc::dchecked_cast<int>(new_length) - + rtc::dchecked_cast<int>(decoded_length / algorithm_buffer_->Channels()); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand generates only noise. + stats_->ExpandedNoiseSamplesCorrection(expand_length_correction); + } else { + // Expansion generates more than only noise. + stats_->ExpandedVoiceSamplesCorrection(expand_length_correction); + } + + last_mode_ = Mode::kMerge; + // If last packet was decoded as an inband CNG, set mode to CNG instead. 
+ if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + expand_->Reset(); + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +bool NetEqImpl::DoCodecPlc() { + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + if (!decoder) { + return false; + } + const size_t channels = algorithm_buffer_->Channels(); + const size_t requested_samples_per_channel = + output_size_samples_ - + (sync_buffer_->FutureLength() - expand_->overlap_length()); + concealment_audio_.Clear(); + decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_); + if (concealment_audio_.empty()) { + // Nothing produced. Resort to regular expand. + return false; + } + RTC_CHECK_GE(concealment_audio_.size(), + requested_samples_per_channel * channels); + sync_buffer_->PushBackInterleaved(concealment_audio_); + RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0); + const size_t concealed_samples_per_channel = + concealment_audio_.size() / channels; + + // Update in-call and post-call statistics. + const bool is_new_concealment_event = (last_mode_ != Mode::kCodecPlc); + if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(), + [](int16_t i) { return i == 0; })) { + // Expand operation generates only noise. + stats_->ExpandedNoiseSamples(concealed_samples_per_channel, + is_new_concealment_event); + } else { + // Expand operation generates more than only noise. + stats_->ExpandedVoiceSamples(concealed_samples_per_channel, + is_new_concealment_event); + } + last_mode_ = Mode::kCodecPlc; + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. 
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + return true; +} + +int NetEqImpl::DoExpand(bool play_dtmf) { + while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < + output_size_samples_) { + algorithm_buffer_->Clear(); + int return_value = expand_->Process(algorithm_buffer_.get()); + size_t length = algorithm_buffer_->Size(); + bool is_new_concealment_event = (last_mode_ != Mode::kExpand); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand operation generates only noise. + stats_->ExpandedNoiseSamples(length, is_new_concealment_event); + } else { + // Expand operation generates more than only noise. + stats_->ExpandedVoiceSamples(length, is_new_concealment_event); + } + + last_mode_ = Mode::kExpand; + + if (return_value < 0) { + return return_value; + } + + sync_buffer_->PushBack(*algorithm_buffer_); + algorithm_buffer_->Clear(); + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. + generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + + return 0; +} + +int NetEqImpl::DoAccelerate(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf, + bool fast_accelerate) { + const size_t required_samples = + static_cast<size_t>(240 * fs_mult_); // Must have 30 ms. + size_t borrowed_samples_per_channel = 0; + size_t num_channels = algorithm_buffer_->Channels(); + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the `sync_buffer_` in order to get 30 ms. 
+ borrowed_samples_per_channel = + static_cast<int>(required_samples - decoded_length_per_channel); + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + size_t samples_removed = 0; + Accelerate::ReturnCodes return_code = + accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate, + algorithm_buffer_.get(), &samples_removed); + stats_->AcceleratedSamples(samples_removed); + switch (return_code) { + case Accelerate::kSuccess: + last_mode_ = Mode::kAccelerateSuccess; + break; + case Accelerate::kSuccessLowEnergy: + last_mode_ = Mode::kAccelerateLowEnergy; + break; + case Accelerate::kNoStretch: + last_mode_ = Mode::kAccelerateFail; + break; + case Accelerate::kError: + // TODO(hlundin): Map to Modes::kError instead? + last_mode_ = Mode::kAccelerateFail; + return kAccelerateError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the `sync_buffer_`. + size_t length = algorithm_buffer_->Size(); + if (length < borrowed_samples_per_channel) { + // This destroys the beginning of the buffer, but will not cause any + // problems. + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, + sync_buffer_->Size() - borrowed_samples_per_channel); + sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length); + algorithm_buffer_->PopFront(length); + RTC_DCHECK(algorithm_buffer_->Empty()); + } else { + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, borrowed_samples_per_channel, + sync_buffer_->Size() - borrowed_samples_per_channel); + algorithm_buffer_->PopFront(borrowed_samples_per_channel); + } + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. 
+ if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) { + const size_t required_samples = + static_cast<size_t>(240 * fs_mult_); // Must have 30 ms. + size_t num_channels = algorithm_buffer_->Channels(); + size_t borrowed_samples_per_channel = 0; + size_t old_borrowed_samples_per_channel = 0; + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the `sync_buffer_` in order to get 30 ms. + borrowed_samples_per_channel = + required_samples - decoded_length_per_channel; + // Calculate how many of these were already played out. + old_borrowed_samples_per_channel = + (borrowed_samples_per_channel > sync_buffer_->FutureLength()) + ? 
(borrowed_samples_per_channel - sync_buffer_->FutureLength()) + : 0; + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + size_t samples_added = 0; + PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process( + decoded_buffer, decoded_length, old_borrowed_samples_per_channel, + algorithm_buffer_.get(), &samples_added); + stats_->PreemptiveExpandedSamples(samples_added); + switch (return_code) { + case PreemptiveExpand::kSuccess: + last_mode_ = Mode::kPreemptiveExpandSuccess; + break; + case PreemptiveExpand::kSuccessLowEnergy: + last_mode_ = Mode::kPreemptiveExpandLowEnergy; + break; + case PreemptiveExpand::kNoStretch: + last_mode_ = Mode::kPreemptiveExpandFail; + break; + case PreemptiveExpand::kError: + // TODO(hlundin): Map to Modes::kError instead? + last_mode_ = Mode::kPreemptiveExpandFail; + return kPreemptiveExpandError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the `sync_buffer_`. + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer_, borrowed_samples_per_channel, + sync_buffer_->Size() - borrowed_samples_per_channel); + algorithm_buffer_->PopFront(borrowed_samples_per_channel); + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = Mode::kCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) { + if (!packet_list->empty()) { + // Must have exactly one SID frame at this point. 
+ RTC_DCHECK_EQ(packet_list->size(), 1); + const Packet& packet = packet_list->front(); + if (!decoder_database_->IsComfortNoise(packet.payload_type)) { + RTC_LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG."; + return kOtherError; + } + if (comfort_noise_->UpdateParameters(packet) == + ComfortNoise::kInternalError) { + algorithm_buffer_->Zeros(output_size_samples_); + return -comfort_noise_->internal_error_code(); + } + } + int cn_return = + comfort_noise_->Generate(output_size_samples_, algorithm_buffer_.get()); + expand_->Reset(); + last_mode_ = Mode::kRfc3389Cng; + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + if (cn_return == ComfortNoise::kInternalError) { + RTC_LOG(LS_WARNING) << "Comfort noise generator returned error code: " + << comfort_noise_->internal_error_code(); + return kComfortNoiseErrorCode; + } else if (cn_return == ComfortNoise::kUnknownPayloadType) { + return kUnknownRtpPayloadType; + } + return 0; +} + +void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer, + size_t decoded_length) { + RTC_DCHECK(normal_.get()); + normal_->Process(decoded_buffer, decoded_length, last_mode_, + algorithm_buffer_.get()); + last_mode_ = Mode::kCodecInternalCng; + expand_->Reset(); +} + +int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) { + // This block of the code and the block further down, handling `dtmf_switch` + // are commented out. Otherwise playing out-of-band DTMF would fail in VoE + // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is + // equivalent to `dtmf_switch` always be false. + // + // See http://webrtc-codereview.appspot.com/1195004/ for discussion + // On this issue. This change might cause some glitches at the point of + // switch from audio to DTMF. Issue 1545 is filed to track this. + // + // bool dtmf_switch = false; + // if ((last_mode_ != Modes::kDtmf) && + // dtmf_tone_generator_->initialized()) { + // // Special case; see below. 
+ // // We must catch this before calling Generate, since `initialized` is + // // modified in that call. + // dtmf_switch = true; + // } + + int dtmf_return_value = 0; + if (!dtmf_tone_generator_->initialized()) { + // Initialize if not already done. + dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, + dtmf_event.volume); + } + + if (dtmf_return_value == 0) { + // Generate DTMF signal. + dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, + algorithm_buffer_.get()); + } + + if (dtmf_return_value < 0) { + algorithm_buffer_->Zeros(output_size_samples_); + return dtmf_return_value; + } + + // if (dtmf_switch) { + // // This is the special case where the previous operation was DTMF + // // overdub, but the current instruction is "regular" DTMF. We must make + // // sure that the DTMF does not have any discontinuities. The first DTMF + // // sample that we generate now must be played out immediately, therefore + // // it must be copied to the speech buffer. + // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and + // // verify correct operation. + // RTC_DCHECK_NOTREACHED(); + // // Must generate enough data to replace all of the `sync_buffer_` + // // "future". + // int required_length = sync_buffer_->FutureLength(); + // RTC_DCHECK(dtmf_tone_generator_->initialized()); + // dtmf_return_value = dtmf_tone_generator_->Generate(required_length, + // algorithm_buffer_); + // RTC_DCHECK((size_t) required_length == algorithm_buffer_->Size()); + // if (dtmf_return_value < 0) { + // algorithm_buffer_->Zeros(output_size_samples_); + // return dtmf_return_value; + // } + // + // // Overwrite the "future" part of the speech buffer with the new DTMF + // // data. + // // TODO(hlundin): It seems that this overwriting has gone lost. + // // Not adapted for multi-channel yet. 
+ // RTC_DCHECK(algorithm_buffer_->Channels() == 1); + // if (algorithm_buffer_->Channels() != 1) { + // RTC_LOG(LS_WARNING) << "DTMF not supported for more than one channel"; + // return kStereoNotSupported; + // } + // // Shuffle the remaining data to the beginning of algorithm buffer. + // algorithm_buffer_->PopFront(sync_buffer_->FutureLength()); + // } + + sync_buffer_->IncreaseEndTimestamp( + static_cast<uint32_t>(output_size_samples_)); + expand_->Reset(); + last_mode_ = Mode::kDtmf; + + // Set to false because the DTMF is already in the algorithm buffer. + *play_dtmf = false; + return 0; +} + +int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, + size_t num_channels, + int16_t* output) const { + size_t out_index = 0; + size_t overdub_length = output_size_samples_; // Default value. + + if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { + // Special operation for transition from "DTMF only" to "DTMF overdub". + out_index = + std::min(sync_buffer_->dtmf_index() - sync_buffer_->next_index(), + output_size_samples_); + overdub_length = output_size_samples_ - out_index; + } + + AudioMultiVector dtmf_output(num_channels); + int dtmf_return_value = 0; + if (!dtmf_tone_generator_->initialized()) { + dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, + dtmf_event.volume); + } + if (dtmf_return_value == 0) { + dtmf_return_value = + dtmf_tone_generator_->Generate(overdub_length, &dtmf_output); + RTC_DCHECK_EQ(overdub_length, dtmf_output.Size()); + } + dtmf_output.ReadInterleaved(overdub_length, &output[out_index]); + return dtmf_return_value < 0 ? 
dtmf_return_value : 0; +} + +int NetEqImpl::ExtractPackets(size_t required_samples, + PacketList* packet_list) { + bool first_packet = true; + uint8_t prev_payload_type = 0; + uint32_t prev_timestamp = 0; + uint16_t prev_sequence_number = 0; + bool next_packet_available = false; + + const Packet* next_packet = packet_buffer_->PeekNextPacket(); + RTC_DCHECK(next_packet); + if (!next_packet) { + RTC_LOG(LS_ERROR) << "Packet buffer unexpectedly empty."; + return -1; + } + uint32_t first_timestamp = next_packet->timestamp; + size_t extracted_samples = 0; + + // Packet extraction loop. + do { + timestamp_ = next_packet->timestamp; + absl::optional<Packet> packet = packet_buffer_->GetNextPacket(); + // `next_packet` may be invalid after the `packet_buffer_` operation. + next_packet = nullptr; + if (!packet) { + RTC_LOG(LS_ERROR) << "Should always be able to extract a packet here"; + RTC_DCHECK_NOTREACHED(); // Should always be able to extract a packet + // here. + return -1; + } + const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs(); + stats_->StoreWaitingTime(waiting_time_ms); + RTC_DCHECK(!packet->empty()); + + if (first_packet) { + first_packet = false; + if (nack_enabled_) { + RTC_DCHECK(nack_); + // TODO(henrik.lundin): Should we update this for all decoded packets? + nack_->UpdateLastDecodedPacket(packet->sequence_number, + packet->timestamp); + } + prev_sequence_number = packet->sequence_number; + prev_timestamp = packet->timestamp; + prev_payload_type = packet->payload_type; + } + + const bool has_cng_packet = + decoder_database_->IsComfortNoise(packet->payload_type); + // Store number of extracted samples. + size_t packet_duration = 0; + if (packet->frame) { + packet_duration = packet->frame->Duration(); + // TODO(ossu): Is this the correct way to track Opus FEC packets? 
+ if (packet->priority.codec_level > 0) { + stats_->SecondaryDecodedSamples( + rtc::dchecked_cast<int>(packet_duration)); + } + } else if (!has_cng_packet) { + RTC_LOG(LS_WARNING) << "Unknown payload type " + << static_cast<int>(packet->payload_type); + RTC_DCHECK_NOTREACHED(); + } + + if (packet_duration == 0) { + // Decoder did not return a packet duration. Assume that the packet + // contains the same number of samples as the previous one. + packet_duration = decoder_frame_length_; + } + extracted_samples = packet->timestamp - first_timestamp + packet_duration; + + RTC_DCHECK(controller_); + stats_->JitterBufferDelay(packet_duration, waiting_time_ms, + controller_->TargetLevelMs(), + controller_->UnlimitedTargetLevelMs()); + + packet_list->push_back(std::move(*packet)); // Store packet in list. + packet = absl::nullopt; // Ensure it's never used after the move. + + // Check what packet is available next. + next_packet = packet_buffer_->PeekNextPacket(); + next_packet_available = false; + if (next_packet && prev_payload_type == next_packet->payload_type && + !has_cng_packet) { + int16_t seq_no_diff = next_packet->sequence_number - prev_sequence_number; + size_t ts_diff = next_packet->timestamp - prev_timestamp; + if ((seq_no_diff == 1 || seq_no_diff == 0) && + ts_diff <= packet_duration) { + // The next sequence number is available, or the next part of a packet + // that was split into pieces upon insertion. + next_packet_available = true; + } + prev_sequence_number = next_packet->sequence_number; + prev_timestamp = next_packet->timestamp; + } + } while (extracted_samples < required_samples && next_packet_available); + + if (extracted_samples > 0) { + // Delete old packets only when we are going to decode something. Otherwise, + // we could end up in the situation where we never decode anything, since + // all incoming packets are considered too old but the buffer will also + // never be flooded and flushed. 
+ packet_buffer_->DiscardAllOldPackets(timestamp_, stats_.get()); + } + + return rtc::dchecked_cast<int>(extracted_samples); +} + +void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) { + // Delete objects and create new ones. + expand_.reset(expand_factory_->Create(background_noise_.get(), + sync_buffer_.get(), &random_vector_, + stats_.get(), fs_hz, channels)); + merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get())); +} + +void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { + RTC_LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " + << channels; + // TODO(hlundin): Change to an enumerator and skip assert. + RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || + fs_hz == 48000); + RTC_DCHECK_GT(channels, 0); + + // Before changing the sample rate, end and report any ongoing expand event. + stats_->EndExpandEvent(fs_hz_); + fs_hz_ = fs_hz; + fs_mult_ = fs_hz / 8000; + output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_); + decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. + + last_mode_ = Mode::kNormal; + + ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) + cng_decoder->Reset(); + + // Reinit post-decode VAD with new sample rate. + RTC_DCHECK(vad_.get()); // Cannot be NULL here. + vad_->Init(); + + // Delete algorithm buffer and create a new one. + algorithm_buffer_.reset(new AudioMultiVector(channels)); + + // Delete sync buffer and create a new one. + sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); + + // Delete BackgroundNoise object and create a new one. + background_noise_.reset(new BackgroundNoise(channels)); + + // Reset random vector. + random_vector_.Reset(); + + UpdatePlcComponents(fs_hz, channels); + + // Move index so that we create a small set of future samples (all 0). 
+ sync_buffer_->set_next_index(sync_buffer_->next_index() - + expand_->overlap_length()); + + normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_, + expand_.get(), stats_.get())); + accelerate_.reset( + accelerate_factory_->Create(fs_hz, channels, *background_noise_)); + preemptive_expand_.reset(preemptive_expand_factory_->Create( + fs_hz, channels, *background_noise_, expand_->overlap_length())); + + // Delete ComfortNoise object and create a new one. + comfort_noise_.reset( + new ComfortNoise(fs_hz, decoder_database_.get(), sync_buffer_.get())); + + // Verify that `decoded_buffer_` is long enough. + if (decoded_buffer_length_ < kMaxFrameSize * channels) { + // Reallocate to larger size. + decoded_buffer_length_ = kMaxFrameSize * channels; + decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); + } + RTC_CHECK(controller_) << "Unexpectedly found no NetEqController"; + controller_->SetSampleRate(fs_hz_, output_size_samples_); +} + +NetEqImpl::OutputType NetEqImpl::LastOutputType() { + RTC_DCHECK(vad_.get()); + RTC_DCHECK(expand_.get()); + if (last_mode_ == Mode::kCodecInternalCng || + last_mode_ == Mode::kRfc3389Cng) { + return OutputType::kCNG; + } else if (last_mode_ == Mode::kExpand && expand_->MuteFactor(0) == 0) { + // Expand mode has faded down to background noise only (very long expand). 
+ return OutputType::kPLCCNG; + } else if (last_mode_ == Mode::kExpand) { + return OutputType::kPLC; + } else if (vad_->running() && !vad_->active_speech()) { + return OutputType::kVadPassive; + } else if (last_mode_ == Mode::kCodecPlc) { + return OutputType::kCodecPLC; + } else { + return OutputType::kNormalSpeech; + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h new file mode 100644 index 0000000000..6120eab5b6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ +#define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ + +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/audio/audio_frame.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "api/neteq/neteq_controller_factory.h" +#include "api/neteq/tick_timer.h" +#include "api/rtp_packet_info.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/expand_uma_logger.h" +#include "modules/audio_coding/neteq/packet.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "rtc_base/synchronization/mutex.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +// Forward declarations. 
+class Accelerate; +class BackgroundNoise; +class Clock; +class ComfortNoise; +class DecoderDatabase; +class DtmfBuffer; +class DtmfToneGenerator; +class Expand; +class Merge; +class NackTracker; +class Normal; +class PacketBuffer; +class RedPayloadSplitter; +class PostDecodeVad; +class PreemptiveExpand; +class RandomVector; +class SyncBuffer; +class TimestampScaler; +struct AccelerateFactory; +struct DtmfEvent; +struct ExpandFactory; +struct PreemptiveExpandFactory; + +class NetEqImpl : public webrtc::NetEq { + public: + enum class OutputType { + kNormalSpeech, + kPLC, + kCNG, + kPLCCNG, + kVadPassive, + kCodecPLC + }; + + enum ErrorCodes { + kNoError = 0, + kOtherError, + kUnknownRtpPayloadType, + kDecoderNotFound, + kInvalidPointer, + kAccelerateError, + kPreemptiveExpandError, + kComfortNoiseErrorCode, + kDecoderErrorCode, + kOtherDecoderError, + kInvalidOperation, + kDtmfParsingError, + kDtmfInsertError, + kSampleUnderrun, + kDecodedTooMuch, + kRedundancySplitError, + kPacketBufferCorruption + }; + + struct Dependencies { + // The constructor populates the Dependencies struct with the default + // implementations of the objects. They can all be replaced by the user + // before sending the struct to the NetEqImpl constructor. However, there + // are dependencies between some of the classes inside the struct, so + // swapping out one may make it necessary to re-create another one. 
+ Dependencies(const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, + const NetEqControllerFactory& controller_factory); + ~Dependencies(); + + Clock* const clock; + std::unique_ptr<TickTimer> tick_timer; + std::unique_ptr<StatisticsCalculator> stats; + std::unique_ptr<DecoderDatabase> decoder_database; + std::unique_ptr<DtmfBuffer> dtmf_buffer; + std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator; + std::unique_ptr<PacketBuffer> packet_buffer; + std::unique_ptr<NetEqController> neteq_controller; + std::unique_ptr<RedPayloadSplitter> red_payload_splitter; + std::unique_ptr<TimestampScaler> timestamp_scaler; + std::unique_ptr<AccelerateFactory> accelerate_factory; + std::unique_ptr<ExpandFactory> expand_factory; + std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory; + }; + + // Creates a new NetEqImpl object. + NetEqImpl(const NetEq::Config& config, + Dependencies&& deps, + bool create_components = true); + + ~NetEqImpl() override; + + NetEqImpl(const NetEqImpl&) = delete; + NetEqImpl& operator=(const NetEqImpl&) = delete; + + // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure. + int InsertPacket(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> payload) override; + + void InsertEmptyPacket(const RTPHeader& rtp_header) override; + + int GetAudio( + AudioFrame* audio_frame, + bool* muted, + int* current_sample_rate_hz = nullptr, + absl::optional<Operation> action_override = absl::nullopt) override; + + void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override; + + bool RegisterPayloadType(int rtp_payload_type, + const SdpAudioFormat& audio_format) override; + + // Removes `rtp_payload_type` from the codec database. Returns 0 on success, + // -1 on failure. 
+ int RemovePayloadType(uint8_t rtp_payload_type) override; + + void RemoveAllPayloadTypes() override; + + bool SetMinimumDelay(int delay_ms) override; + + bool SetMaximumDelay(int delay_ms) override; + + bool SetBaseMinimumDelayMs(int delay_ms) override; + + int GetBaseMinimumDelayMs() const override; + + int TargetDelayMs() const override; + + int FilteredCurrentDelayMs() const override; + + // Writes the current network statistics to `stats`. The statistics are reset + // after the call. + int NetworkStatistics(NetEqNetworkStatistics* stats) override; + + NetEqNetworkStatistics CurrentNetworkStatistics() const override; + + NetEqLifetimeStatistics GetLifetimeStatistics() const override; + + NetEqOperationsAndState GetOperationsAndState() const override; + + // Enables post-decode VAD. When enabled, GetAudio() will return + // kOutputVADPassive when the signal contains no speech. + void EnableVad() override; + + // Disables post-decode VAD. + void DisableVad() override; + + absl::optional<uint32_t> GetPlayoutTimestamp() const override; + + int last_output_sample_rate_hz() const override; + + absl::optional<DecoderFormat> GetDecoderFormat( + int payload_type) const override; + + // Flushes both the packet buffer and the sync buffer. + void FlushBuffers() override; + + void EnableNack(size_t max_nack_list_size) override; + + void DisableNack() override; + + std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override; + + int SyncBufferSizeMs() const override; + + // This accessor method is only intended for testing purposes. + const SyncBuffer* sync_buffer_for_test() const; + Operation last_operation_for_test() const; + + protected: + static const int kOutputSizeMs = 10; + static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz. + // TODO(hlundin): Provide a better value for kSyncBufferSize. + // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for + // calculating correlations of current frame against history. 
+ static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48; + + // Inserts a new packet into NetEq. This is used by the InsertPacket method + // above. Returns 0 on success, otherwise an error code. + // TODO(hlundin): Merge this with InsertPacket above? + int InsertPacketInternal(const RTPHeader& rtp_header, + rtc::ArrayView<const uint8_t> payload) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Delivers 10 ms of audio data. The data is written to `audio_frame`. + // Returns 0 on success, otherwise an error code. + int GetAudioInternal(AudioFrame* audio_frame, + bool* muted, + absl::optional<Operation> action_override) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Provides a decision to the GetAudioInternal method. The decision what to + // do is written to `operation`. Packets to decode are written to + // `packet_list`, and a DTMF event to play is written to `dtmf_event`. When + // DTMF should be played, `play_dtmf` is set to true by the method. + // Returns 0 on success, otherwise an error code. + int GetDecision(Operation* operation, + PacketList* packet_list, + DtmfEvent* dtmf_event, + bool* play_dtmf, + absl::optional<Operation> action_override) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Decodes the speech packets in `packet_list`, and writes the results to + // `decoded_buffer`, which is allocated to hold `decoded_buffer_length` + // elements. The length of the decoded data is written to `decoded_length`. + // The speech type -- speech or (codec-internal) comfort noise -- is written + // to `speech_type`. If `packet_list` contains any SID frames for RFC 3389 + // comfort noise, those are not decoded. + int Decode(PacketList* packet_list, + Operation* operation, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method to Decode(). Performs codec internal CNG. 
+ int DecodeCng(AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method to Decode(). Performs the actual decoding. + int DecodeLoop(PacketList* packet_list, + const Operation& operation, + AudioDecoder* decoder, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the Normal class to perform the normal operation. + void DoNormal(const int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the Merge class to perform the merge operation. + void DoMerge(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the Expand class to perform the expand operation. + int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the Accelerate class to perform the accelerate + // operation. + int DoAccelerate(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf, + bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the PreemptiveExpand class to perform the + // preemptive expand operation. + int DoPreemptiveExpand(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort + // noise. `packet_list` can either contain one SID frame to update the + // noise parameters, or no payload at all, in which case the previously + // received parameters are used. 
+ int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Calls the audio decoder to generate codec-internal comfort noise when + // no packet was received. + void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Calls the DtmfToneGenerator class to generate DTMF tones. + int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Overdub DTMF on top of `output`. + int DtmfOverdub(const DtmfEvent& dtmf_event, + size_t num_channels, + int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Extracts packets from `packet_buffer_` to produce at least + // `required_samples` samples. The packets are inserted into `packet_list`. + // Returns the number of samples that the packets in the list will produce, or + // -1 in case of an error. + int ExtractPackets(size_t required_samples, PacketList* packet_list) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Resets various variables and objects to new values based on the sample rate + // `fs_hz` and the number of audio channels `channels`. + void SetSampleRateAndChannels(int fs_hz, size_t channels) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Returns the output type for the audio produced by the latest call to + // GetAudio(). + OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Updates Expand and Merge. 
+ virtual void UpdatePlcComponents(int fs_hz, size_t channels) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Clock* const clock_; + + mutable Mutex mutex_; + const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_); + const std::unique_ptr<DecoderDatabase> decoder_database_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_); + const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_); + const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<TimestampScaler> timestamp_scaler_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_); + const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_); + const std::unique_ptr<AccelerateFactory> accelerate_factory_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_ + RTC_GUARDED_BY(mutex_); + const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_); + + std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_); + RandomVector random_vector_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_); + int fs_hz_ 
RTC_GUARDED_BY(mutex_); + int fs_mult_ RTC_GUARDED_BY(mutex_); + int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_); + size_t output_size_samples_ RTC_GUARDED_BY(mutex_); + size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_); + Mode last_mode_ RTC_GUARDED_BY(mutex_); + Operation last_operation_ RTC_GUARDED_BY(mutex_); + size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_); + uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_); + bool new_codec_ RTC_GUARDED_BY(mutex_); + uint32_t timestamp_ RTC_GUARDED_BY(mutex_); + bool reset_decoder_ RTC_GUARDED_BY(mutex_); + absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_); + absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_); + bool first_packet_ RTC_GUARDED_BY(mutex_); + bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_); + std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_); + bool nack_enabled_ RTC_GUARDED_BY(mutex_); + const bool enable_muted_state_ RTC_GUARDED_BY(mutex_); + AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) = + AudioFrame::kVadPassive; + std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_ + RTC_GUARDED_BY(mutex_); + std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_); + ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_); + ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_); + bool no_time_stretching_ RTC_GUARDED_BY(mutex_); // Only used for test. 
+ rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_); +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc new file mode 100644 index 0000000000..ce2be656ef --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -0,0 +1,1871 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/neteq_impl.h" + +#include <memory> +#include <utility> +#include <vector> + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/default_neteq_controller_factory.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" +#include "modules/audio_coding/neteq/accelerate.h" +#include "modules/audio_coding/neteq/decision_logic.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/histogram.h" +#include "modules/audio_coding/neteq/mock/mock_decoder_database.h" +#include "modules/audio_coding/neteq/mock/mock_dtmf_buffer.h" +#include "modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h" +#include "modules/audio_coding/neteq/mock/mock_neteq_controller.h" +#include "modules/audio_coding/neteq/mock/mock_packet_buffer.h" +#include "modules/audio_coding/neteq/mock/mock_red_payload_splitter.h" +#include "modules/audio_coding/neteq/preemptive_expand.h" +#include 
"modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "modules/audio_coding/neteq/timestamp_scaler.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/clock.h" +#include "test/audio_decoder_proxy_factory.h" +#include "test/function_audio_decoder_factory.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/mock_audio_decoder.h" +#include "test/mock_audio_decoder_factory.h" + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::DoAll; +using ::testing::ElementsAre; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::IsEmpty; +using ::testing::IsNull; +using ::testing::Pointee; +using ::testing::Return; +using ::testing::ReturnNull; +using ::testing::SetArgPointee; +using ::testing::SetArrayArgument; +using ::testing::SizeIs; +using ::testing::WithArg; + +namespace webrtc { + +// This function is called when inserting a packet list into the mock packet +// buffer. The purpose is to delete all inserted packets properly, to avoid +// memory leaks in the test. +int DeletePacketsAndReturnOk(PacketList* packet_list) { + packet_list->clear(); + return PacketBuffer::kOK; +} + +class NetEqImplTest : public ::testing::Test { + protected: + NetEqImplTest() : clock_(0) { config_.sample_rate_hz = 8000; } + + void CreateInstance( + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) { + ASSERT_TRUE(decoder_factory); + config_.enable_muted_state = enable_muted_state_; + NetEqImpl::Dependencies deps(config_, &clock_, decoder_factory, + DefaultNetEqControllerFactory()); + + // Get a local pointer to NetEq's TickTimer object. 
+ tick_timer_ = deps.tick_timer.get(); + + if (use_mock_decoder_database_) { + std::unique_ptr<MockDecoderDatabase> mock(new MockDecoderDatabase); + mock_decoder_database_ = mock.get(); + EXPECT_CALL(*mock_decoder_database_, GetActiveCngDecoder()) + .WillOnce(ReturnNull()); + deps.decoder_database = std::move(mock); + } + decoder_database_ = deps.decoder_database.get(); + + if (use_mock_dtmf_buffer_) { + std::unique_ptr<MockDtmfBuffer> mock( + new MockDtmfBuffer(config_.sample_rate_hz)); + mock_dtmf_buffer_ = mock.get(); + deps.dtmf_buffer = std::move(mock); + } + dtmf_buffer_ = deps.dtmf_buffer.get(); + + if (use_mock_dtmf_tone_generator_) { + std::unique_ptr<MockDtmfToneGenerator> mock(new MockDtmfToneGenerator); + mock_dtmf_tone_generator_ = mock.get(); + deps.dtmf_tone_generator = std::move(mock); + } + dtmf_tone_generator_ = deps.dtmf_tone_generator.get(); + + if (use_mock_packet_buffer_) { + std::unique_ptr<MockPacketBuffer> mock( + new MockPacketBuffer(config_.max_packets_in_buffer, tick_timer_)); + mock_packet_buffer_ = mock.get(); + deps.packet_buffer = std::move(mock); + } + packet_buffer_ = deps.packet_buffer.get(); + + if (use_mock_neteq_controller_) { + std::unique_ptr<MockNetEqController> mock(new MockNetEqController()); + mock_neteq_controller_ = mock.get(); + deps.neteq_controller = std::move(mock); + } else { + deps.stats = std::make_unique<StatisticsCalculator>(); + NetEqController::Config controller_config; + controller_config.tick_timer = tick_timer_; + controller_config.base_min_delay_ms = config_.min_delay_ms; + controller_config.allow_time_stretching = true; + controller_config.max_packets_in_buffer = config_.max_packets_in_buffer; + controller_config.clock = &clock_; + deps.neteq_controller = + std::make_unique<DecisionLogic>(std::move(controller_config)); + } + neteq_controller_ = deps.neteq_controller.get(); + + if (use_mock_payload_splitter_) { + std::unique_ptr<MockRedPayloadSplitter> mock(new MockRedPayloadSplitter); + 
mock_payload_splitter_ = mock.get(); + deps.red_payload_splitter = std::move(mock); + } + red_payload_splitter_ = deps.red_payload_splitter.get(); + + deps.timestamp_scaler = std::unique_ptr<TimestampScaler>( + new TimestampScaler(*deps.decoder_database.get())); + + neteq_.reset(new NetEqImpl(config_, std::move(deps))); + ASSERT_TRUE(neteq_ != NULL); + } + + void CreateInstance() { CreateInstance(CreateBuiltinAudioDecoderFactory()); } + + void UseNoMocks() { + ASSERT_TRUE(neteq_ == NULL) << "Must call UseNoMocks before CreateInstance"; + use_mock_decoder_database_ = false; + use_mock_neteq_controller_ = false; + use_mock_dtmf_buffer_ = false; + use_mock_dtmf_tone_generator_ = false; + use_mock_packet_buffer_ = false; + use_mock_payload_splitter_ = false; + } + + virtual ~NetEqImplTest() { + if (use_mock_decoder_database_) { + EXPECT_CALL(*mock_decoder_database_, Die()).Times(1); + } + if (use_mock_neteq_controller_) { + EXPECT_CALL(*mock_neteq_controller_, Die()).Times(1); + } + if (use_mock_dtmf_buffer_) { + EXPECT_CALL(*mock_dtmf_buffer_, Die()).Times(1); + } + if (use_mock_dtmf_tone_generator_) { + EXPECT_CALL(*mock_dtmf_tone_generator_, Die()).Times(1); + } + if (use_mock_packet_buffer_) { + EXPECT_CALL(*mock_packet_buffer_, Die()).Times(1); + } + } + + void TestDtmfPacket(int sample_rate_hz) { + const size_t kPayloadLength = 4; + const uint8_t kPayloadType = 110; + const int kSampleRateHz = 16000; + config_.sample_rate_hz = kSampleRateHz; + UseNoMocks(); + CreateInstance(); + // Event: 2, E bit, Volume: 17, Length: 4336. + uint8_t payload[kPayloadLength] = {0x02, 0x80 + 0x11, 0x10, 0xF0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("telephone-event", sample_rate_hz, 1))); + + // Insert first packet. 
+ EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Pull audio once. + const size_t kMaxOutputSize = + static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_FALSE(muted); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + + // DTMF packets are immediately consumed by `InsertPacket()` and won't be + // returned by `GetAudio()`. + EXPECT_THAT(output.packet_infos_, IsEmpty()); + + // Verify first 64 samples of actual output. + const std::vector<int16_t> kOutput( + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1578, -2816, -3460, -3403, -2709, -1594, -363, 671, 1269, 1328, + 908, 202, -513, -964, -955, -431, 504, 1617, 2602, 3164, + 3101, 2364, 1073, -511, -2047, -3198, -3721, -3525, -2688, -1440, + -99, 1015, 1663, 1744, 1319, 588, -171, -680, -747, -315, + 515, 1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, + -3864, -3516, -2534, -1163}); + ASSERT_GE(kMaxOutputSize, kOutput.size()); + EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data())); + } + + std::unique_ptr<NetEqImpl> neteq_; + NetEq::Config config_; + SimulatedClock clock_; + TickTimer* tick_timer_ = nullptr; + MockDecoderDatabase* mock_decoder_database_ = nullptr; + DecoderDatabase* decoder_database_ = nullptr; + bool use_mock_decoder_database_ = true; + MockNetEqController* mock_neteq_controller_ = nullptr; + NetEqController* neteq_controller_ = nullptr; + bool use_mock_neteq_controller_ = true; + MockDtmfBuffer* mock_dtmf_buffer_ = nullptr; + DtmfBuffer* dtmf_buffer_ = nullptr; + bool use_mock_dtmf_buffer_ = true; + MockDtmfToneGenerator* mock_dtmf_tone_generator_ = nullptr; + DtmfToneGenerator* dtmf_tone_generator_ = nullptr; + bool use_mock_dtmf_tone_generator_ = true; + MockPacketBuffer* mock_packet_buffer_ = nullptr; + PacketBuffer* packet_buffer_ = nullptr; + bool 
use_mock_packet_buffer_ = true; + MockRedPayloadSplitter* mock_payload_splitter_ = nullptr; + RedPayloadSplitter* red_payload_splitter_ = nullptr; + bool use_mock_payload_splitter_ = true; + bool enable_muted_state_ = false; +}; + +// This tests the interface class NetEq. +// TODO(hlundin): Move to separate file? +TEST(NetEq, CreateAndDestroy) { + NetEq::Config config; + SimulatedClock clock(0); + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + std::unique_ptr<NetEq> neteq = + DefaultNetEqFactory().CreateNetEq(config, decoder_factory, &clock); +} + +TEST_F(NetEqImplTest, RegisterPayloadType) { + CreateInstance(); + constexpr int rtp_payload_type = 0; + const SdpAudioFormat format("pcmu", 8000, 1); + EXPECT_CALL(*mock_decoder_database_, + RegisterPayload(rtp_payload_type, format)); + neteq_->RegisterPayloadType(rtp_payload_type, format); +} + +TEST_F(NetEqImplTest, RemovePayloadType) { + CreateInstance(); + uint8_t rtp_payload_type = 0; + EXPECT_CALL(*mock_decoder_database_, Remove(rtp_payload_type)) + .WillOnce(Return(DecoderDatabase::kDecoderNotFound)); + // Check that kOK is returned when database returns kDecoderNotFound, because + // removing a payload type that was never registered is not an error. 
+ EXPECT_EQ(NetEq::kOK, neteq_->RemovePayloadType(rtp_payload_type)); +} + +TEST_F(NetEqImplTest, RemoveAllPayloadTypes) { + CreateInstance(); + EXPECT_CALL(*mock_decoder_database_, RemoveAll()).WillOnce(Return()); + neteq_->RemoveAllPayloadTypes(); +} + +TEST_F(NetEqImplTest, InsertPacket) { + using ::testing::AllOf; + using ::testing::Field; + CreateInstance(); + const size_t kPayloadLength = 100; + const uint8_t kPayloadType = 0; + const uint16_t kFirstSequenceNumber = 0x1234; + const uint32_t kFirstTimestamp = 0x12345678; + const uint32_t kSsrc = 0x87654321; + uint8_t payload[kPayloadLength] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = kFirstSequenceNumber; + rtp_header.timestamp = kFirstTimestamp; + rtp_header.ssrc = kSsrc; + Packet fake_packet; + fake_packet.payload_type = kPayloadType; + fake_packet.sequence_number = kFirstSequenceNumber; + fake_packet.timestamp = kFirstTimestamp; + + auto mock_decoder_factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + EXPECT_CALL(*mock_decoder_factory, MakeAudioDecoderMock(_, _, _)) + .WillOnce(Invoke([&](const SdpAudioFormat& format, + absl::optional<AudioCodecPairId> codec_pair_id, + std::unique_ptr<AudioDecoder>* dec) { + EXPECT_EQ("pcmu", format.name); + + std::unique_ptr<MockAudioDecoder> mock_decoder(new MockAudioDecoder); + EXPECT_CALL(*mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(*mock_decoder, SampleRateHz()).WillRepeatedly(Return(8000)); + EXPECT_CALL(*mock_decoder, Die()).Times(1); // Called when deleted. + + *dec = std::move(mock_decoder); + })); + DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, mock_decoder_factory.get()); + + // Expectations for decoder database. + EXPECT_CALL(*mock_decoder_database_, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(&info)); + + // Expectations for packet buffer. 
+ EXPECT_CALL(*mock_packet_buffer_, Empty()) + .WillOnce(Return(false)); // Called once after first packet is inserted. + EXPECT_CALL(*mock_packet_buffer_, Flush(_)).Times(1); + EXPECT_CALL(*mock_packet_buffer_, InsertPacketList(_, _, _, _, _, _, _, _)) + .Times(2) + .WillRepeatedly(DoAll(SetArgPointee<2>(kPayloadType), + WithArg<0>(Invoke(DeletePacketsAndReturnOk)))); + // SetArgPointee<2>(kPayloadType) means that the argument at zero-based + // index 2 (the third argument) is a pointer, and the variable pointed to is + // set to kPayloadType. + // Also invoke the function DeletePacketsAndReturnOk to properly delete all + // packets in the list (to avoid memory leaks in the test). + EXPECT_CALL(*mock_packet_buffer_, PeekNextPacket()) + .Times(1) + .WillOnce(Return(&fake_packet)); + + // Expectations for DTMF buffer. + EXPECT_CALL(*mock_dtmf_buffer_, Flush()).Times(1); + + // Expectations for NetEq controller. + { + // All expectations within this block must be called in this specific order. + InSequence sequence; // Dummy variable. + // Expectations when the first packet is inserted. + EXPECT_CALL( + *mock_neteq_controller_, + PacketArrived( + /*fs_hz*/ 8000, + /*should_update_stats*/ _, + /*info*/ + AllOf( + Field(&NetEqController::PacketArrivedInfo::is_cng_or_dtmf, + false), + Field(&NetEqController::PacketArrivedInfo::main_sequence_number, + kFirstSequenceNumber), + Field(&NetEqController::PacketArrivedInfo::main_timestamp, + kFirstTimestamp)))); + EXPECT_CALL( + *mock_neteq_controller_, + PacketArrived( + /*fs_hz*/ 8000, + /*should_update_stats*/ _, + /*info*/ + AllOf( + Field(&NetEqController::PacketArrivedInfo::is_cng_or_dtmf, + false), + Field(&NetEqController::PacketArrivedInfo::main_sequence_number, + kFirstSequenceNumber + 1), + Field(&NetEqController::PacketArrivedInfo::main_timestamp, + kFirstTimestamp + 160)))); + } + + // Insert first packet. + neteq_->InsertPacket(rtp_header, payload); + + // Insert second packet. 
+ rtp_header.timestamp += 160; + rtp_header.sequenceNumber += 1; + neteq_->InsertPacket(rtp_header, payload); +} + +TEST_F(NetEqImplTest, InsertPacketsUntilBufferIsFull) { + UseNoMocks(); + CreateInstance(); + + const int kPayloadLengthSamples = 80; + const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples; // PCM 16-bit. + const uint8_t kPayloadType = 17; // Just an arbitrary number. + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + + // Insert packets. The buffer should not flush. + for (size_t i = 1; i <= config_.max_packets_in_buffer; ++i) { + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + rtp_header.timestamp += kPayloadLengthSamples; + rtp_header.sequenceNumber += 1; + EXPECT_EQ(i, packet_buffer_->NumPacketsInBuffer()); + } + + // Insert one more packet and make sure the buffer got flushed. That is, it + // should only hold one single packet. + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + EXPECT_EQ(1u, packet_buffer_->NumPacketsInBuffer()); + const Packet* test_packet = packet_buffer_->PeekNextPacket(); + EXPECT_EQ(rtp_header.timestamp, test_packet->timestamp); + EXPECT_EQ(rtp_header.sequenceNumber, test_packet->sequence_number); +} + +TEST_F(NetEqImplTest, TestDtmfPacketAVT) { + TestDtmfPacket(8000); +} + +TEST_F(NetEqImplTest, TestDtmfPacketAVT16kHz) { + TestDtmfPacket(16000); +} + +TEST_F(NetEqImplTest, TestDtmfPacketAVT32kHz) { + TestDtmfPacket(32000); +} + +TEST_F(NetEqImplTest, TestDtmfPacketAVT48kHz) { + TestDtmfPacket(48000); +} + +// This test verifies that timestamps propagate from the incoming packets +// through to the sync buffer and to the playout timestamp. 
+TEST_F(NetEqImplTest, VerifyTimestampPropagation) { + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + rtp_header.numCSRCs = 3; + rtp_header.arrOfCSRCs[0] = 43; + rtp_header.arrOfCSRCs[1] = 65; + rtp_header.arrOfCSRCs[2] = 17; + + // This is a dummy decoder that produces as many output samples as the input + // has bytes. The output is an increasing series, starting at 1 for the first + // sample, and then increasing by 1 for each sample. + class CountingSamplesDecoder : public AudioDecoder { + public: + CountingSamplesDecoder() : next_value_(1) {} + + // Produce as many samples as input bytes (`encoded_len`). + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int /* sample_rate_hz */, + int16_t* decoded, + SpeechType* speech_type) override { + for (size_t i = 0; i < encoded_len; ++i) { + decoded[i] = next_value_++; + } + *speech_type = kSpeech; + return rtc::checked_cast<int>(encoded_len); + } + + void Reset() override { next_value_ = 1; } + + int SampleRateHz() const override { return kSampleRateHz; } + + size_t Channels() const override { return 1; } + + uint16_t next_value() const { return next_value_; } + + private: + int16_t next_value_; + } decoder_; + + auto decoder_factory = + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&decoder_); + + UseNoMocks(); + CreateInstance(decoder_factory); + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("L16", 8000, 1))); + + // Insert one packet. 
+ clock_.AdvanceTimeMilliseconds(123456); + Timestamp expected_receive_time = clock_.CurrentTime(); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Pull audio once. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_FALSE(muted); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + + // Verify `output.packet_infos_`. + ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), ElementsAre(43, 65, 17)); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_FALSE(packet_info.audio_level().has_value()); + EXPECT_EQ(packet_info.receive_time(), expected_receive_time); + } + + // Start with a simple check that the fake decoder is behaving as expected. + EXPECT_EQ(kPayloadLengthSamples, + static_cast<size_t>(decoder_.next_value() - 1)); + + // The value of the last of the output samples is the same as the number of + // samples played from the decoded packet. Thus, this number + the RTP + // timestamp should match the playout timestamp. + // Wrap the expected value in an absl::optional to compare them as such. + EXPECT_EQ( + absl::optional<uint32_t>(rtp_header.timestamp + + output.data()[output.samples_per_channel_ - 1]), + neteq_->GetPlayoutTimestamp()); + + // Check the timestamp for the last value in the sync buffer. This should + // be one full frame length ahead of the RTP timestamp. 
+ const SyncBuffer* sync_buffer = neteq_->sync_buffer_for_test(); + ASSERT_TRUE(sync_buffer != NULL); + EXPECT_EQ(rtp_header.timestamp + kPayloadLengthSamples, + sync_buffer->end_timestamp()); + + // Check that the number of samples still to play from the sync buffer add + // up with what was already played out. + EXPECT_EQ( + kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1], + sync_buffer->FutureLength()); +} + +TEST_F(NetEqImplTest, ReorderedPacket) { + UseNoMocks(); + + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder)); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + rtp_header.extension.hasAudioLevel = true; + rtp_header.extension.audioLevel = 42; + + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, SampleRateHz()) + .WillRepeatedly(Return(kSampleRateHz)); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, PacketDuration(_, kPayloadLengthBytes)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples))); + int16_t dummy_output[kPayloadLengthSamples] = {0}; + // The below expectation will make the mock decoder write + // `kPayloadLengthSamples` zeros to the output array, and mark it as speech. 
+ EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(0), kPayloadLengthBytes, + kSampleRateHz, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(rtc::checked_cast<int>(kPayloadLengthSamples)))); + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("L16", 8000, 1))); + + // Insert one packet. + clock_.AdvanceTimeMilliseconds(123456); + Timestamp expected_receive_time = clock_.CurrentTime(); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Pull audio once. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + + // Verify `output.packet_infos_`. + ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), IsEmpty()); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.receive_time(), expected_receive_time); + } + + // Insert two more packets. The first one is out of order, and is already too + // old, the second one is the expected next packet. 
+ rtp_header.sequenceNumber -= 1; + rtp_header.timestamp -= kPayloadLengthSamples; + rtp_header.extension.audioLevel = 1; + payload[0] = 1; + clock_.AdvanceTimeMilliseconds(1000); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + rtp_header.sequenceNumber += 2; + rtp_header.timestamp += 2 * kPayloadLengthSamples; + rtp_header.extension.audioLevel = 2; + payload[0] = 2; + clock_.AdvanceTimeMilliseconds(2000); + expected_receive_time = clock_.CurrentTime(); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Expect only the second packet to be decoded (the one with "2" as the first + // payload byte). + EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(2), kPayloadLengthBytes, + kSampleRateHz, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(rtc::checked_cast<int>(kPayloadLengthSamples)))); + + // Pull audio once. + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + + // Now check the packet buffer, and make sure it is empty, since the + // out-of-order packet should have been discarded. + EXPECT_TRUE(packet_buffer_->Empty()); + + // NetEq `packets_discarded` should capture this packet discard. + EXPECT_EQ(1u, neteq_->GetLifetimeStatistics().packets_discarded); + + // Verify `output.packet_infos_`. Expect to only see the second packet. 
+ ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), IsEmpty()); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.receive_time(), expected_receive_time); + } + + EXPECT_CALL(mock_decoder, Die()); +} + +// This test verifies that NetEq can handle the situation where the first +// incoming packet is rejected. +TEST_F(NetEqImplTest, FirstPacketUnknown) { + UseNoMocks(); + CreateInstance(); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + // Insert one packet. Note that we have not registered any payload type, so + // this packet will be rejected. + EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(rtp_header, payload)); + + // Pull audio once. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_LE(output.samples_per_channel_, kMaxOutputSize); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kPLC, output.speech_type_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); + + // Register the payload type. + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + + // Insert 10 packets. 
+ for (size_t i = 0; i < 10; ++i) { + rtp_header.sequenceNumber++; + rtp_header.timestamp += kPayloadLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer()); + } + + // Pull audio repeatedly and make sure we get normal output, that is not PLC. + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_LE(output.samples_per_channel_, kMaxOutputSize); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_) + << "NetEq did not decode the packets as expected."; + EXPECT_THAT(output.packet_infos_, SizeIs(1)); + } +} + +// This test verifies that audio interruption is not logged for the initial +// PLC period before the first packet is decoded. +// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc. +// Make the test parametrized, so that we can test with different initial +// sample rates in NetEq. +class NetEqImplTestSampleRateParameter + : public NetEqImplTest, + public testing::WithParamInterface<int> { + protected: + NetEqImplTestSampleRateParameter() + : NetEqImplTest(), initial_sample_rate_hz_(GetParam()) { + config_.sample_rate_hz = initial_sample_rate_hz_; + } + + const int initial_sample_rate_hz_; +}; + +class NetEqImplTestSdpFormatParameter + : public NetEqImplTest, + public testing::WithParamInterface<SdpAudioFormat> { + protected: + NetEqImplTestSdpFormatParameter() + : NetEqImplTest(), sdp_format_(GetParam()) {} + const SdpAudioFormat sdp_format_; +}; + +// This test does the following: +// 0. Set up NetEq with initial sample rate given by test parameter, and a codec +// sample rate of 16000. +// 1. Start calling GetAudio before inserting any encoded audio. The audio +// produced will be PLC. +// 2. Insert a number of encoded audio packets. +// 3. 
Keep calling GetAudio and verify that no audio interruption was logged. +// Call GetAudio until NetEq runs out of data again; PLC starts. +// 4. Insert one more packet. +// 5. Call GetAudio until that packet is decoded and the PLC ends. + +TEST_P(NetEqImplTestSampleRateParameter, + NoAudioInterruptionLoggedBeforeFirstDecode) { + UseNoMocks(); + CreateInstance(); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kPayloadSampleRateHz = 16000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kPayloadSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + // Register the payload type. + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("l16", kPayloadSampleRateHz, 1))); + + // Pull audio several times. No packets have been inserted yet. + const size_t initial_output_size = + static_cast<size_t>(10 * initial_sample_rate_hz_ / 1000); // 10 ms + AudioFrame output; + bool muted; + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(initial_output_size, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); + } + + // Lambda for inserting packets. + auto insert_packet = [&]() { + rtp_header.sequenceNumber++; + rtp_header.timestamp += kPayloadLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + }; + // Insert 10 packets. + for (size_t i = 0; i < 10; ++i) { + insert_packet(); + EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer()); + } + + // Pull audio repeatedly and make sure we get normal output, that is not PLC. 
+ constexpr size_t kOutputSize = + static_cast<size_t>(10 * kPayloadSampleRateHz / 1000); // 10 ms + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_) + << "NetEq did not decode the packets as expected."; + EXPECT_THAT(output.packet_infos_, SizeIs(1)); + } + + // Verify that no interruption was logged. + auto lifetime_stats = neteq_->GetLifetimeStatistics(); + EXPECT_EQ(0, lifetime_stats.interruption_count); + + // Keep pulling audio data until a new PLC period is started. + size_t count_loops = 0; + while (output.speech_type_ == AudioFrame::kNormalSpeech) { + // Make sure we don't hang the test if we never go to PLC. + ASSERT_LT(++count_loops, 100u); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + } + + // Insert a few packets to avoid postpone decoding after expand. + for (size_t i = 0; i < 5; ++i) { + insert_packet(); + } + + // Pull audio until the newly inserted packet is decoded and the PLC ends. + while (output.speech_type_ != AudioFrame::kNormalSpeech) { + // Make sure we don't hang the test if we never go to PLC. + ASSERT_LT(++count_loops, 100u); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + } + + // Verify that no interruption was logged. + lifetime_stats = neteq_->GetLifetimeStatistics(); + EXPECT_EQ(0, lifetime_stats.interruption_count); +} + +// This test does the following: +// 0. Set up NetEq with initial sample rate given by test parameter, and a codec +// sample rate of 16000. +// 1. Insert a number of encoded audio packets. +// 2. Call GetAudio and verify that decoded audio is produced. +// 3. Keep calling GetAudio until NetEq runs out of data; PLC starts. +// 4. Keep calling GetAudio until PLC has been produced for at least 150 ms. +// 5. Insert one more packet. +// 6. 
Call GetAudio until that packet is decoded and the PLC ends. +// 7. Verify that an interruption was logged. + +TEST_P(NetEqImplTestSampleRateParameter, AudioInterruptionLogged) { + UseNoMocks(); + CreateInstance(); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kPayloadSampleRateHz = 16000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kPayloadSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + // Register the payload type. + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("l16", kPayloadSampleRateHz, 1))); + + // Lambda for inserting packets. + auto insert_packet = [&]() { + rtp_header.sequenceNumber++; + rtp_header.timestamp += kPayloadLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + }; + // Insert 10 packets. + for (size_t i = 0; i < 10; ++i) { + insert_packet(); + EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer()); + } + + AudioFrame output; + bool muted; + // Keep pulling audio data until a new PLC period is started. + size_t count_loops = 0; + do { + // Make sure we don't hang the test if we never go to PLC. + ASSERT_LT(++count_loops, 100u); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + } while (output.speech_type_ == AudioFrame::kNormalSpeech); + + // Pull audio 15 times, which produces 150 ms of output audio. This should + // all be produced as PLC. The total length of the gap will then be 150 ms + // plus an initial fraction of 10 ms at the start and the end of the PLC + // period. In total, less than 170 ms. 
+ for (size_t i = 0; i < 15; ++i) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_); + } + + // Insert a few packets to avoid postpone decoding after expand. + for (size_t i = 0; i < 5; ++i) { + insert_packet(); + } + + // Pull audio until the newly inserted packet is decoded and the PLC ends. + while (output.speech_type_ != AudioFrame::kNormalSpeech) { + // Make sure we don't hang the test if we never go to PLC. + ASSERT_LT(++count_loops, 100u); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + } + + // Verify that the interruption was logged. + auto lifetime_stats = neteq_->GetLifetimeStatistics(); + EXPECT_EQ(1, lifetime_stats.interruption_count); + EXPECT_GT(lifetime_stats.total_interruption_duration_ms, 150); + EXPECT_LT(lifetime_stats.total_interruption_duration_ms, 170); +} + +INSTANTIATE_TEST_SUITE_P(SampleRates, + NetEqImplTestSampleRateParameter, + testing::Values(8000, 16000, 32000, 48000)); + +TEST_P(NetEqImplTestSdpFormatParameter, GetNackListScaledTimestamp) { + UseNoMocks(); + CreateInstance(); + + neteq_->EnableNack(128); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kPayloadSampleRateHz = sdp_format_.clockrate_hz; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kPayloadSampleRateHz / 1000); // 10 ms. + const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2; + std::vector<uint8_t> payload(kPayloadLengthBytes, 0); + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, sdp_format_)); + + auto insert_packet = [&](bool lost = false) { + rtp_header.sequenceNumber++; + rtp_header.timestamp += kPayloadLengthSamples; + if (!lost) + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + }; + + // Insert and decode 10 packets. 
+ for (size_t i = 0; i < 10; ++i) { + insert_packet(); + } + AudioFrame output; + size_t count_loops = 0; + do { + bool muted; + // Make sure we don't hang the test if we never go to PLC. + ASSERT_LT(++count_loops, 100u); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + } while (output.speech_type_ == AudioFrame::kNormalSpeech); + + insert_packet(); + + insert_packet(/*lost=*/true); + + // Ensure packet gets marked as missing. + for (int i = 0; i < 5; ++i) { + insert_packet(); + } + + // Missing packet recoverable with 5ms RTT. + EXPECT_THAT(neteq_->GetNackList(5), Not(IsEmpty())); + + // No packets should have TimeToPlay > 500ms. + EXPECT_THAT(neteq_->GetNackList(500), IsEmpty()); +} + +INSTANTIATE_TEST_SUITE_P(GetNackList, + NetEqImplTestSdpFormatParameter, + testing::Values(SdpAudioFormat("g722", 8000, 1), + SdpAudioFormat("opus", 48000, 2))); + +// This test verifies that NetEq can handle comfort noise and enters/quits codec +// internal CNG mode properly. +TEST_F(NetEqImplTest, CodecInternalCng) { + UseNoMocks(); + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder)); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateKhz = 48; + const size_t kPayloadLengthSamples = + static_cast<size_t>(20 * kSampleRateKhz); // 20 ms. 
+ const size_t kPayloadLengthBytes = 10; + uint8_t payload[kPayloadLengthBytes] = {0}; + int16_t dummy_output[kPayloadLengthSamples] = {0}; + + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, SampleRateHz()) + .WillRepeatedly(Return(kSampleRateKhz * 1000)); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, PacketDuration(_, kPayloadLengthBytes)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples))); + // Packed duration when asking the decoder for more CNG data (without a new + // packet). + EXPECT_CALL(mock_decoder, PacketDuration(nullptr, 0)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples))); + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("opus", 48000, 2))); + + struct Packet { + int sequence_number_delta; + int timestamp_delta; + AudioDecoder::SpeechType decoder_output_type; + }; + std::vector<Packet> packets = { + {0, 0, AudioDecoder::kSpeech}, + {1, kPayloadLengthSamples, AudioDecoder::kComfortNoise}, + {2, 2 * kPayloadLengthSamples, AudioDecoder::kSpeech}, + {1, kPayloadLengthSamples, AudioDecoder::kSpeech}}; + + for (size_t i = 0; i < packets.size(); ++i) { + rtp_header.sequenceNumber += packets[i].sequence_number_delta; + rtp_header.timestamp += packets[i].timestamp_delta; + payload[0] = i; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Pointee(x) verifies that first byte of the payload equals x, this makes + // it possible to verify that the correct payload is fed to Decode(). 
+ EXPECT_CALL(mock_decoder, DecodeInternal(Pointee(i), kPayloadLengthBytes, + kSampleRateKhz * 1000, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>( + dummy_output, dummy_output + kPayloadLengthSamples), + SetArgPointee<4>(packets[i].decoder_output_type), + Return(rtc::checked_cast<int>(kPayloadLengthSamples)))); + } + + // Expect comfort noise to be returned by the decoder. + EXPECT_CALL(mock_decoder, + DecodeInternal(IsNull(), 0, kSampleRateKhz * 1000, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples), + SetArgPointee<4>(AudioDecoder::kComfortNoise), + Return(rtc::checked_cast<int>(kPayloadLengthSamples)))); + + std::vector<AudioFrame::SpeechType> expected_output = { + AudioFrame::kNormalSpeech, AudioFrame::kCNG, AudioFrame::kNormalSpeech}; + size_t output_index = 0; + + int timeout_counter = 0; + while (!packet_buffer_->Empty()) { + ASSERT_LT(timeout_counter++, 20) << "Test timed out"; + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + if (output_index + 1 < expected_output.size() && + output.speech_type_ == expected_output[output_index + 1]) { + ++output_index; + } else { + EXPECT_EQ(output.speech_type_, expected_output[output_index]); + } + } + + EXPECT_CALL(mock_decoder, Die()); +} + +TEST_F(NetEqImplTest, UnsupportedDecoder) { + UseNoMocks(); + ::testing::NiceMock<MockAudioDecoder> decoder; + + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&decoder)); + static const size_t kNetEqMaxFrameSize = 5760; // 120 ms @ 48 kHz. + static const size_t kChannels = 2; + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. 
+ const size_t kPayloadLengthBytes = 1; + uint8_t payload[kPayloadLengthBytes] = {0}; + int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + const uint8_t kFirstPayloadValue = 1; + const uint8_t kSecondPayloadValue = 2; + + EXPECT_CALL(decoder, + PacketDuration(Pointee(kFirstPayloadValue), kPayloadLengthBytes)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kNetEqMaxFrameSize + 1))); + + EXPECT_CALL(decoder, DecodeInternal(Pointee(kFirstPayloadValue), _, _, _, _)) + .Times(0); + + EXPECT_CALL(decoder, DecodeInternal(Pointee(kSecondPayloadValue), + kPayloadLengthBytes, kSampleRateHz, _, _)) + .Times(1) + .WillOnce(DoAll( + SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples * kChannels), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(static_cast<int>(kPayloadLengthSamples * kChannels)))); + + EXPECT_CALL(decoder, + PacketDuration(Pointee(kSecondPayloadValue), kPayloadLengthBytes)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kNetEqMaxFrameSize))); + + EXPECT_CALL(decoder, SampleRateHz()).WillRepeatedly(Return(kSampleRateHz)); + + EXPECT_CALL(decoder, Channels()).WillRepeatedly(Return(kChannels)); + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("L16", 8000, 1))); + + // Insert one packet. + payload[0] = kFirstPayloadValue; // This will make Decode() fail. + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + // Insert another packet. + payload[0] = kSecondPayloadValue; // This will make Decode() successful. + rtp_header.sequenceNumber++; + // The second timestamp needs to be at least 30 ms after the first to make + // the second packet get decoded. 
+ rtp_header.timestamp += 3 * kPayloadLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + AudioFrame output; + bool muted; + // First call to GetAudio will try to decode the "faulty" packet. + // Expect kFail return value. + EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted)); + // Output size and number of channels should be correct. + const size_t kExpectedOutputSize = 10 * (kSampleRateHz / 1000) * kChannels; + EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels); + EXPECT_EQ(kChannels, output.num_channels_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); + + // Second call to GetAudio will decode the packet that is ok. No errors are + // expected. + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels); + EXPECT_EQ(kChannels, output.num_channels_); + EXPECT_THAT(output.packet_infos_, SizeIs(1)); + + // Die isn't called through NiceMock (since it's called by the + // MockAudioDecoder constructor), so it needs to be mocked explicitly. + EXPECT_CALL(decoder, Die()); +} + +// This test inserts packets until the buffer is flushed. After that, it asks +// NetEq for the network statistics. The purpose of the test is to make sure +// that even though the buffer size increment is negative (which it becomes when +// the packet causing a flush is inserted), the packet length stored in the +// decision logic remains valid. +TEST_F(NetEqImplTest, FloodBufferAndGetNetworkStats) { + UseNoMocks(); + CreateInstance(); + + const size_t kPayloadLengthSamples = 80; + const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples; // PCM 16-bit. + const uint8_t kPayloadType = 17; // Just an arbitrary number. 
+ uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + + // Insert packets until the buffer flushes. + for (size_t i = 0; i <= config_.max_packets_in_buffer; ++i) { + EXPECT_EQ(i, packet_buffer_->NumPacketsInBuffer()); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + rtp_header.timestamp += rtc::checked_cast<uint32_t>(kPayloadLengthSamples); + ++rtp_header.sequenceNumber; + } + EXPECT_EQ(1u, packet_buffer_->NumPacketsInBuffer()); + + // Ask for network statistics. This should not crash. + NetEqNetworkStatistics stats; + EXPECT_EQ(NetEq::kOK, neteq_->NetworkStatistics(&stats)); +} + +TEST_F(NetEqImplTest, DecodedPayloadTooShort) { + UseNoMocks(); + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder)); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const size_t kPayloadLengthSamples = + static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. 
+ const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples; + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, SampleRateHz()) + .WillRepeatedly(Return(kSampleRateHz)); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, PacketDuration(_, _)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kPayloadLengthSamples))); + int16_t dummy_output[kPayloadLengthSamples] = {0}; + // The below expectation will make the mock decoder write + // `kPayloadLengthSamples` - 5 zeros to the output array, and mark it as + // speech. That is, the decoded length is 5 samples shorter than the expected. + EXPECT_CALL(mock_decoder, + DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _)) + .WillOnce( + DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples - 5), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(rtc::checked_cast<int>(kPayloadLengthSamples - 5)))); + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("L16", 8000, 1))); + + // Insert one packet. + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + + EXPECT_EQ(5u, neteq_->sync_buffer_for_test()->FutureLength()); + + // Pull audio once. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(1)); + + EXPECT_CALL(mock_decoder, Die()); +} + +// This test checks the behavior of NetEq when audio decoder fails. 
+TEST_F(NetEqImplTest, DecodingError) { + UseNoMocks(); + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder)); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const int kDecoderErrorCode = -97; // Any negative number. + + // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms. + const size_t kFrameLengthSamples = + static_cast<size_t>(5 * kSampleRateHz / 1000); + + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + + uint8_t payload[kPayloadLengthBytes] = {0}; + + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, SampleRateHz()) + .WillRepeatedly(Return(kSampleRateHz)); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, PacketDuration(_, _)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kFrameLengthSamples))); + EXPECT_CALL(mock_decoder, ErrorCode()).WillOnce(Return(kDecoderErrorCode)); + EXPECT_CALL(mock_decoder, HasDecodePlc()).WillOnce(Return(false)); + int16_t dummy_output[kFrameLengthSamples] = {0}; + + { + InSequence sequence; // Dummy variable. + // Mock decoder works normally the first time. + EXPECT_CALL(mock_decoder, + DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _)) + .Times(3) + .WillRepeatedly( + DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(rtc::checked_cast<int>(kFrameLengthSamples)))) + .RetiresOnSaturation(); + + // Then mock decoder fails. 
A common reason for failure can be buffer being + // too short + EXPECT_CALL(mock_decoder, + DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _)) + .WillOnce(Return(-1)) + .RetiresOnSaturation(); + + // Mock decoder finally returns to normal. + EXPECT_CALL(mock_decoder, + DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(rtc::checked_cast<int>(kFrameLengthSamples)))); + } + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("L16", 8000, 1))); + + // Insert packets. + for (int i = 0; i < 20; ++i) { + rtp_header.sequenceNumber += 1; + rtp_header.timestamp += kFrameLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + } + + // Pull audio. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(2)); // 5 ms packets vs 10 ms output + + // Pull audio again. Decoder fails. + EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + // We are not expecting anything for output.speech_type_, since an error was + // returned. + + // Pull audio again, should behave normal. 
+ EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(2)); // 5 ms packets vs 10 ms output + + EXPECT_CALL(mock_decoder, Die()); +} + +// This test checks the behavior of NetEq when audio decoder fails during CNG. +TEST_F(NetEqImplTest, DecodingErrorDuringInternalCng) { + UseNoMocks(); + + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + CreateInstance( + rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&mock_decoder)); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const int kSampleRateHz = 8000; + const int kDecoderErrorCode = -97; // Any negative number. + + // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms. + const size_t kFrameLengthSamples = + static_cast<size_t>(5 * kSampleRateHz / 1000); + + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + + uint8_t payload[kPayloadLengthBytes] = {0}; + + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, SampleRateHz()) + .WillRepeatedly(Return(kSampleRateHz)); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, PacketDuration(_, _)) + .WillRepeatedly(Return(rtc::checked_cast<int>(kFrameLengthSamples))); + EXPECT_CALL(mock_decoder, ErrorCode()).WillOnce(Return(kDecoderErrorCode)); + int16_t dummy_output[kFrameLengthSamples] = {0}; + + { + InSequence sequence; // Dummy variable. + // Mock decoder works normally the first 2 times. 
+ EXPECT_CALL(mock_decoder, + DecodeInternal(_, kPayloadLengthBytes, kSampleRateHz, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<4>(AudioDecoder::kComfortNoise), + Return(rtc::checked_cast<int>(kFrameLengthSamples)))) + .RetiresOnSaturation(); + + // Then mock decoder fails. A common reason for failure can be buffer being + // too short + EXPECT_CALL(mock_decoder, DecodeInternal(nullptr, 0, kSampleRateHz, _, _)) + .WillOnce(Return(-1)) + .RetiresOnSaturation(); + + // Mock decoder finally returns to normal. + EXPECT_CALL(mock_decoder, DecodeInternal(nullptr, 0, kSampleRateHz, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<4>(AudioDecoder::kComfortNoise), + Return(rtc::checked_cast<int>(kFrameLengthSamples)))); + } + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + + // Insert 2 packets. This will make netEq into codec internal CNG mode. + for (int i = 0; i < 2; ++i) { + rtp_header.sequenceNumber += 1; + rtp_header.timestamp += kFrameLengthSamples; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + } + + // Pull audio. + const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kCNG, output.speech_type_); + + // Pull audio again. Decoder fails. + EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + // We are not expecting anything for output.speech_type_, since an error was + // returned. + + // Pull audio again, should resume codec CNG. 
+ EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); + EXPECT_EQ(1u, output.num_channels_); + EXPECT_EQ(AudioFrame::kCNG, output.speech_type_); + + EXPECT_CALL(mock_decoder, Die()); +} + +// Tests that the return value from last_output_sample_rate_hz() is equal to the +// configured inital sample rate. +TEST_F(NetEqImplTest, InitialLastOutputSampleRate) { + UseNoMocks(); + config_.sample_rate_hz = 48000; + CreateInstance(); + EXPECT_EQ(48000, neteq_->last_output_sample_rate_hz()); +} + +TEST_F(NetEqImplTest, TickTimerIncrement) { + UseNoMocks(); + CreateInstance(); + ASSERT_TRUE(tick_timer_); + EXPECT_EQ(0u, tick_timer_->ticks()); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + EXPECT_EQ(1u, tick_timer_->ticks()); +} + +TEST_F(NetEqImplTest, SetBaseMinimumDelay) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + EXPECT_CALL(*mock_neteq_controller_, SetBaseMinimumDelay(_)) + .WillOnce(Return(true)) + .WillOnce(Return(false)); + + const int delay_ms = 200; + + EXPECT_EQ(true, neteq_->SetBaseMinimumDelayMs(delay_ms)); + EXPECT_EQ(false, neteq_->SetBaseMinimumDelayMs(delay_ms)); +} + +TEST_F(NetEqImplTest, GetBaseMinimumDelayMs) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + const int delay_ms = 200; + + EXPECT_CALL(*mock_neteq_controller_, GetBaseMinimumDelay()) + .WillOnce(Return(delay_ms)); + + EXPECT_EQ(delay_ms, neteq_->GetBaseMinimumDelayMs()); +} + +TEST_F(NetEqImplTest, TargetDelayMs) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + constexpr int kTargetLevelMs = 510; + EXPECT_CALL(*mock_neteq_controller_, TargetLevelMs()) + .WillOnce(Return(kTargetLevelMs)); + EXPECT_EQ(510, neteq_->TargetDelayMs()); +} + +TEST_F(NetEqImplTest, InsertEmptyPacket) { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + + RTPHeader rtp_header; + 
rtp_header.payloadType = 17; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_CALL(*mock_neteq_controller_, RegisterEmptyPacket()); + neteq_->InsertEmptyPacket(rtp_header); +} + +TEST_F(NetEqImplTest, NotifyControllerOfReorderedPacket) { + using ::testing::AllOf; + using ::testing::Field; + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(); + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kNormal)); + + const int kPayloadLengthSamples = 80; + const size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples; // PCM 16-bit. + const uint8_t kPayloadType = 17; // Just an arbitrary number. + uint8_t payload[kPayloadLengthBytes] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = 0x1234; + rtp_header.timestamp = 0x12345678; + rtp_header.ssrc = 0x87654321; + + EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", 8000, 1))); + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + AudioFrame output; + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); + + // Insert second packet that was sent before the first packet. + rtp_header.sequenceNumber -= 1; + rtp_header.timestamp -= kPayloadLengthSamples; + EXPECT_CALL( + *mock_neteq_controller_, + PacketArrived( + /*fs_hz*/ 8000, + /*should_update_stats*/ true, + /*info*/ + AllOf( + Field(&NetEqController::PacketArrivedInfo::packet_length_samples, + kPayloadLengthSamples), + Field(&NetEqController::PacketArrivedInfo::main_sequence_number, + rtp_header.sequenceNumber), + Field(&NetEqController::PacketArrivedInfo::main_timestamp, + rtp_header.timestamp)))); + + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); +} + +// When using a codec with 1000 channels, there should be no crashes. 
+TEST_F(NetEqImplTest, NoCrashWith1000Channels) { + using ::testing::AllOf; + using ::testing::Field; + UseNoMocks(); + use_mock_decoder_database_ = true; + enable_muted_state_ = true; + CreateInstance(); + const size_t kPayloadLength = 100; + const uint8_t kPayloadType = 0; + const uint16_t kFirstSequenceNumber = 0x1234; + const uint32_t kFirstTimestamp = 0x12345678; + const uint32_t kSsrc = 0x87654321; + uint8_t payload[kPayloadLength] = {0}; + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = kFirstSequenceNumber; + rtp_header.timestamp = kFirstTimestamp; + rtp_header.ssrc = kSsrc; + Packet fake_packet; + fake_packet.payload_type = kPayloadType; + fake_packet.sequence_number = kFirstSequenceNumber; + fake_packet.timestamp = kFirstTimestamp; + + AudioDecoder* decoder = nullptr; + + auto mock_decoder_factory = rtc::make_ref_counted<MockAudioDecoderFactory>(); + EXPECT_CALL(*mock_decoder_factory, MakeAudioDecoderMock(_, _, _)) + .WillOnce(Invoke([&](const SdpAudioFormat& format, + absl::optional<AudioCodecPairId> codec_pair_id, + std::unique_ptr<AudioDecoder>* dec) { + EXPECT_EQ("pcmu", format.name); + *dec = std::make_unique<AudioDecoderPcmU>(1000); + decoder = dec->get(); + })); + DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, mock_decoder_factory.get()); + // Expectations for decoder database. + EXPECT_CALL(*mock_decoder_database_, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(&info)); + EXPECT_CALL(*mock_decoder_database_, GetActiveCngDecoder()) + .WillRepeatedly(ReturnNull()); + EXPECT_CALL(*mock_decoder_database_, GetActiveDecoder()) + .WillRepeatedly(Return(decoder)); + EXPECT_CALL(*mock_decoder_database_, SetActiveDecoder(_, _)) + .WillOnce(Invoke([](uint8_t rtp_payload_type, bool* new_decoder) { + *new_decoder = true; + return 0; + })); + + // Insert first packet. 
+ neteq_->InsertPacket(rtp_header, payload); + + AudioFrame audio_frame; + bool muted; + + // Repeat 40 times to ensure we enter muted state. + for (int i = 0; i < 40; i++) { + // GetAudio should return an error, and not crash, even in muted state. + EXPECT_NE(0, neteq_->GetAudio(&audio_frame, &muted)); + } +} + +class Decoder120ms : public AudioDecoder { + public: + Decoder120ms(int sample_rate_hz, SpeechType speech_type) + : sample_rate_hz_(sample_rate_hz), + next_value_(1), + speech_type_(speech_type) {} + + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + EXPECT_EQ(sample_rate_hz_, sample_rate_hz); + size_t decoded_len = + rtc::CheckedDivExact(sample_rate_hz, 1000) * 120 * Channels(); + for (size_t i = 0; i < decoded_len; ++i) { + decoded[i] = next_value_++; + } + *speech_type = speech_type_; + return rtc::checked_cast<int>(decoded_len); + } + + void Reset() override { next_value_ = 1; } + int SampleRateHz() const override { return sample_rate_hz_; } + size_t Channels() const override { return 2; } + + private: + int sample_rate_hz_; + int16_t next_value_; + SpeechType speech_type_; +}; + +class NetEqImplTest120ms : public NetEqImplTest { + protected: + NetEqImplTest120ms() : NetEqImplTest() {} + virtual ~NetEqImplTest120ms() {} + + void CreateInstanceNoMocks() { + UseNoMocks(); + CreateInstance(decoder_factory_); + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}))); + } + + void CreateInstanceWithDelayManagerMock() { + UseNoMocks(); + use_mock_neteq_controller_ = true; + CreateInstance(decoder_factory_); + EXPECT_TRUE(neteq_->RegisterPayloadType( + kPayloadType, SdpAudioFormat("opus", 48000, 2, {{"stereo", "1"}}))); + } + + uint32_t timestamp_diff_between_packets() const { + return rtc::CheckedDivExact(kSamplingFreq_, 1000u) * 120; + } + + uint32_t first_timestamp() const { return 10u; } + + void 
GetFirstPacket() { + bool muted; + for (int i = 0; i < 12; i++) { + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_FALSE(muted); + } + } + + void InsertPacket(uint32_t timestamp) { + RTPHeader rtp_header; + rtp_header.payloadType = kPayloadType; + rtp_header.sequenceNumber = sequence_number_; + rtp_header.timestamp = timestamp; + rtp_header.ssrc = 15; + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + uint8_t payload[kPayloadLengthBytes] = {0}; + EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); + sequence_number_++; + } + + void Register120msCodec(AudioDecoder::SpeechType speech_type) { + const uint32_t sampling_freq = kSamplingFreq_; + decoder_factory_ = rtc::make_ref_counted<test::FunctionAudioDecoderFactory>( + [sampling_freq, speech_type]() { + std::unique_ptr<AudioDecoder> decoder = + std::make_unique<Decoder120ms>(sampling_freq, speech_type); + RTC_CHECK_EQ(2, decoder->Channels()); + return decoder; + }); + } + + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_; + AudioFrame output_; + const uint32_t kPayloadType = 17; + const uint32_t kSamplingFreq_ = 48000; + uint16_t sequence_number_ = 1; +}; + +TEST_F(NetEqImplTest120ms, CodecInternalCng) { + Register120msCodec(AudioDecoder::kComfortNoise); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kCodecInternalCng, + neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Normal) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + EXPECT_EQ(NetEq::Operation::kNormal, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Merge) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + EXPECT_CALL(*mock_neteq_controller_, CngOff()).WillRepeatedly(Return(true)); + 
InsertPacket(first_timestamp()); + + GetFirstPacket(); + bool muted; + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .WillOnce(Return(NetEq::Operation::kExpand)); + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + + InsertPacket(first_timestamp() + 2 * timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .WillOnce(Return(NetEq::Operation::kMerge)); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kMerge, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, Expand) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceNoMocks(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kExpand, neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, FastAccelerate) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kFastAccelerate)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kFastAccelerate, + neteq_->last_operation_for_test()); +} + +TEST_F(NetEqImplTest120ms, PreemptiveExpand) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kPreemptiveExpand)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kPreemptiveExpand, + neteq_->last_operation_for_test()); +} + 
+TEST_F(NetEqImplTest120ms, Accelerate) { + Register120msCodec(AudioDecoder::kSpeech); + CreateInstanceWithDelayManagerMock(); + + InsertPacket(first_timestamp()); + GetFirstPacket(); + + InsertPacket(first_timestamp() + timestamp_diff_between_packets()); + + EXPECT_CALL(*mock_neteq_controller_, GetDecision(_, _)) + .Times(1) + .WillOnce(Return(NetEq::Operation::kAccelerate)); + + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_, &muted)); + EXPECT_EQ(NetEq::Operation::kAccelerate, neteq_->last_operation_for_test()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc new file mode 100644 index 0000000000..a669ad727e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> + +#include "absl/memory/memory.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "system_wrappers/include/clock.h" +#include "test/audio_decoder_proxy_factory.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +namespace { + +std::unique_ptr<NetEq> CreateNetEq( + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) { + return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); +} + +} // namespace + +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; + +class MockAudioDecoder final : public AudioDecoder { + public: + static const int kPacketDuration = 960; // 48 kHz * 20 ms + + MockAudioDecoder(int sample_rate_hz, size_t num_channels) + : sample_rate_hz_(sample_rate_hz), + num_channels_(num_channels), + fec_enabled_(false) {} + ~MockAudioDecoder() override { Die(); } + MOCK_METHOD(void, Die, ()); + + MOCK_METHOD(void, Reset, (), (override)); + + class MockFrame : public AudioDecoder::EncodedAudioFrame { + public: + MockFrame(size_t num_channels) : num_channels_(num_channels) {} + + size_t Duration() const override { return kPacketDuration; } + + absl::optional<DecodeResult> Decode( + rtc::ArrayView<int16_t> decoded) const override { + const size_t output_size = + sizeof(int16_t) * kPacketDuration * num_channels_; + if (decoded.size() >= output_size) { + memset(decoded.data(), 0, + sizeof(int16_t) * kPacketDuration * num_channels_); + return DecodeResult{kPacketDuration * num_channels_, kSpeech}; + } else { + ADD_FAILURE() << "Expected decoded.size() to be >= output_size (" + << decoded.size() << " vs. 
" << output_size << ")"; + return absl::nullopt; + } + } + + private: + const size_t num_channels_; + }; + + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override { + std::vector<ParseResult> results; + if (fec_enabled_) { + std::unique_ptr<MockFrame> fec_frame(new MockFrame(num_channels_)); + results.emplace_back(timestamp - kPacketDuration, 1, + std::move(fec_frame)); + } + + std::unique_ptr<MockFrame> frame(new MockFrame(num_channels_)); + results.emplace_back(timestamp, 0, std::move(frame)); + return results; + } + + int PacketDuration(const uint8_t* encoded, + size_t encoded_len) const override { + ADD_FAILURE() << "Since going through ParsePayload, PacketDuration should " + "never get called."; + return kPacketDuration; + } + + bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override { + ADD_FAILURE() << "Since going through ParsePayload, PacketHasFec should " + "never get called."; + return fec_enabled_; + } + + int SampleRateHz() const override { return sample_rate_hz_; } + + size_t Channels() const override { return num_channels_; } + + void set_fec_enabled(bool enable_fec) { fec_enabled_ = enable_fec; } + + bool fec_enabled() const { return fec_enabled_; } + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + ADD_FAILURE() << "Since going through ParsePayload, DecodeInternal should " + "never get called."; + return -1; + } + + private: + const int sample_rate_hz_; + const size_t num_channels_; + bool fec_enabled_; +}; + +class NetEqNetworkStatsTest { + public: + static const int kPayloadSizeByte = 30; + static const int kFrameSizeMs = 20; + static const uint8_t kPayloadType = 95; + static const int kOutputLengthMs = 10; + + enum logic { + kIgnore, + kEqual, + kSmallerThan, + kLargerThan, + }; + + struct NetEqNetworkStatsCheck { + logic current_buffer_size_ms; + logic 
preferred_buffer_size_ms; + logic jitter_peaks_found; + logic packet_loss_rate; + logic expand_rate; + logic speech_expand_rate; + logic preemptive_rate; + logic accelerate_rate; + logic secondary_decoded_rate; + logic secondary_discarded_rate; + logic added_zero_samples; + NetEqNetworkStatistics stats_ref; + }; + + NetEqNetworkStatsTest(const SdpAudioFormat& format, MockAudioDecoder* decoder) + : decoder_(decoder), + decoder_factory_( + rtc::make_ref_counted<AudioDecoderProxyFactory>(decoder)), + samples_per_ms_(format.clockrate_hz / 1000), + frame_size_samples_(kFrameSizeMs * samples_per_ms_), + rtp_generator_(new RtpGenerator(samples_per_ms_)), + last_lost_time_(0), + packet_loss_interval_(0xffffffff) { + NetEq::Config config; + config.sample_rate_hz = format.clockrate_hz; + neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory_); + neteq_->RegisterPayloadType(kPayloadType, format); + } + + bool Lost(uint32_t send_time) { + if (send_time - last_lost_time_ >= packet_loss_interval_) { + last_lost_time_ = send_time; + return true; + } + return false; + } + + void SetPacketLossRate(double loss_rate) { + packet_loss_interval_ = + (loss_rate >= 1e-3 ? 
static_cast<double>(kFrameSizeMs) / loss_rate + : 0xffffffff); + } + + // `stats_ref` + // expects.x = -1, do not care + // expects.x = 0, 'x' in current stats should equal 'x' in `stats_ref` + // expects.x = 1, 'x' in current stats should < 'x' in `stats_ref` + // expects.x = 2, 'x' in current stats should > 'x' in `stats_ref` + void CheckNetworkStatistics(NetEqNetworkStatsCheck expects) { + NetEqNetworkStatistics stats; + neteq_->NetworkStatistics(&stats); + +#define CHECK_NETEQ_NETWORK_STATS(x) \ + switch (expects.x) { \ + case kEqual: \ + EXPECT_EQ(stats.x, expects.stats_ref.x); \ + break; \ + case kSmallerThan: \ + EXPECT_LT(stats.x, expects.stats_ref.x); \ + break; \ + case kLargerThan: \ + EXPECT_GT(stats.x, expects.stats_ref.x); \ + break; \ + default: \ + break; \ + } + + CHECK_NETEQ_NETWORK_STATS(current_buffer_size_ms); + CHECK_NETEQ_NETWORK_STATS(preferred_buffer_size_ms); + CHECK_NETEQ_NETWORK_STATS(jitter_peaks_found); + CHECK_NETEQ_NETWORK_STATS(expand_rate); + CHECK_NETEQ_NETWORK_STATS(speech_expand_rate); + CHECK_NETEQ_NETWORK_STATS(preemptive_rate); + CHECK_NETEQ_NETWORK_STATS(accelerate_rate); + CHECK_NETEQ_NETWORK_STATS(secondary_decoded_rate); + CHECK_NETEQ_NETWORK_STATS(secondary_discarded_rate); + +#undef CHECK_NETEQ_NETWORK_STATS + } + + void RunTest(int num_loops, NetEqNetworkStatsCheck expects) { + uint32_t time_now; + uint32_t next_send_time; + + // Initiate `last_lost_time_`. + time_now = next_send_time = last_lost_time_ = rtp_generator_->GetRtpHeader( + kPayloadType, frame_size_samples_, &rtp_header_); + for (int k = 0; k < num_loops; ++k) { + // Delay by one frame such that the FEC can come in. 
+ while (time_now + kFrameSizeMs >= next_send_time) { + next_send_time = rtp_generator_->GetRtpHeader( + kPayloadType, frame_size_samples_, &rtp_header_); + if (!Lost(next_send_time)) { + static const uint8_t payload[kPayloadSizeByte] = {0}; + ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header_, payload)); + } + } + bool muted = true; + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_frame_, &muted)); + ASSERT_FALSE(muted); + EXPECT_EQ(decoder_->Channels(), output_frame_.num_channels_); + EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * samples_per_ms_), + output_frame_.samples_per_channel_); + EXPECT_EQ(48000, neteq_->last_output_sample_rate_hz()); + + time_now += kOutputLengthMs; + } + CheckNetworkStatistics(expects); + neteq_->FlushBuffers(); + } + + void DecodeFecTest() { + decoder_->set_fec_enabled(false); + NetEqNetworkStatsCheck expects = {kIgnore, // current_buffer_size_ms + kIgnore, // preferred_buffer_size_ms + kIgnore, // jitter_peaks_found + kEqual, // packet_loss_rate + kEqual, // expand_rate + kEqual, // voice_expand_rate + kIgnore, // preemptive_rate + kEqual, // accelerate_rate + kEqual, // decoded_fec_rate + kEqual, // discarded_fec_rate + kEqual, // added_zero_samples + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + RunTest(50, expects); + + // Next we introduce packet losses. + SetPacketLossRate(0.1); + expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 898; + RunTest(50, expects); + + // Next we enable FEC. + decoder_->set_fec_enabled(true); + // If FEC fills in the lost packets, no packet loss will be counted. 
+ expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 0; + expects.stats_ref.secondary_decoded_rate = 2006; + expects.stats_ref.secondary_discarded_rate = 14336; + RunTest(50, expects); + } + + void NoiseExpansionTest() { + NetEqNetworkStatsCheck expects = {kIgnore, // current_buffer_size_ms + kIgnore, // preferred_buffer_size_ms + kIgnore, // jitter_peaks_found + kEqual, // packet_loss_rate + kEqual, // expand_rate + kEqual, // speech_expand_rate + kIgnore, // preemptive_rate + kEqual, // accelerate_rate + kEqual, // decoded_fec_rate + kEqual, // discard_fec_rate + kEqual, // added_zero_samples + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + RunTest(50, expects); + + SetPacketLossRate(1); + expects.stats_ref.expand_rate = 16384; + expects.stats_ref.speech_expand_rate = 5324; + RunTest(10, expects); // Lost 10 * 20ms in a row. + } + + private: + MockAudioDecoder* decoder_; + rtc::scoped_refptr<AudioDecoderProxyFactory> decoder_factory_; + std::unique_ptr<NetEq> neteq_; + + const int samples_per_ms_; + const size_t frame_size_samples_; + std::unique_ptr<RtpGenerator> rtp_generator_; + RTPHeader rtp_header_; + uint32_t last_lost_time_; + uint32_t packet_loss_interval_; + AudioFrame output_frame_; +}; + +TEST(NetEqNetworkStatsTest, DecodeFec) { + MockAudioDecoder decoder(48000, 1); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.DecodeFecTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +TEST(NetEqNetworkStatsTest, StereoDecodeFec) { + MockAudioDecoder decoder(48000, 2); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.DecodeFecTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +TEST(NetEqNetworkStatsTest, NoiseExpansionTest) { + MockAudioDecoder decoder(48000, 1); + NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder); + test.NoiseExpansionTest(); + EXPECT_CALL(decoder, Die()).Times(1); +} + +} // namespace test +} // namespace webrtc diff --git 
a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
new file mode 100644
index 0000000000..6fa56fd1c1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
@@ -0,0 +1,424 @@
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

// Test to verify correct stereo and multi-channel operation.

#include <algorithm>
#include <list>
#include <memory>
#include <string>

#include "api/audio/audio_frame.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/neteq/neteq.h"
#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
#include "modules/audio_coding/neteq/default_neteq_factory.h"
#include "modules/audio_coding/neteq/tools/input_audio_file.h"
#include "modules/audio_coding/neteq/tools/rtp_generator.h"
#include "rtc_base/strings/string_builder.h"
#include "system_wrappers/include/clock.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"

namespace webrtc {

// One parameter set for the parameterized multi-channel tests below.
struct TestParameters {
  int frame_size;       // Packet length in ms (stored as frame_size_ms_).
  int sample_rate;      // Sample rate in Hz (stored as sample_rate_hz_).
  size_t num_channels;  // Channel count of the multi-channel stream.
};

// This is a parameterized test. The test parameters are supplied through a
// TestParameters struct, which is obtained through the GetParam() method.
//
// The objective of the test is to create a mono input signal and a
// multi-channel input signal, where each channel is identical to the mono
// input channel. The two input signals are processed through their respective
// NetEq instances. After that, the output signals are compared.
The expected +// result is that each channel in the multi-channel output is identical to the +// mono output. +class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> { + protected: + static const int kTimeStepMs = 10; + static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz. + static const uint8_t kPayloadTypeMono = 95; + static const uint8_t kPayloadTypeMulti = 96; + + NetEqStereoTest() + : num_channels_(GetParam().num_channels), + sample_rate_hz_(GetParam().sample_rate), + samples_per_ms_(sample_rate_hz_ / 1000), + frame_size_ms_(GetParam().frame_size), + frame_size_samples_( + static_cast<size_t>(frame_size_ms_ * samples_per_ms_)), + output_size_samples_(10 * samples_per_ms_), + clock_(0), + rtp_generator_mono_(samples_per_ms_), + rtp_generator_(samples_per_ms_), + payload_size_bytes_(0), + multi_payload_size_bytes_(0), + last_send_time_(0), + last_arrival_time_(0) { + NetEq::Config config; + config.sample_rate_hz = sample_rate_hz_; + DefaultNetEqFactory neteq_factory; + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + neteq_mono_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_); + neteq_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_); + input_ = new int16_t[frame_size_samples_]; + encoded_ = new uint8_t[2 * frame_size_samples_]; + input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_]; + encoded_multi_channel_ = + new uint8_t[frame_size_samples_ * 2 * num_channels_]; + } + + ~NetEqStereoTest() { + delete[] input_; + delete[] encoded_; + delete[] input_multi_channel_; + delete[] encoded_multi_channel_; + } + + virtual void SetUp() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + input_file_.reset(new test::InputAudioFile(file_name)); + RTC_CHECK_GE(num_channels_, 2); + ASSERT_TRUE(neteq_mono_->RegisterPayloadType( + kPayloadTypeMono, SdpAudioFormat("l16", sample_rate_hz_, 1))); + ASSERT_TRUE(neteq_->RegisterPayloadType( + 
kPayloadTypeMulti, + SdpAudioFormat("l16", sample_rate_hz_, num_channels_))); + } + + virtual void TearDown() {} + + int GetNewPackets() { + if (!input_file_->Read(frame_size_samples_, input_)) { + return -1; + } + payload_size_bytes_ = + WebRtcPcm16b_Encode(input_, frame_size_samples_, encoded_); + if (frame_size_samples_ * 2 != payload_size_bytes_) { + return -1; + } + int next_send_time = rtp_generator_mono_.GetRtpHeader( + kPayloadTypeMono, frame_size_samples_, &rtp_header_mono_); + MakeMultiChannelInput(); + multi_payload_size_bytes_ = WebRtcPcm16b_Encode( + input_multi_channel_, frame_size_samples_ * num_channels_, + encoded_multi_channel_); + if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) { + return -1; + } + rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_, + &rtp_header_); + return next_send_time; + } + + virtual void MakeMultiChannelInput() { + test::InputAudioFile::DuplicateInterleaved( + input_, frame_size_samples_, num_channels_, input_multi_channel_); + } + + virtual void VerifyOutput(size_t num_samples) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = output_multi_channel_.data(); + for (size_t i = 0; i < num_samples; ++i) { + for (size_t j = 0; j < num_channels_; ++j) { + ASSERT_EQ(output_data[i], + output_multi_channel_data[i * num_channels_ + j]) + << "Diff in sample " << i << ", channel " << j << "."; + } + } + } + + virtual int GetArrivalTime(int send_time) { + int arrival_time = last_arrival_time_ + (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + virtual bool Lost() { return false; } + + void RunTest(int num_loops) { + // Get next input packets (mono and multi-channel). 
+ int next_send_time; + int next_arrival_time; + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + + int time_now = 0; + for (int k = 0; k < num_loops; ++k) { + while (time_now >= next_arrival_time) { + // Insert packet in mono instance. + ASSERT_EQ(NetEq::kOK, + neteq_mono_->InsertPacket( + rtp_header_mono_, rtc::ArrayView<const uint8_t>( + encoded_, payload_size_bytes_))); + // Insert packet in multi-channel instance. + ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket( + rtp_header_, rtc::ArrayView<const uint8_t>( + encoded_multi_channel_, + multi_payload_size_bytes_))); + // Get next input packets (mono and multi-channel). + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + } + // Get audio from mono instance. + bool muted; + EXPECT_EQ(NetEq::kOK, neteq_mono_->GetAudio(&output_, &muted)); + ASSERT_FALSE(muted); + EXPECT_EQ(1u, output_.num_channels_); + EXPECT_EQ(output_size_samples_, output_.samples_per_channel_); + // Get audio from multi-channel instance. + ASSERT_EQ(NetEq::kOK, neteq_->GetAudio(&output_multi_channel_, &muted)); + ASSERT_FALSE(muted); + EXPECT_EQ(num_channels_, output_multi_channel_.num_channels_); + EXPECT_EQ(output_size_samples_, + output_multi_channel_.samples_per_channel_); + rtc::StringBuilder ss; + ss << "Lap number " << k << "."; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + // Compare mono and multi-channel. 
+ ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_)); + + time_now += kTimeStepMs; + clock_.AdvanceTimeMilliseconds(kTimeStepMs); + } + } + + const size_t num_channels_; + const int sample_rate_hz_; + const int samples_per_ms_; + const int frame_size_ms_; + const size_t frame_size_samples_; + const size_t output_size_samples_; + SimulatedClock clock_; + std::unique_ptr<NetEq> neteq_mono_; + std::unique_ptr<NetEq> neteq_; + test::RtpGenerator rtp_generator_mono_; + test::RtpGenerator rtp_generator_; + int16_t* input_; + int16_t* input_multi_channel_; + uint8_t* encoded_; + uint8_t* encoded_multi_channel_; + AudioFrame output_; + AudioFrame output_multi_channel_; + RTPHeader rtp_header_mono_; + RTPHeader rtp_header_; + size_t payload_size_bytes_; + size_t multi_payload_size_bytes_; + int last_send_time_; + int last_arrival_time_; + std::unique_ptr<test::InputAudioFile> input_file_; +}; + +class NetEqStereoTestNoJitter : public NetEqStereoTest { + protected: + NetEqStereoTestNoJitter() : NetEqStereoTest() { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. + last_arrival_time_ = -100; + } +}; + +TEST_P(NetEqStereoTestNoJitter, RunTest) { + RunTest(8); +} + +class NetEqStereoTestPositiveDrift : public NetEqStereoTest { + protected: + NetEqStereoTestPositiveDrift() : NetEqStereoTest(), drift_factor(0.9) { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. 
+ last_arrival_time_ = -100; + } + virtual int GetArrivalTime(int send_time) { + int arrival_time = + last_arrival_time_ + drift_factor * (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + double drift_factor; +}; + +TEST_P(NetEqStereoTestPositiveDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift { + protected: + NetEqStereoTestNegativeDrift() : NetEqStereoTestPositiveDrift() { + drift_factor = 1.1; + last_arrival_time_ = 0; + } +}; + +TEST_P(NetEqStereoTestNegativeDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestDelays : public NetEqStereoTest { + protected: + static const int kDelayInterval = 10; + static const int kDelay = 1000; + NetEqStereoTestDelays() : NetEqStereoTest(), frame_index_(0) {} + + virtual int GetArrivalTime(int send_time) { + // Deliver immediately, unless we have a back-log. + int arrival_time = std::min(last_arrival_time_, send_time); + if (++frame_index_ % kDelayInterval == 0) { + // Delay this packet. + arrival_time += kDelay; + } + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestDelays, RunTest) { + RunTest(1000); +} + +class NetEqStereoTestLosses : public NetEqStereoTest { + protected: + static const int kLossInterval = 10; + NetEqStereoTestLosses() : NetEqStereoTest(), frame_index_(0) {} + + virtual bool Lost() { return (++frame_index_) % kLossInterval == 0; } + + // TODO(hlundin): NetEq is not giving bitexact results for these cases. 
+ virtual void VerifyOutput(size_t num_samples) { + for (size_t i = 0; i < num_samples; ++i) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = output_multi_channel_.data(); + auto first_channel_sample = output_multi_channel_data[i * num_channels_]; + for (size_t j = 0; j < num_channels_; ++j) { + const int kErrorMargin = 200; + EXPECT_NEAR(output_data[i], + output_multi_channel_data[i * num_channels_ + j], + kErrorMargin) + << "Diff in sample " << i << ", channel " << j << "."; + EXPECT_EQ(first_channel_sample, + output_multi_channel_data[i * num_channels_ + j]); + } + } + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestLosses, RunTest) { + RunTest(100); +} + +class NetEqStereoTestSingleActiveChannelPlc : public NetEqStereoTestLosses { + protected: + NetEqStereoTestSingleActiveChannelPlc() : NetEqStereoTestLosses() {} + + virtual void MakeMultiChannelInput() override { + // Create a multi-channel input by copying the mono channel from file to the + // first channel, and setting the others to zero. + memset(input_multi_channel_, 0, + frame_size_samples_ * num_channels_ * sizeof(int16_t)); + for (size_t i = 0; i < frame_size_samples_; ++i) { + input_multi_channel_[i * num_channels_] = input_[i]; + } + } + + virtual void VerifyOutput(size_t num_samples) override { + // Simply verify that all samples in channels other than the first are zero. + const int16_t* output_multi_channel_data = output_multi_channel_.data(); + for (size_t i = 0; i < num_samples; ++i) { + for (size_t j = 1; j < num_channels_; ++j) { + EXPECT_EQ(0, output_multi_channel_data[i * num_channels_ + j]) + << "Sample " << i << ", channel " << j << " is non-zero."; + } + } + } +}; + +TEST_P(NetEqStereoTestSingleActiveChannelPlc, RunTest) { + RunTest(100); +} + +// Creates a list of parameter sets. 
+std::list<TestParameters> GetTestParameters() { + std::list<TestParameters> l; + const int sample_rates[] = {8000, 16000, 32000}; + const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]); + // Loop through sample rates. + for (int rate_index = 0; rate_index < num_rates; ++rate_index) { + int sample_rate = sample_rates[rate_index]; + // Loop through all frame sizes between 10 and 60 ms. + for (int frame_size = 10; frame_size <= 60; frame_size += 10) { + TestParameters p; + p.frame_size = frame_size; + p.sample_rate = sample_rate; + p.num_channels = 2; + l.push_back(p); + if (sample_rate == 8000) { + // Add a five-channel test for 8000 Hz. + p.num_channels = 5; + l.push_back(p); + } + } + } + return l; +} + +// Pretty-printing the test parameters in case of an error. +void PrintTo(const TestParameters& p, ::std::ostream* os) { + *os << "{frame_size = " << p.frame_size + << ", num_channels = " << p.num_channels + << ", sample_rate = " << p.sample_rate << "}"; +} + +// Instantiate the tests. Each test is instantiated using the function above, +// so that all different parameter combinations are tested. 
INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestNoJitter,
                         ::testing::ValuesIn(GetTestParameters()));

INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestPositiveDrift,
                         ::testing::ValuesIn(GetTestParameters()));

INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestNegativeDrift,
                         ::testing::ValuesIn(GetTestParameters()));

INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestDelays,
                         ::testing::ValuesIn(GetTestParameters()));

INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestLosses,
                         ::testing::ValuesIn(GetTestParameters()));

INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         NetEqStereoTestSingleActiveChannelPlc,
                         ::testing::ValuesIn(GetTestParameters()));
}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc
new file mode 100644
index 0000000000..451e0c9587
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc
@@ -0,0 +1,1013 @@
/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "api/neteq/neteq.h"

#include <math.h>
#include <stdlib.h>
#include <string.h>  // memset

#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "absl/flags/flag.h"
#include "api/audio/audio_frame.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
#include "modules/audio_coding/neteq/test/neteq_decoding_test.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h"
#include "modules/audio_coding/neteq/tools/neteq_test.h"
#include "modules/include/module_common_types_public.h"
#include "modules/rtp_rtcp/include/rtcp_statistics.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "rtc_base/ignore_wundef.h"
#include "rtc_base/message_digest.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/system/arch.h"
#include "test/field_trial.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"

// Command-line flag read by the bit-exactness tests below and passed as the
// last argument to DecodeAndCompare().
ABSL_FLAG(bool, gen_ref, false, "Generate reference files.");

namespace webrtc {

// Passes the "neteq_universal_new" RTP resource together with the expected
// output and network-statistics checksums to DecodeAndCompare(). The test is
// registered under its real name only when WEBRTC_LINUX, WEBRTC_ARCH_X86_64,
// WEBRTC_NETEQ_UNITTEST_BITEXACT, one of the iSAC codec macros and
// WEBRTC_CODEC_ILBC are all defined; otherwise it gets the DISABLED_ prefix.
#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) &&         \
    defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) &&                      \
    (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \
    defined(WEBRTC_CODEC_ILBC)
#define MAYBE_TestBitExactness TestBitExactness
#else
#define MAYBE_TestBitExactness DISABLED_TestBitExactness
#endif
TEST_F(NetEqDecodingTest, MAYBE_TestBitExactness) {
  const std::string input_rtp_file =
      webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp");

  const std::string output_checksum =
      "dee7a10ab92526876a70a85bc48a4906901af3df";

  const std::string network_stats_checksum =
      "911dbf5fd97f48d25b8f0967286eb73c9d6f6158";

  DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
                   absl::GetFlag(FLAGS_gen_ref));
}

// Same as above for the "neteq_opus" RTP resource. Registered under its real
// name only when WEBRTC_LINUX, WEBRTC_ARCH_X86_64,
// WEBRTC_NETEQ_UNITTEST_BITEXACT and WEBRTC_CODEC_OPUS are all defined.
#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && \
    defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && defined(WEBRTC_CODEC_OPUS)
#define MAYBE_TestOpusBitExactness TestOpusBitExactness
#else
#define MAYBE_TestOpusBitExactness DISABLED_TestOpusBitExactness
#endif
TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) {
  const std::string input_rtp_file =
      webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp");

  // The checksum depends on SSE being enabled, the second part is the non-SSE
  // checksum.
  const std::string output_checksum =
      "fec6827bb9ee0b21770bbbb4a3a6f8823bf537dc|"
      "c5eb0a8fcf7e8255a40f821cb815e1096619efeb";

  // Two '|'-separated alternatives here as well.
  const std::string network_stats_checksum =
      "3d043e47e5f4bb81d37e7bce8c44bf802965c853|"
      "076662525572dba753b11578330bd491923f7f5e";

  DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
                   absl::GetFlag(FLAGS_gen_ref));
}

// Same as above for the "neteq_opus_dtx" RTP resource, guarded by the same
// macros as TestOpusBitExactness.
#if defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && \
    defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && defined(WEBRTC_CODEC_OPUS)
#define MAYBE_TestOpusDtxBitExactness TestOpusDtxBitExactness
#else
#define MAYBE_TestOpusDtxBitExactness DISABLED_TestOpusDtxBitExactness
#endif
TEST_F(NetEqDecodingTest, MAYBE_TestOpusDtxBitExactness) {
  const std::string input_rtp_file =
      webrtc::test::ResourcePath("audio_coding/neteq_opus_dtx", "rtp");

  // The checksum depends on SSE being enabled, the second part is the non-SSE
  // checksum.
  const std::string output_checksum =
      "b3c4899eab5378ef5e54f2302948872149f6ad5e|"
      "e97e32a77355e7ce46a2dc2f43bf1c2805530fcb";

  // A single checksum is given for the network statistics.
  const std::string network_stats_checksum =
      "dc8447b9fee1a21fd5d1f4045d62b982a3fb0215";

  DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
                   absl::GetFlag(FLAGS_gen_ref));
}

// Use fax mode to avoid time-scaling. This is to simplify the testing of
// packet waiting times in the packet buffer.
class NetEqDecodingTestFaxMode : public NetEqDecodingTest {
 protected:
  NetEqDecodingTestFaxMode() : NetEqDecodingTest() {
    // Every test using this fixture runs with time stretching switched off.
    config_.for_test_no_time_stretching = true;
  }
  // Shared body of the TestJitterBufferDelay* tests; when `apply_packet_loss`
  // is true one extra GetAudio() call is made after all packets are consumed.
  void TestJitterBufferDelay(bool apply_packet_loss);
};

// Checks the waiting-time statistics returned by NetworkStatistics() after 30
// packets are inserted at once and then pulled out one frame at a time, and
// checks that a second query returns -1 for all four values.
TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
  // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio.
  size_t num_frames = 30;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  for (size_t i = 0; i < num_frames; ++i) {
    const uint8_t payload[kPayloadBytes] = {0};
    RTPHeader rtp_info;
    rtp_info.sequenceNumber = rtc::checked_cast<uint16_t>(i);
    rtp_info.timestamp = rtc::checked_cast<uint32_t>(i * kSamples);
    rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
    rtp_info.payloadType = 94;  // PCM16b WB codec.
    rtp_info.markerBit = 0;
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
  }
  // Pull out all data.
  for (size_t i = 0; i < num_frames; ++i) {
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics stats;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms
  // spacing (per definition), we expect the delay to increase with 10 ms for
  // each packet. Thus, we are calculating the statistics for a series from 10
  // to 300, in steps of 10 ms.
  EXPECT_EQ(155, stats.mean_waiting_time_ms);
  EXPECT_EQ(155, stats.median_waiting_time_ms);
  EXPECT_EQ(10, stats.min_waiting_time_ms);
  EXPECT_EQ(300, stats.max_waiting_time_ms);

  // Check statistics again and make sure it's been reset.
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  EXPECT_EQ(-1, stats.mean_waiting_time_ms);
  EXPECT_EQ(-1, stats.median_waiting_time_ms);
  EXPECT_EQ(-1, stats.min_waiting_time_ms);
  EXPECT_EQ(-1, stats.max_waiting_time_ms);
}


// The LongCng* tests below only differ in the constants they forward to
// LongCngWithClockDrift(), which is defined outside this part of the file.
TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDrift) {
  // Apply a clock drift of -25 ms / s (sender faster than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
  const double kNetworkFreezeTimeMs = 0.0;
  const bool kGetAudioDuringFreezeRecovery = false;
  const int kDelayToleranceMs = 20;
  const int kMaxTimeToSpeechMs = 100;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDrift) {
  // Apply a clock drift of +25 ms / s (sender slower than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
  const double kNetworkFreezeTimeMs = 0.0;
  const bool kGetAudioDuringFreezeRecovery = false;
  const int kDelayToleranceMs = 40;
  const int kMaxTimeToSpeechMs = 100;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) {
  // Apply a clock drift of -25 ms / s (sender faster than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
  const double kNetworkFreezeTimeMs = 5000.0;
  const bool kGetAudioDuringFreezeRecovery = false;
  const int kDelayToleranceMs = 60;
  const int kMaxTimeToSpeechMs = 200;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreeze) {
  // Apply a clock drift of +25 ms / s (sender slower than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
  const double kNetworkFreezeTimeMs = 5000.0;
  const bool kGetAudioDuringFreezeRecovery = false;
  const int kDelayToleranceMs = 40;
  const int kMaxTimeToSpeechMs = 100;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreezeExtraPull) {
  // Apply a clock drift of +25 ms / s (sender slower than receiver).
  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
  const double kNetworkFreezeTimeMs = 5000.0;
  const bool kGetAudioDuringFreezeRecovery = true;
  const int kDelayToleranceMs = 40;
  const int kMaxTimeToSpeechMs = 100;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

TEST_F(NetEqDecodingTest, LongCngWithoutClockDrift) {
  const double kDriftFactor = 1.0;  // No drift.
  const double kNetworkFreezeTimeMs = 0.0;
  const bool kGetAudioDuringFreezeRecovery = false;
  const int kDelayToleranceMs = 10;
  const int kMaxTimeToSpeechMs = 50;
  LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
                        kGetAudioDuringFreezeRecovery, kDelayToleranceMs,
                        kMaxTimeToSpeechMs);
}

// Inserting a packet whose payload type has no registered decoder is expected
// to return NetEq::kFail.
TEST_F(NetEqDecodingTest, UnknownPayloadType) {
  const size_t kPayloadBytes = 100;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  rtp_info.payloadType = 1;  // Not registered as a decoder.
  EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(rtp_info, payload));
}

// DecoderError only runs when one of the iSAC codec macros is defined;
// otherwise it is registered with the DISABLED_ prefix.
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
#define MAYBE_DecoderError DecoderError
#else
#define MAYBE_DecoderError DISABLED_DecoderError
#endif

// Inserts an all-zero payload tagged as iSAC and expects GetAudio() to return
// NetEq::kFail while still zeroing the first 10 ms of the output frame.
TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
  const size_t kPayloadBytes = 100;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  rtp_info.payloadType = 103;  // iSAC, but the payload is invalid.
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
  // Set all samples of `out_frame_` to 1, and verify that they were set to 0
  // by the call to GetAudio.
  int16_t* out_frame_data = out_frame_.mutable_data();
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
    out_frame_data[i] = 1;
  }
  bool muted;
  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_FALSE(muted);

  // Verify that the first 160 samples are set to 0.
  static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
  const int16_t* const_out_frame_data = out_frame_.data();
  for (int i = 0; i < kExpectedOutputLength; ++i) {
    rtc::StringBuilder ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    EXPECT_EQ(0, const_out_frame_data[i]);
  }
}

// Calls GetAudio() on a NetEq instance that has not received any packet and
// expects success, a zeroed first block and an unchanged output sample rate.
TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Set all samples of `out_frame_` to 1, and verify that they were set to 0
  // by the call to GetAudio.
  int16_t* out_frame_data = out_frame_.mutable_data();
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
    out_frame_data[i] = 1;
  }
  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_FALSE(muted);
  // Verify that the first block of samples is set to 0.
  static const int kExpectedOutputLength =
      kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
  const int16_t* const_out_frame_data = out_frame_.data();
  for (int i = 0; i < kExpectedOutputLength; ++i) {
    rtc::StringBuilder ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    EXPECT_EQ(0, const_out_frame_data[i]);
  }
  // Verify that the sample rate did not change from the initial configuration.
  EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
}

// Fixture for the background-noise test. CheckBgn() feeds ten PCM16 packets at
// the given sample rate, then keeps pulling audio without inserting packets
// and checks the speech type and sample values of the frames that come out.
class NetEqBgnTest : public NetEqDecodingTest {
 protected:
  // `sampling_rate_hz` must be 8000, 16000 or 32000; any other value fails the
  // test immediately.
  void CheckBgn(int sampling_rate_hz) {
    size_t expected_samples_per_channel = 0;
    uint8_t payload_type = 0xFF;  // Invalid.
    if (sampling_rate_hz == 8000) {
      expected_samples_per_channel = kBlockSize8kHz;
      payload_type = 93;  // PCM 16, 8 kHz.
    } else if (sampling_rate_hz == 16000) {
      expected_samples_per_channel = kBlockSize16kHz;
      payload_type = 94;  // PCM 16, 16 kHz.
    } else if (sampling_rate_hz == 32000) {
      expected_samples_per_channel = kBlockSize32kHz;
      payload_type = 95;  // PCM 16, 32 kHz.
    } else {
      ASSERT_TRUE(false);  // Unsupported test case.
    }

    AudioFrame output;
    test::AudioLoop input;
    // We are using the same 32 kHz input file for all tests, regardless of
    // `sampling_rate_hz`. The output may sound weird, but the test is still
    // valid.
    ASSERT_TRUE(input.Init(
        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
        10 * sampling_rate_hz,  // Max 10 seconds loop length.
        expected_samples_per_channel));

    // Payload of 10 ms of PCM16 32 kHz. This is the largest block used by any
    // of the three supported rates.
    uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
    RTPHeader rtp_info;
    PopulateRtpInfo(0, 0, &rtp_info);
    rtp_info.payloadType = payload_type;

    bool muted;
    for (int n = 0; n < 10; ++n) {  // Insert few packets and get audio.
      auto block = input.GetNextBlock();
      ASSERT_EQ(expected_samples_per_channel, block.size());
      size_t enc_len_bytes =
          WebRtcPcm16b_Encode(block.data(), block.size(), payload);
      ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);

      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
                                                      payload, enc_len_bytes)));
      output.Reset();
      ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
      ASSERT_EQ(1u, output.num_channels_);
      ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
      ASSERT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);

      // Next packet.
      rtp_info.timestamp +=
          rtc::checked_cast<uint32_t>(expected_samples_per_channel);
      rtp_info.sequenceNumber++;
    }

    output.Reset();

    // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull
    // one frame without checking speech-type. This is the first frame pulled
    // without inserting any packet, and might not be labeled as PLC.
    ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
    ASSERT_EQ(1u, output.num_channels_);
    ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);

    // To be able to test the fading of background noise we need at least to
    // pull 611 frames.
    const int kFadingThreshold = 611;

    // Test several PLC-to-CNG frames for the expected behavior. The number 20
    // is arbitrary, but sufficiently large to test enough number of frames.
    const int kNumPlcToCngTestFrames = 20;
    bool plc_to_cng = false;
    for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
      output.Reset();
      // Set to non-zero.
      memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
      ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
      ASSERT_FALSE(muted);
      ASSERT_EQ(1u, output.num_channels_);
      ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
      if (output.speech_type_ == AudioFrame::kPLCCNG) {
        plc_to_cng = true;
        // A frame labeled PLC-to-CNG must contain only zero samples, i.e. its
        // energy must be exactly 0.
        double sum_squared = 0;
        const int16_t* output_data = output.data();
        for (size_t k = 0;
             k < output.num_channels_ * output.samples_per_channel_; ++k)
          sum_squared += output_data[k] * output_data[k];
        EXPECT_EQ(0, sum_squared);
      } else {
        EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
      }
    }
    EXPECT_TRUE(plc_to_cng);  // Just to be sure that PLC-to-CNG has occurred.
  }
};

// Runs CheckBgn() for each supported sample rate on the same fixture instance.
TEST_F(NetEqBgnTest, RunTest) {
  CheckBgn(8000);
  CheckBgn(16000);
  CheckBgn(32000);
}

// The four tests below call WrapTest(), which is defined outside this part of
// the file. Judging from the call sites its arguments are presumably (start
// sequence number, start timestamp, sequence numbers to drop, expect sequence
// number wrap, expect timestamp wrap) -- TODO confirm against its definition.
TEST_F(NetEqDecodingTest, SequenceNumberWrap) {
  // Start with a sequence number that will soon wrap.
  std::set<uint16_t> drop_seq_numbers;  // Don't drop any packets.
  WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false);
}

TEST_F(NetEqDecodingTest, SequenceNumberWrapAndDrop) {
  // Start with a sequence number that will soon wrap.
  std::set<uint16_t> drop_seq_numbers;
  drop_seq_numbers.insert(0xFFFF);
  drop_seq_numbers.insert(0x0);
  WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false);
}

TEST_F(NetEqDecodingTest, TimestampWrap) {
  // Start with a timestamp that will soon wrap.
  std::set<uint16_t> drop_seq_numbers;
  WrapTest(0, 0xFFFFFFFF - 3000, drop_seq_numbers, false, true);
}

TEST_F(NetEqDecodingTest, TimestampAndSequenceNumberWrap) {
  // Start with a timestamp and a sequence number that will wrap at the same
  // time.
  std::set<uint16_t> drop_seq_numbers;
  WrapTest(0xFFFF - 10, 0xFFFFFFFF - 5000, drop_seq_numbers, true, true);
}

// Inserts the same CNG packet twice (the second copy after the first has
// already been played) and checks that CNG keeps playing for the whole CNG
// period and that speech resumes with the expected playout timestamp.
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) {
  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  const int kFrameSizeMs = 10;
  const int kSampleRateKhz = 16;
  const int kSamples = kFrameSizeMs * kSampleRateKhz;
  const size_t kPayloadBytes = kSamples * 2;

  const int algorithmic_delay_samples =
      std::max(algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
  // Insert three speech packets. Three are needed to get the frame length
  // correct.
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  bool muted;
  for (int i = 0; i < 3; ++i) {
    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
    ++seq_no;
    timestamp += kSamples;

    // Pull audio once.
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }
  // Verify speech output.
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);

  // Insert same CNG packet twice.
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
  size_t payload_len;
  PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
  // This is the first time this CNG packet is inserted.
  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
                                                  payload, payload_len)));

  // Pull audio once and make sure CNG is played.
  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
  EXPECT_FALSE(
      neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
  EXPECT_EQ(timestamp - algorithmic_delay_samples,
            out_frame_.timestamp_ + out_frame_.samples_per_channel_);

  // Insert the same CNG packet again. Note that at this point it is old, since
  // we have already decoded the first copy of it.
  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
                                                  payload, payload_len)));

  // Pull audio until we have played `kCngPeriodMs` of CNG. Start at 10 ms since
  // we have already pulled out CNG once.
  for (int cng_time_ms = 10; cng_time_ms < kCngPeriodMs; cng_time_ms += 10) {
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
    EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
    EXPECT_FALSE(
        neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
    EXPECT_EQ(timestamp - algorithmic_delay_samples,
              out_frame_.timestamp_ + out_frame_.samples_per_channel_);
  }

  ++seq_no;
  timestamp += kCngPeriodSamples;
  uint32_t first_speech_timestamp = timestamp;
  // Insert speech again.
  for (int i = 0; i < 3; ++i) {
    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
    ++seq_no;
    timestamp += kSamples;
  }

  // Pull audio once and verify that the output is speech again.
  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
  absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
  ASSERT_TRUE(playout_timestamp);
  EXPECT_EQ(first_speech_timestamp + kSamples - algorithmic_delay_samples,
            *playout_timestamp);
}

// Starts a session with a CNG packet as the very first packet, checks that CNG
// is played, then inserts speech until the output timestamp passes the first
// speech timestamp (at most 20 packets) and checks that speech is played.
TEST_F(NetEqDecodingTest, CngFirst) {
  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  const int kFrameSizeMs = 10;
  const int kSampleRateKhz = 16;
  const int kSamples = kFrameSizeMs * kSampleRateKhz;
  const int kPayloadBytes = kSamples * 2;
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
  size_t payload_len;

  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;

  PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
  ASSERT_EQ(NetEq::kOK,
            neteq_->InsertPacket(
                rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len)));
  ++seq_no;
  timestamp += kCngPeriodSamples;

  // Pull audio once and make sure CNG is played.
  bool muted;
  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);

  // Insert some speech packets.
  const uint32_t first_speech_timestamp = timestamp;
  int timeout_counter = 0;
  do {
    ASSERT_LT(timeout_counter++, 20) << "Test timed out";
    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
    ++seq_no;
    timestamp += kSamples;

    // Pull audio once.
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  } while (!IsNewerTimestamp(out_frame_.timestamp_, first_speech_timestamp));
  // Verify speech output.
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
}

// Fixture that enables the muted state in the NetEq config and provides small
// helpers for inserting packets and pulling audio until a condition is met.
class NetEqDecodingTestWithMutedState : public NetEqDecodingTest {
 public:
  NetEqDecodingTestWithMutedState() : NetEqDecodingTest() {
    config_.enable_muted_state = true;
  }

 protected:
  static constexpr size_t kSamples = 10 * 16;
  static constexpr size_t kPayloadBytes = kSamples * 2;

  // Inserts one all-zero speech packet with sequence number 0 and the given
  // RTP timestamp.
  void InsertPacket(uint32_t rtp_timestamp) {
    uint8_t payload[kPayloadBytes] = {0};
    RTPHeader rtp_info;
    PopulateRtpInfo(0, rtp_timestamp, &rtp_info);
    EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
  }

  // Inserts one CNG packet (built by PopulateCng) with sequence number 0 and
  // the given RTP timestamp.
  void InsertCngPacket(uint32_t rtp_timestamp) {
    uint8_t payload[kPayloadBytes] = {0};
    RTPHeader rtp_info;
    size_t payload_len;
    PopulateCng(0, rtp_timestamp, &rtp_info, payload, &payload_len);
    EXPECT_EQ(NetEq::kOK,
              neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>(
                                                 payload, payload_len)));
  }

  // Pulls one frame into `out_frame_` and returns the muted flag reported by
  // GetAudio().
  bool GetAudioReturnMuted() {
    bool muted;
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    return muted;
  }

  // Pulls frames until GetAudio() reports muted, advancing `counter_` once per
  // non-muted frame; fails the test once the counter reaches 1000.
  void GetAudioUntilMuted() {
    while (!GetAudioReturnMuted()) {
      ASSERT_LT(counter_++, 1000) << "Test timed out";
    }
  }

  // Pulls frames until `out_frame_` is labeled as normal speech, advancing
  // `counter_` once per pulled frame; fails the test once the counter reaches
  // 1000. The last pulled frame must not be muted.
  void GetAudioUntilNormal() {
    bool muted = false;
    while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
      EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
      ASSERT_LT(counter_++, 1000) << "Test timed out";
    }
    EXPECT_FALSE(muted);
  }

  // Shared by GetAudioUntilMuted() and GetAudioUntilNormal(); the tests also
  // use it to derive the timestamp of the next packet to insert.
  int counter_ = 0;
};

// Verifies that NetEq goes in and out of muted state as expected.
TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
  // Insert one speech packet.
  InsertPacket(0);
  // Pull out audio once and expect it not to be muted.
  EXPECT_FALSE(GetAudioReturnMuted());
  // Pull data until faded out.
  GetAudioUntilMuted();
  EXPECT_TRUE(out_frame_.muted());

  // Verify that output audio is not written during muted mode. Other parameters
  // should be correct, though.
  AudioFrame new_frame;
  int16_t* frame_data = new_frame.mutable_data();
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
    frame_data[i] = 17;
  }
  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
  EXPECT_TRUE(muted);
  // NOTE(review): this re-checks `out_frame_`, which was not touched by the
  // GetAudio() call above -- confirm whether `new_frame` was intended.
  EXPECT_TRUE(out_frame_.muted());
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
    EXPECT_EQ(17, frame_data[i]);
  }
  EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
            new_frame.timestamp_);
  EXPECT_EQ(out_frame_.samples_per_channel_, new_frame.samples_per_channel_);
  EXPECT_EQ(out_frame_.sample_rate_hz_, new_frame.sample_rate_hz_);
  EXPECT_EQ(out_frame_.num_channels_, new_frame.num_channels_);
  EXPECT_EQ(out_frame_.speech_type_, new_frame.speech_type_);
  EXPECT_EQ(out_frame_.vad_activity_, new_frame.vad_activity_);

  // Insert new data. Timestamp is corrected for the time elapsed since the last
  // packet. Verify that normal operation resumes.
  InsertPacket(kSamples * counter_);
  GetAudioUntilNormal();
  EXPECT_FALSE(out_frame_.muted());

  NetEqNetworkStatistics stats;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  // NetEqNetworkStatistics::expand_rate tells the fraction of samples that were
  // concealment samples, in Q14 (16384 = 100%). The vast majority should be
  // concealment samples in this test.
  EXPECT_GT(stats.expand_rate, 14000);
  // And, it should be greater than the speech_expand_rate.
  EXPECT_GT(stats.expand_rate, stats.speech_expand_rate);
}

// Verifies that NetEq goes out of muted state when given a delayed packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateDelayedPacket) {
  // Insert one speech packet.
  InsertPacket(0);
  // Pull out audio once and expect it not to be muted.
  EXPECT_FALSE(GetAudioReturnMuted());
  // Pull data until faded out.
  GetAudioUntilMuted();
  // Insert new data. Timestamp is only corrected for the half of the time
  // elapsed since the last packet. That is, the new packet is delayed. Verify
  // that normal operation resumes.
  InsertPacket(kSamples * counter_ / 2);
  GetAudioUntilNormal();
}

// Verifies that NetEq goes out of muted state when given a future packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateFuturePacket) {
  // Insert one speech packet.
  InsertPacket(0);
  // Pull out audio once and expect it not to be muted.
  EXPECT_FALSE(GetAudioReturnMuted());
  // Pull data until faded out.
  GetAudioUntilMuted();
  // Insert new data. Timestamp is over-corrected for the time elapsed since the
  // last packet. That is, the new packet is too early. Verify that normal
  // operation resumes.
  InsertPacket(kSamples * counter_ * 2);
  GetAudioUntilNormal();
}

// Verifies that NetEq goes out of muted state when given an old packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateOldPacket) {
  // Insert one speech packet.
  InsertPacket(0);
  // Pull out audio once and expect it not to be muted.
  EXPECT_FALSE(GetAudioReturnMuted());
  // Pull data until faded out.
  GetAudioUntilMuted();

  EXPECT_NE(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
  // Insert a few packets which are older than the first packet.
  for (int i = 0; i < 5; ++i) {
    InsertPacket(kSamples * (i - 1000));
  }
  EXPECT_FALSE(GetAudioReturnMuted());
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
}

// Verifies that NetEq doesn't enter muted state when CNG mode is active and the
// packet stream is suspended for a long time.
TEST_F(NetEqDecodingTestWithMutedState, DoNotMuteExtendedCngWithoutPackets) {
  // Insert one CNG packet.
  InsertCngPacket(0);

  // Pull 10 seconds of audio (10 ms audio generated per lap).
  for (int i = 0; i < 1000; ++i) {
    bool muted;
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_FALSE(muted);
  }
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
}

// Verifies that NetEq goes back to normal after a long CNG period with the
// packet stream suspended.
TEST_F(NetEqDecodingTestWithMutedState, RecoverAfterExtendedCngWithoutPackets) {
  // Insert one CNG packet.
  InsertCngPacket(0);

  // Pull 10 seconds of audio (10 ms audio generated per lap).
  for (int i = 0; i < 1000; ++i) {
    bool muted;
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  }

  // Insert new data. Timestamp is corrected for the time elapsed since the last
  // packet. Verify that normal operation resumes.
  // NOTE(review): `counter_` is only advanced by GetAudioUntilMuted() and
  // GetAudioUntilNormal(), and neither has been called yet in this test, so
  // the timestamp computed here appears to be 0 rather than the 1000 frames
  // pulled above -- confirm whether `kSamples * 1000` was intended.
  InsertPacket(kSamples * counter_);
  GetAudioUntilNormal();
}

namespace {
// Compares every field of two audio frames except the sample data: timestamp,
// sample rate, samples per channel, channel count, speech type and VAD
// activity. Returns a failure naming the first field that differs.
::testing::AssertionResult AudioFramesEqualExceptData(const AudioFrame& a,
                                                      const AudioFrame& b) {
  if (a.timestamp_ != b.timestamp_)
    return ::testing::AssertionFailure() << "timestamp_ diff (" << a.timestamp_
                                         << " != " << b.timestamp_ << ")";
  if (a.sample_rate_hz_ != b.sample_rate_hz_)
    return ::testing::AssertionFailure()
           << "sample_rate_hz_ diff (" << a.sample_rate_hz_
           << " != " << b.sample_rate_hz_ << ")";
  if (a.samples_per_channel_ != b.samples_per_channel_)
    return ::testing::AssertionFailure()
           << "samples_per_channel_ diff (" << a.samples_per_channel_
           << " != " << b.samples_per_channel_ << ")";
  if (a.num_channels_ != b.num_channels_)
    return ::testing::AssertionFailure()
           << "num_channels_ diff (" << a.num_channels_
           << " != " << b.num_channels_ << ")";
  if (a.speech_type_ != b.speech_type_)
    return ::testing::AssertionFailure()
           << "speech_type_ diff (" << a.speech_type_
           << " != " << b.speech_type_ << ")";
  if (a.vad_activity_ != b.vad_activity_)
    return ::testing::AssertionFailure()
           << "vad_activity_ diff (" << a.vad_activity_
           << " != " << b.vad_activity_ << ")";
  return ::testing::AssertionSuccess();
}

// Same as AudioFramesEqualExceptData(), and additionally compares the first
// samples_per_channel_ * num_channels_ samples of both frames byte by byte.
::testing::AssertionResult AudioFramesEqual(const AudioFrame& a,
                                            const AudioFrame& b) {
  ::testing::AssertionResult res = AudioFramesEqualExceptData(a, b);
  if (!res)
    return res;
  // The metadata check above has already established that both frames hold
  // the same number of samples, so sizing the comparison from `a` is enough.
  if (memcmp(a.data(), b.data(),
             a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) !=
      0) {
    return ::testing::AssertionFailure() << "data_ diff";
  }
  return ::testing::AssertionSuccess();
}

}  // namespace

// Feeds the same packets to two NetEq instances, one with the muted state
// disabled and one with it enabled, and checks that their output frames agree:
// completely while the second instance is not muted, and in everything except
// the sample data while it is.
TEST_F(NetEqDecodingTestTwoInstances, CompareMutedStateOnOff) {
  ASSERT_FALSE(config_.enable_muted_state);
  config2_.enable_muted_state = true;
  CreateSecondInstance();

  // Insert one speech packet into both NetEqs.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
  EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload));

  AudioFrame out_frame1, out_frame2;
  bool muted;
  for (int i = 0; i < 1000; ++i) {
    rtc::StringBuilder ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the loop iterator on failure.
    // The first instance has the muted state disabled and must never mute.
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame1, &muted));
    EXPECT_FALSE(muted);
    EXPECT_EQ(0, neteq2_->GetAudio(&out_frame2, &muted));
    if (muted) {
      EXPECT_TRUE(AudioFramesEqualExceptData(out_frame1, out_frame2));
    } else {
      EXPECT_TRUE(AudioFramesEqual(out_frame1, out_frame2));
    }
  }
  // After 1000 frames without new packets the second instance must be muted.
  EXPECT_TRUE(muted);

  // Insert new data. Timestamp is corrected for the time elapsed since the last
  // packet.
  for (int i = 0; i < 5; ++i) {
    PopulateRtpInfo(0, kSamples * 1000 + kSamples * i, &rtp_info);
    EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload));
    EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload));
  }

  // Pull from both instances until the first one outputs normal speech again,
  // giving up after 1000 frames.
  int counter = 0;
  while (out_frame1.speech_type_ != AudioFrame::kNormalSpeech) {
    ASSERT_LT(counter++, 1000) << "Test timed out";
    rtc::StringBuilder ss;
    ss << "counter = " << counter;
    SCOPED_TRACE(ss.str());  // Print out the loop iterator on failure.
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame1, &muted));
    EXPECT_FALSE(muted);
    EXPECT_EQ(0, neteq2_->GetAudio(&out_frame2, &muted));
    if (muted) {
      EXPECT_TRUE(AudioFramesEqualExceptData(out_frame1, out_frame2));
    } else {
      EXPECT_TRUE(AudioFramesEqual(out_frame1, out_frame2));
    }
  }
  EXPECT_FALSE(muted);
}

// Alternates between ten inserted packets and a growing run of skipped
// packets (1, 2, ..., 19), then checks that the lifetime statistics report one
// concealment event per run.
TEST_F(NetEqDecodingTest, TestConcealmentEvents) {
  const int kNumConcealmentEvents = 19;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int seq_no = 0;
  RTPHeader rtp_info;
  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
  rtp_info.payloadType = 94;  // PCM16b WB codec.
  rtp_info.markerBit = 0;
  const uint8_t payload[kPayloadBytes] = {0};
  bool muted;

  // The return values of InsertPacket() and GetAudio() are not checked in
  // this test; only the final statistics are.
  for (int i = 0; i < kNumConcealmentEvents; i++) {
    // Insert some packets of 10 ms size.
    for (int j = 0; j < 10; j++) {
      rtp_info.sequenceNumber = seq_no++;
      rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
      neteq_->InsertPacket(rtp_info, payload);
      neteq_->GetAudio(&out_frame_, &muted);
    }

    // Lose a number of packets.
    int num_lost = 1 + i;
    for (int j = 0; j < num_lost; j++) {
      seq_no++;
      neteq_->GetAudio(&out_frame_, &muted);
    }
  }

  // Check number of concealment events.
  NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
  EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
}

// Test that the jitter buffer delay stat is computed correctly.
void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
  const int kNumPackets = 10;
  const int kDelayInNumPackets = 2;
  const int kPacketLenMs = 10;  // All packets are of 10 ms size.
  const size_t kSamples = kPacketLenMs * 16;
  const size_t kPayloadBytes = kSamples * 2;
  RTPHeader rtp_info;
  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
  rtp_info.payloadType = 94;  // PCM16b WB codec.
  rtp_info.markerBit = 0;
  const uint8_t payload[kPayloadBytes] = {0};
  bool muted;
  int packets_sent = 0;
  int packets_received = 0;
  int expected_delay = 0;
  int expected_target_delay = 0;
  uint64_t expected_emitted_count = 0;
  while (packets_received < kNumPackets) {
    // Insert packet.
    if (packets_sent < kNumPackets) {
      rtp_info.sequenceNumber = packets_sent++;
      rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
      neteq_->InsertPacket(rtp_info, payload);
    }

    // Get packet. Pulling only starts once more than kDelayInNumPackets
    // packets have been inserted.
    if (packets_sent > kDelayInNumPackets) {
      neteq_->GetAudio(&out_frame_, &muted);
      packets_received++;

      // The delay reported by the jitter buffer never exceeds
      // the number of samples previously fetched with GetAudio
      // (hence the min()).
      int packets_delay = std::min(packets_received, kDelayInNumPackets + 1);

      // The increase of the expected delay is the product of
      // the current delay of the jitter buffer in ms * the
      // number of samples that are sent for play out.
      int current_delay_ms = packets_delay * kPacketLenMs;
      expected_delay += current_delay_ms * kSamples;
      expected_target_delay += neteq_->TargetDelayMs() * kSamples;
      expected_emitted_count += kSamples;
    }
  }

  if (apply_packet_loss) {
    // Extra call to GetAudio to cause concealment.
    neteq_->GetAudio(&out_frame_, &muted);
  }

  // Check jitter buffer delay.
  NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
  EXPECT_EQ(expected_delay,
            rtc::checked_cast<int>(stats.jitter_buffer_delay_ms));
  EXPECT_EQ(expected_emitted_count, stats.jitter_buffer_emitted_count);
  EXPECT_EQ(expected_target_delay,
            rtc::checked_cast<int>(stats.jitter_buffer_target_delay_ms));
}

TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
  TestJitterBufferDelay(false);
}

TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
  TestJitterBufferDelay(true);
}

TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithAcceleration) {
  const int kPacketLenMs = 10;  // All packets are of 10 ms size.
  const size_t kSamples = kPacketLenMs * 16;
  const size_t kPayloadBytes = kSamples * 2;
  RTPHeader rtp_info;
  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
  rtp_info.payloadType = 94;  // PCM16b WB codec.
  rtp_info.markerBit = 0;
  const uint8_t payload[kPayloadBytes] = {0};

  int expected_target_delay = neteq_->TargetDelayMs() * kSamples;
  neteq_->InsertPacket(rtp_info, payload);

  bool muted;
  neteq_->GetAudio(&out_frame_, &muted);

  rtp_info.sequenceNumber += 1;
  rtp_info.timestamp += kSamples;
  neteq_->InsertPacket(rtp_info, payload);
  rtp_info.sequenceNumber += 1;
  rtp_info.timestamp += kSamples;
  neteq_->InsertPacket(rtp_info, payload);

  expected_target_delay += neteq_->TargetDelayMs() * 2 * kSamples;
  // We have two packets in the buffer and kAccelerate operation will
  // extract 20 ms of data.
  neteq_->GetAudio(&out_frame_, &muted, nullptr, NetEq::Operation::kAccelerate);

  // Check jitter buffer delay.
+ NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics(); + EXPECT_EQ(10 * kSamples * 3, stats.jitter_buffer_delay_ms); + EXPECT_EQ(kSamples * 3, stats.jitter_buffer_emitted_count); + EXPECT_EQ(expected_target_delay, + rtc::checked_cast<int>(stats.jitter_buffer_target_delay_ms)); +} + +namespace test { +TEST(NetEqNoTimeStretchingMode, RunTest) { + NetEq::Config config; + config.for_test_no_time_stretching = true; + auto codecs = NetEqTest::StandardDecoderMap(); + NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = { + {1, kRtpExtensionAudioLevel}, + {3, kRtpExtensionAbsoluteSendTime}, + {5, kRtpExtensionTransportSequenceNumber}, + {7, kRtpExtensionVideoContentType}, + {8, kRtpExtensionVideoTiming}}; + std::unique_ptr<NetEqInput> input(new NetEqRtpDumpInput( + webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"), + rtp_ext_map, absl::nullopt /*No SSRC filter*/)); + std::unique_ptr<TimeLimitedNetEqInput> input_time_limit( + new TimeLimitedNetEqInput(std::move(input), 20000)); + std::unique_ptr<AudioSink> output(new VoidAudioSink); + NetEqTest::Callbacks callbacks; + NetEqTest test(config, CreateBuiltinAudioDecoderFactory(), codecs, + /*text_log=*/nullptr, /*neteq_factory=*/nullptr, + /*input=*/std::move(input_time_limit), std::move(output), + callbacks); + test.Run(); + const auto stats = test.SimulationStats(); + EXPECT_EQ(0, stats.accelerate_rate); + EXPECT_EQ(0, stats.preemptive_rate); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto new file mode 100644 index 0000000000..b4b4253c3d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.proto @@ -0,0 +1,31 @@ +syntax = "proto2"; +option optimize_for = LITE_RUNTIME; +package webrtc.neteq_unittest; + +message NetEqNetworkStatistics { + // Next field number 18. 
// RTCP statistics record with four optional unsigned 32-bit fields.
// NOTE(review): units and scaling of the fields are not defined in this
// file; presumably they mirror the values the test code serializes --
// confirm against the code that fills this message.
message RtcpStatistics {
  optional uint32 fraction_lost = 1;
  optional uint32 cumulative_lost = 2;
  optional uint32 extended_max_sequence_number = 3;
  optional uint32 jitter = 4;
}
// Performs the "Normal" operation on freshly decoded audio.
//
// `input` holds `length` interleaved samples (total over all channels) which
// are de-interleaved into `output`; the channel count is taken from `output`.
// `last_mode` is the mode of the previous call and selects the transition
// handling:
//   * kCodecPlc:   only the end of the expand event is reported to
//                  `statistics_`.
//   * kExpand:     the new data is un-muted gradually and cross-faded with
//                  one more frame produced by `expand_`.
//   * kRfc3389Cng: the start of the new data is cross-faded with comfort
//                  noise (single channel only).
//
// Returns `length` on success. Returns 0 with `output` cleared when `length`
// is zero or is not a multiple of the number of channels.
int Normal::Process(const int16_t* input,
                    size_t length,
                    NetEq::Mode last_mode,
                    AudioMultiVector* output) {
  if (length == 0) {
    // Nothing to process.
    output->Clear();
    return static_cast<int>(length);
  }

  // Output should be empty at this point.
  RTC_DCHECK(output->Empty());
  if (length % output->Channels() != 0) {
    // The length does not match the number of channels.
    output->Clear();
    return 0;
  }
  output->PushBackInterleaved(rtc::ArrayView<const int16_t>(input, length));

  // Sample rate expressed as a multiple of 8 kHz.
  const int fs_mult = fs_hz_ / 8000;
  RTC_DCHECK_GT(fs_mult, 0);
  // fs_shift = log2(fs_mult), rounded down.
  // Note that `fs_shift` is not "exact" for 48 kHz.
  // TODO(hlundin): Investigate this further.
  const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);

  // If last call resulted in a CodecPlc we don't need to do cross-fading but
  // we need to report the end of the interruption once we are back to normal
  // operation.
  if (last_mode == NetEq::Mode::kCodecPlc) {
    statistics_->EndExpandEvent(fs_hz_);
  }

  // Check if last RecOut call resulted in an Expand. If so, we have to take
  // care of some cross-fading and unmuting.
  if (last_mode == NetEq::Mode::kExpand) {
    // Generate interpolation data using Expand.
    // First, set Expand parameters to appropriate values.
    expand_->SetParametersForNormalAfterExpand();

    // Call Expand.
    AudioMultiVector expanded(output->Channels());
    expand_->Process(&expanded);
    expand_->Reset();

    size_t length_per_channel = length / output->Channels();
    // Scratch copy of one channel of the new data, reused for every channel.
    std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
    for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
      // Set muting factor to the same as expand muting factor.
      int16_t mute_factor = expand_->MuteFactor(channel_ix);

      (*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());

      // Find largest absolute value in new data.
      int16_t decoded_max =
          WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
      // Adjust muting factor if needed (to BGN level).
      // The energy is measured over at most fs_mult * 64 samples.
      size_t energy_length =
          std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
      int scaling = 6 + fs_shift - WebRtcSpl_NormW32(decoded_max * decoded_max);
      scaling = std::max(scaling, 0);  // `scaling` should always be >= 0.
      int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
                                                     energy_length, scaling);
      // The divisor can become zero for short inputs (energy_length smaller
      // than 2^scaling); in that case the energy is treated as zero instead
      // of dividing.
      int32_t scaled_energy_length =
          static_cast<int32_t>(energy_length >> scaling);
      if (scaled_energy_length > 0) {
        energy = energy / scaled_energy_length;
      } else {
        energy = 0;
      }

      int local_mute_factor = 16384;  // 1.0 in Q14.
      if ((energy != 0) && (energy > background_noise_.Energy(channel_ix))) {
        // Normalize new frame energy to 15 bits.
        scaling = WebRtcSpl_NormW32(energy) - 16;
        // We want background_noise_.energy() / energy in Q14.
        int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
            background_noise_.Energy(channel_ix), scaling + 14);
        int16_t energy_scaled =
            static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
        int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
        // Square root of the energy ratio gives the amplitude ratio; it is
        // capped at 1.0 (Q14).
        local_mute_factor =
            std::min(local_mute_factor, WebRtcSpl_SqrtFloor(ratio << 14));
      }
      // Start from the larger of the expand mute factor and the factor
      // derived from the background-noise level.
      mute_factor = std::max<int16_t>(mute_factor, local_mute_factor);
      RTC_DCHECK_LE(mute_factor, 16384);
      RTC_DCHECK_GE(mute_factor, 0);

      // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14),
      // or as fast as it takes to come back to full gain within the frame
      // length.
      const int back_to_fullscale_inc =
          static_cast<int>((16384 - mute_factor) / length_per_channel);
      const int increment = std::max(64 / fs_mult, back_to_fullscale_inc);
      for (size_t i = 0; i < length_per_channel; i++) {
        // Scale with mute factor.
        RTC_DCHECK_LT(channel_ix, output->Channels());
        RTC_DCHECK_LT(i, output->Size());
        int32_t scaled_signal = (*output)[channel_ix][i] * mute_factor;
        // Shift 14 with proper rounding.
        (*output)[channel_ix][i] =
            static_cast<int16_t>((scaled_signal + 8192) >> 14);
        // Increase mute_factor towards 16384.
        mute_factor =
            static_cast<int16_t>(std::min(mute_factor + increment, 16384));
      }

      // Interpolate the expanded data into the new vector.
      // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
      size_t win_length = samples_per_ms_;
      int16_t win_slope_Q14 = default_win_slope_Q14_;
      RTC_DCHECK_LT(channel_ix, output->Channels());
      // Shorten the window (and steepen the slope) when the output holds
      // less than one millisecond of audio.
      if (win_length > output->Size()) {
        win_length = output->Size();
        win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
      }
      // Linear cross-fade: the weight of the new data ramps up in Q14 while
      // the weight of the expanded data ramps down by the same amount.
      int16_t win_up_Q14 = 0;
      for (size_t i = 0; i < win_length; i++) {
        win_up_Q14 += win_slope_Q14;
        (*output)[channel_ix][i] =
            (win_up_Q14 * (*output)[channel_ix][i] +
             ((1 << 14) - win_up_Q14) * expanded[channel_ix][i] + (1 << 13)) >>
            14;
      }
      RTC_DCHECK_GT(win_up_Q14,
                    (1 << 14) - 32);  // Worst case rounding is a length of 34
    }
  } else if (last_mode == NetEq::Mode::kRfc3389Cng) {
    RTC_DCHECK_EQ(output->Channels(), 1);  // Not adapted for multi-channel yet.
    static const size_t kCngLength = 48;
    RTC_DCHECK_LE(8 * fs_mult, kCngLength);
    int16_t cng_output[kCngLength];
    ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();

    if (cng_decoder) {
      // Generate long enough for 48kHz.
      if (!cng_decoder->Generate(cng_output, false)) {
        // Error returned; set return vector to all zeros.
        memset(cng_output, 0, sizeof(cng_output));
      }
    } else {
      // If no CNG instance is defined, just copy from the decoded data.
      // (This will result in interpolating the decoded with itself.)
      (*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
    }
    // Interpolate the CNG into the new vector.
    // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
    // NOTE(review): unlike the kExpand branch, the window is clamped to
    // kCngLength only, not to output->Size(); this presumably relies on the
    // decoded data always covering at least one millisecond -- confirm.
    size_t win_length = samples_per_ms_;
    int16_t win_slope_Q14 = default_win_slope_Q14_;
    if (win_length > kCngLength) {
      win_length = kCngLength;
      win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
    }
    int16_t win_up_Q14 = 0;
    for (size_t i = 0; i < win_length; i++) {
      win_up_Q14 += win_slope_Q14;
      (*output)[0][i] =
          (win_up_Q14 * (*output)[0][i] +
           ((1 << 14) - win_up_Q14) * cng_output[i] + (1 << 13)) >>
          14;
    }
    RTC_DCHECK_GT(win_up_Q14,
                  (1 << 14) - 32);  // Worst case rounding is a length of 34
  }

  return static_cast<int>(length);
}
+class Normal { + public: + Normal(int fs_hz, + DecoderDatabase* decoder_database, + const BackgroundNoise& background_noise, + Expand* expand, + StatisticsCalculator* statistics) + : fs_hz_(fs_hz), + decoder_database_(decoder_database), + background_noise_(background_noise), + expand_(expand), + samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)), + default_win_slope_Q14_( + rtc::dchecked_cast<uint16_t>((1 << 14) / samples_per_ms_)), + statistics_(statistics) {} + + virtual ~Normal() {} + + Normal(const Normal&) = delete; + Normal& operator=(const Normal&) = delete; + + // Performs the "Normal" operation. The decoder data is supplied in `input`, + // having `length` samples in total for all channels (interleaved). The + // result is written to `output`. The number of channels allocated in + // `output` defines the number of channels that will be used when + // de-interleaving `input`. `last_mode` contains the mode used in the previous + // GetAudio call (i.e., not the current one). + int Process(const int16_t* input, + size_t length, + NetEq::Mode last_mode, + AudioMultiVector* output); + + private: + int fs_hz_; + DecoderDatabase* decoder_database_; + const BackgroundNoise& background_noise_; + Expand* expand_; + const size_t samples_per_ms_; + const int16_t default_win_slope_Q14_; + StatisticsCalculator* const statistics_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_NORMAL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc new file mode 100644 index 0000000000..4554d79576 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/normal_unittest.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Normal class. + +#include "modules/audio_coding/neteq/normal.h" + +#include <memory> +#include <vector> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/expand.h" +#include "modules/audio_coding/neteq/mock/mock_decoder_database.h" +#include "modules/audio_coding/neteq/mock/mock_expand.h" +#include "modules/audio_coding/neteq/random_vector.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "modules/audio_coding/neteq/sync_buffer.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::Invoke; + +namespace webrtc { + +namespace { + +int ExpandProcess120ms(AudioMultiVector* output) { + AudioMultiVector dummy_audio(1, 11520u); + dummy_audio.CopyTo(output); + return 0; +} + +} // namespace + +TEST(Normal, CreateAndDestroy) { + MockDecoderDatabase db; + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels); + Normal normal(fs, &db, bgn, &expand, &statistics); + EXPECT_CALL(db, Die()); // Called when `db` goes out of scope. 
+} + +TEST(Normal, AvoidDivideByZero) { + MockDecoderDatabase db; + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, + channels); + Normal normal(fs, &db, bgn, &expand, &statistics); + + int16_t input[1000] = {0}; + AudioMultiVector output(channels); + + // Zero input length. + EXPECT_EQ(0, normal.Process(input, 0, NetEq::Mode::kExpand, &output)); + EXPECT_EQ(0u, output.Size()); + + // Try to make energy_length >> scaling = 0; + EXPECT_CALL(expand, SetParametersForNormalAfterExpand()); + EXPECT_CALL(expand, Process(_)); + EXPECT_CALL(expand, Reset()); + // If input_size_samples < 64, then energy_length in Normal::Process() will + // be equal to input_size_samples. Since the input is all zeros, decoded_max + // will be zero, and scaling will be >= 6. Thus, energy_length >> scaling = 0, + // and using this as a denominator would lead to problems. + int input_size_samples = 63; + EXPECT_EQ(input_size_samples, normal.Process(input, input_size_samples, + NetEq::Mode::kExpand, &output)); + + EXPECT_CALL(db, Die()); // Called when `db` goes out of scope. + EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope. +} + +TEST(Normal, InputLengthAndChannelsDoNotMatch) { + MockDecoderDatabase db; + int fs = 8000; + size_t channels = 2; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(channels, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, + channels); + Normal normal(fs, &db, bgn, &expand, &statistics); + + int16_t input[1000] = {0}; + AudioMultiVector output(channels); + + // Let the number of samples be one sample less than 80 samples per channel. 
+ size_t input_len = 80 * channels - 1; + EXPECT_EQ(0, normal.Process(input, input_len, NetEq::Mode::kExpand, &output)); + EXPECT_EQ(0u, output.Size()); + + EXPECT_CALL(db, Die()); // Called when `db` goes out of scope. + EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope. +} + +TEST(Normal, LastModeExpand120msPacket) { + MockDecoderDatabase db; + const int kFs = 48000; + const size_t kPacketsizeBytes = 11520u; + const size_t kChannels = 1; + BackgroundNoise bgn(kChannels); + SyncBuffer sync_buffer(kChannels, 1000); + RandomVector random_vector; + StatisticsCalculator statistics; + MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs, + kChannels); + Normal normal(kFs, &db, bgn, &expand, &statistics); + + int16_t input[kPacketsizeBytes] = {0}; + AudioMultiVector output(kChannels); + + EXPECT_CALL(expand, SetParametersForNormalAfterExpand()); + EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms)); + EXPECT_CALL(expand, Reset()); + EXPECT_EQ( + static_cast<int>(kPacketsizeBytes), + normal.Process(input, kPacketsizeBytes, NetEq::Mode::kExpand, &output)); + + EXPECT_EQ(kPacketsizeBytes, output.Size()); + + EXPECT_CALL(db, Die()); // Called when `db` goes out of scope. + EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope. +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc new file mode 100644 index 0000000000..333f161229 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/packet.h" + +namespace webrtc { + +Packet::Packet() = default; +Packet::Packet(Packet&& b) = default; + +Packet::~Packet() = default; + +Packet& Packet::operator=(Packet&& b) = default; + +Packet Packet::Clone() const { + RTC_CHECK(!frame); + + Packet clone; + clone.timestamp = timestamp; + clone.sequence_number = sequence_number; + clone.payload_type = payload_type; + clone.payload.SetData(payload.data(), payload.size()); + clone.priority = priority; + clone.packet_info = packet_info; + + return clone; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet.h new file mode 100644 index 0000000000..0c6f204edb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_H_ +#define MODULES_AUDIO_CODING_NETEQ_PACKET_H_ + +#include <stdint.h> + +#include <list> +#include <memory> + +#include "api/audio_codecs/audio_decoder.h" +#include "api/neteq/tick_timer.h" +#include "api/rtp_packet_info.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Struct for holding RTP packets. 
+struct Packet { + struct Priority { + Priority() : codec_level(0), red_level(0) {} + Priority(int codec_level, int red_level) + : codec_level(codec_level), red_level(red_level) { + CheckInvariant(); + } + + int codec_level; + int red_level; + + // Priorities are sorted low-to-high, first on the level the codec + // prioritizes it, then on the level of RED packet it is; i.e. if it is a + // primary or secondary payload of a RED packet. For example: with Opus, an + // Fec packet (which the decoder prioritizes lower than a regular packet) + // will not be used if there is _any_ RED payload for the same + // timeframe. The highest priority packet will have levels {0, 0}. Negative + // priorities are not allowed. + bool operator<(const Priority& b) const { + CheckInvariant(); + b.CheckInvariant(); + if (codec_level == b.codec_level) + return red_level < b.red_level; + + return codec_level < b.codec_level; + } + bool operator==(const Priority& b) const { + CheckInvariant(); + b.CheckInvariant(); + return codec_level == b.codec_level && red_level == b.red_level; + } + bool operator!=(const Priority& b) const { return !(*this == b); } + bool operator>(const Priority& b) const { return b < *this; } + bool operator<=(const Priority& b) const { return !(b > *this); } + bool operator>=(const Priority& b) const { return !(b < *this); } + + private: + void CheckInvariant() const { + RTC_DCHECK_GE(codec_level, 0); + RTC_DCHECK_GE(red_level, 0); + } + }; + + uint32_t timestamp; + uint16_t sequence_number; + uint8_t payload_type; + // Datagram excluding RTP header and header extension. + rtc::Buffer payload; + Priority priority; + RtpPacketInfo packet_info; + std::unique_ptr<TickTimer::Stopwatch> waiting_time; + std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame; + + Packet(); + Packet(Packet&& b); + ~Packet(); + + // Packets should generally be moved around but sometimes it's useful to make + // a copy, for example for testing purposes. 
NOTE: Will only work for + // un-parsed packets, i.e. `frame` must be unset. The payload will, however, + // be copied. `waiting_time` will also not be copied. + Packet Clone() const; + + Packet& operator=(Packet&& b); + + // Comparison operators. Establish a packet ordering based on (1) timestamp, + // (2) sequence number and (3) redundancy. + // Timestamp and sequence numbers are compared taking wrap-around into + // account. For two packets with the same sequence number and timestamp a + // primary payload is considered "smaller" than a secondary. + bool operator==(const Packet& rhs) const { + return (this->timestamp == rhs.timestamp && + this->sequence_number == rhs.sequence_number && + this->priority == rhs.priority); + } + bool operator!=(const Packet& rhs) const { return !operator==(rhs); } + bool operator<(const Packet& rhs) const { + if (this->timestamp == rhs.timestamp) { + if (this->sequence_number == rhs.sequence_number) { + // Timestamp and sequence numbers are identical - deem the left hand + // side to be "smaller" (i.e., "earlier") if it has higher priority. + return this->priority < rhs.priority; + } + return (static_cast<uint16_t>(rhs.sequence_number - + this->sequence_number) < 0xFFFF / 2); + } + return (static_cast<uint32_t>(rhs.timestamp - this->timestamp) < + 0xFFFFFFFF / 2); + } + bool operator>(const Packet& rhs) const { return rhs.operator<(*this); } + bool operator<=(const Packet& rhs) const { return !operator>(rhs); } + bool operator>=(const Packet& rhs) const { return !operator<(rhs); } + + bool empty() const { return !frame && payload.empty(); } +}; + +// A list of packets. 
+typedef std::list<Packet> PacketList; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc new file mode 100644 index 0000000000..7196a6e393 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/packet_arrival_history.h" + +#include <algorithm> + +#include "api/neteq/tick_timer.h" +#include "modules/include/module_common_types_public.h" + +namespace webrtc { + +PacketArrivalHistory::PacketArrivalHistory(int window_size_ms) + : window_size_ms_(window_size_ms) {} + +void PacketArrivalHistory::Insert(uint32_t rtp_timestamp, + int64_t arrival_time_ms) { + RTC_DCHECK(sample_rate_khz_ > 0); + int64_t unwrapped_rtp_timestamp = timestamp_unwrapper_.Unwrap(rtp_timestamp); + if (!newest_rtp_timestamp_ || + unwrapped_rtp_timestamp > *newest_rtp_timestamp_) { + newest_rtp_timestamp_ = unwrapped_rtp_timestamp; + } + history_.emplace_back(unwrapped_rtp_timestamp / sample_rate_khz_, + arrival_time_ms); + MaybeUpdateCachedArrivals(history_.back()); + while (history_.front().rtp_timestamp_ms + window_size_ms_ < + unwrapped_rtp_timestamp / sample_rate_khz_) { + if (&history_.front() == min_packet_arrival_) { + min_packet_arrival_ = nullptr; + } + if (&history_.front() == max_packet_arrival_) { + max_packet_arrival_ = nullptr; + } + history_.pop_front(); + } + if (!min_packet_arrival_ || 
!max_packet_arrival_) { + for (const PacketArrival& packet : history_) { + MaybeUpdateCachedArrivals(packet); + } + } +} + +void PacketArrivalHistory::MaybeUpdateCachedArrivals( + const PacketArrival& packet_arrival) { + if (!min_packet_arrival_ || packet_arrival <= *min_packet_arrival_) { + min_packet_arrival_ = &packet_arrival; + } + if (!max_packet_arrival_ || packet_arrival >= *max_packet_arrival_) { + max_packet_arrival_ = &packet_arrival; + } +} + +void PacketArrivalHistory::Reset() { + history_.clear(); + min_packet_arrival_ = nullptr; + max_packet_arrival_ = nullptr; + timestamp_unwrapper_ = TimestampUnwrapper(); + newest_rtp_timestamp_ = absl::nullopt; +} + +int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp, + int64_t time_ms) const { + RTC_DCHECK(sample_rate_khz_ > 0); + int64_t unwrapped_rtp_timestamp_ms = + timestamp_unwrapper_.UnwrapWithoutUpdate(rtp_timestamp) / + sample_rate_khz_; + PacketArrival packet(unwrapped_rtp_timestamp_ms, time_ms); + return GetPacketArrivalDelayMs(packet); +} + +int PacketArrivalHistory::GetMaxDelayMs() const { + if (!max_packet_arrival_) { + return 0; + } + return GetPacketArrivalDelayMs(*max_packet_arrival_); +} + +bool PacketArrivalHistory::IsNewestRtpTimestamp(uint32_t rtp_timestamp) const { + if (!newest_rtp_timestamp_) { + return false; + } + int64_t unwrapped_rtp_timestamp = + timestamp_unwrapper_.UnwrapWithoutUpdate(rtp_timestamp); + return unwrapped_rtp_timestamp == *newest_rtp_timestamp_; +} + +int PacketArrivalHistory::GetPacketArrivalDelayMs( + const PacketArrival& packet_arrival) const { + if (!min_packet_arrival_) { + return 0; + } + return std::max(static_cast<int>(packet_arrival.arrival_time_ms - + min_packet_arrival_->arrival_time_ms - + (packet_arrival.rtp_timestamp_ms - + min_packet_arrival_->rtp_timestamp_ms)), + 0); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h 
b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h new file mode 100644 index 0000000000..79fc9176bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ +#define MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ + +#include <cstdint> +#include <deque> + +#include "absl/types/optional.h" +#include "api/neteq/tick_timer.h" +#include "modules/include/module_common_types_public.h" + +namespace webrtc { + +// Stores timing information about previously received packets. +// The history has a fixed window size beyond which old data is automatically +// pruned. +class PacketArrivalHistory { + public: + explicit PacketArrivalHistory(int window_size_ms); + + // Insert packet with `rtp_timestamp` and `arrival_time_ms` into the history. + void Insert(uint32_t rtp_timestamp, int64_t arrival_time_ms); + + // The delay for `rtp_timestamp` at `time_ms` is calculated as + // `(time_ms - p.arrival_time_ms) - (rtp_timestamp - p.rtp_timestamp)` + // where `p` is chosen as the packet arrival in the history that maximizes the + // delay. + int GetDelayMs(uint32_t rtp_timestamp, int64_t time_ms) const; + + // Get the maximum packet arrival delay observed in the history. 
+ int GetMaxDelayMs() const; + + bool IsNewestRtpTimestamp(uint32_t rtp_timestamp) const; + + void Reset(); + + void set_sample_rate(int sample_rate) { + sample_rate_khz_ = sample_rate / 1000; + } + + size_t size() const { return history_.size(); } + + private: + struct PacketArrival { + PacketArrival(int64_t rtp_timestamp_ms, int64_t arrival_time_ms) + : rtp_timestamp_ms(rtp_timestamp_ms), + arrival_time_ms(arrival_time_ms) {} + int64_t rtp_timestamp_ms; + int64_t arrival_time_ms; + bool operator<=(const PacketArrival& other) const { + return arrival_time_ms - rtp_timestamp_ms <= + other.arrival_time_ms - other.rtp_timestamp_ms; + } + bool operator>=(const PacketArrival& other) const { + return arrival_time_ms - rtp_timestamp_ms >= + other.arrival_time_ms - other.rtp_timestamp_ms; + } + }; + std::deque<PacketArrival> history_; + int GetPacketArrivalDelayMs(const PacketArrival& packet_arrival) const; + // Updates `min_packet_arrival_` and `max_packet_arrival_`. + void MaybeUpdateCachedArrivals(const PacketArrival& packet); + const PacketArrival* min_packet_arrival_ = nullptr; + const PacketArrival* max_packet_arrival_ = nullptr; + const int window_size_ms_; + TimestampUnwrapper timestamp_unwrapper_; + absl::optional<int64_t> newest_rtp_timestamp_; + int sample_rate_khz_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc new file mode 100644 index 0000000000..286a7acb2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/packet_arrival_history.h" + +#include <cstdint> +#include <limits> + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr int kFs = 8000; +constexpr int kFsKhz = kFs / 1000; +constexpr int kFrameSizeMs = 20; +constexpr int kWindowSizeMs = 1000; + +class PacketArrivalHistoryTest : public testing::Test { + public: + PacketArrivalHistoryTest() : history_(kWindowSizeMs) { + history_.set_sample_rate(kFs); + } + void IncrementTime(int delta_ms) { time_ms_ += delta_ms; } + int InsertPacketAndGetDelay(int timestamp_delta_ms) { + uint32_t timestamp = timestamp_ + timestamp_delta_ms * kFsKhz; + if (timestamp_delta_ms > 0) { + timestamp_ = timestamp; + } + history_.Insert(timestamp, time_ms_); + EXPECT_EQ(history_.IsNewestRtpTimestamp(timestamp), + timestamp_delta_ms >= 0); + return history_.GetDelayMs(timestamp, time_ms_); + } + + protected: + int64_t time_ms_ = 0; + PacketArrivalHistory history_; + uint32_t timestamp_ = 0x12345678; +}; + +TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) { + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + + // Reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 60); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 40); + + // Move reference packet forward. + EXPECT_EQ(InsertPacketAndGetDelay(4 * kFrameSizeMs), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + + // Earlier packet is now more delayed due to the new reference packet. 
+ EXPECT_EQ(history_.GetMaxDelayMs(), 100); +} + +TEST_F(PacketArrivalHistoryTest, ReorderedPackets) { + // Insert first packet. + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + // Insert reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-80), 80); + + // Insert another reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-kFrameSizeMs), 20); + + // Insert the next packet in order and verify that the relative delay is + // estimated based on the first inserted packet. + IncrementTime(4 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 60); + + EXPECT_EQ(history_.GetMaxDelayMs(), 80); +} + +TEST_F(PacketArrivalHistoryTest, MaxHistorySize) { + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + EXPECT_EQ(history_.GetMaxDelayMs(), 20); + + // Insert next packet with a timestamp difference larger than maximum history + // size. This removes the previously inserted packet from the history. + IncrementTime(kWindowSizeMs + kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs + kWindowSizeMs), 0); + EXPECT_EQ(history_.GetMaxDelayMs(), 0); +} + +TEST_F(PacketArrivalHistoryTest, TimestampWraparound) { + timestamp_ = std::numeric_limits<uint32_t>::max(); + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(2 * kFrameSizeMs); + // Insert timestamp that will wrap around. + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs); + + // Insert reordered packet before the wraparound. + EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs); + + // Insert another in-order packet after the wraparound. 
+ EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); + + EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc new file mode 100644 index 0000000000..f6b5a476c9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This is the implementation of the PacketBuffer class. It is mostly based on +// an STL list. The list is kept sorted at all times so that the next packet to +// decode is at the beginning of the list. + +#include "modules/audio_coding/neteq/packet_buffer.h" + +#include <algorithm> +#include <list> +#include <memory> +#include <type_traits> +#include <utility> + +#include "api/audio_codecs/audio_decoder.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/statistics_calculator.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/struct_parameters_parser.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { +// Predicate used when inserting packets in the buffer list. +// Operator() returns true when `packet` goes before `new_packet`. 
+class NewTimestampIsLarger { + public: + explicit NewTimestampIsLarger(const Packet& new_packet) + : new_packet_(new_packet) {} + bool operator()(const Packet& packet) { return (new_packet_ >= packet); } + + private: + const Packet& new_packet_; +}; + +// Returns true if both payload types are known to the decoder database, and +// have the same sample rate. +bool EqualSampleRates(uint8_t pt1, + uint8_t pt2, + const DecoderDatabase& decoder_database) { + auto* di1 = decoder_database.GetDecoderInfo(pt1); + auto* di2 = decoder_database.GetDecoderInfo(pt2); + return di1 && di2 && di1->SampleRateHz() == di2->SampleRateHz(); +} + +void LogPacketDiscarded(int codec_level, StatisticsCalculator* stats) { + RTC_CHECK(stats); + if (codec_level > 0) { + stats->SecondaryPacketsDiscarded(1); + } else { + stats->PacketsDiscarded(1); + } +} + +absl::optional<SmartFlushingConfig> GetSmartflushingConfig() { + absl::optional<SmartFlushingConfig> result; + std::string field_trial_string = + field_trial::FindFullName("WebRTC-Audio-NetEqSmartFlushing"); + result = SmartFlushingConfig(); + bool enabled = false; + auto parser = StructParametersParser::Create( + "enabled", &enabled, "target_level_threshold_ms", + &result->target_level_threshold_ms, "target_level_multiplier", + &result->target_level_multiplier); + parser->Parse(field_trial_string); + if (!enabled) { + return absl::nullopt; + } + RTC_LOG(LS_INFO) << "Using smart flushing, target_level_threshold_ms: " + << result->target_level_threshold_ms + << ", target_level_multiplier: " + << result->target_level_multiplier; + return result; +} + +} // namespace + +PacketBuffer::PacketBuffer(size_t max_number_of_packets, + const TickTimer* tick_timer) + : smart_flushing_config_(GetSmartflushingConfig()), + max_number_of_packets_(max_number_of_packets), + tick_timer_(tick_timer) {} + +// Destructor. All packets in the buffer will be destroyed. +PacketBuffer::~PacketBuffer() { + buffer_.clear(); +} + +// Flush the buffer. 
All packets in the buffer will be destroyed. +void PacketBuffer::Flush(StatisticsCalculator* stats) { + for (auto& p : buffer_) { + LogPacketDiscarded(p.priority.codec_level, stats); + } + buffer_.clear(); + stats->FlushedPacketBuffer(); +} + +void PacketBuffer::PartialFlush(int target_level_ms, + size_t sample_rate, + size_t last_decoded_length, + StatisticsCalculator* stats) { + // Make sure that at least half the packet buffer capacity will be available + // after the flush. This is done to avoid getting stuck if the target level is + // very high. + int target_level_samples = + std::min(target_level_ms * sample_rate / 1000, + max_number_of_packets_ * last_decoded_length / 2); + // We should avoid flushing to very low levels. + target_level_samples = std::max( + target_level_samples, smart_flushing_config_->target_level_threshold_ms); + while (GetSpanSamples(last_decoded_length, sample_rate, true) > + static_cast<size_t>(target_level_samples) || + buffer_.size() > max_number_of_packets_ / 2) { + LogPacketDiscarded(PeekNextPacket()->priority.codec_level, stats); + buffer_.pop_front(); + } +} + +bool PacketBuffer::Empty() const { + return buffer_.empty(); +} + +int PacketBuffer::InsertPacket(Packet&& packet, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms, + const DecoderDatabase& decoder_database) { + if (packet.empty()) { + RTC_LOG(LS_WARNING) << "InsertPacket invalid packet"; + return kInvalidPacket; + } + + RTC_DCHECK_GE(packet.priority.codec_level, 0); + RTC_DCHECK_GE(packet.priority.red_level, 0); + + int return_val = kOK; + + packet.waiting_time = tick_timer_->GetNewStopwatch(); + + // Perform a smart flush if the buffer size exceeds a multiple of the target + // level. + const size_t span_threshold = + smart_flushing_config_ + ? 
smart_flushing_config_->target_level_multiplier * + std::max(smart_flushing_config_->target_level_threshold_ms, + target_level_ms) * + sample_rate / 1000 + : 0; + const bool smart_flush = + smart_flushing_config_.has_value() && + GetSpanSamples(last_decoded_length, sample_rate, true) >= span_threshold; + if (buffer_.size() >= max_number_of_packets_ || smart_flush) { + size_t buffer_size_before_flush = buffer_.size(); + if (smart_flushing_config_.has_value()) { + // Flush down to the target level. + PartialFlush(target_level_ms, sample_rate, last_decoded_length, stats); + return_val = kPartialFlush; + } else { + // Buffer is full. + Flush(stats); + return_val = kFlushed; + } + RTC_LOG(LS_WARNING) << "Packet buffer flushed, " + << (buffer_size_before_flush - buffer_.size()) + << " packets discarded."; + } + + // Get an iterator pointing to the place in the buffer where the new packet + // should be inserted. The list is searched from the back, since the most + // likely case is that the new packet should be near the end of the list. + PacketList::reverse_iterator rit = std::find_if( + buffer_.rbegin(), buffer_.rend(), NewTimestampIsLarger(packet)); + + // The new packet is to be inserted to the right of `rit`. If it has the same + // timestamp as `rit`, which has a higher priority, do not insert the new + // packet to list. + if (rit != buffer_.rend() && packet.timestamp == rit->timestamp) { + LogPacketDiscarded(packet.priority.codec_level, stats); + return return_val; + } + + // The new packet is to be inserted to the left of `it`. If it has the same + // timestamp as `it`, which has a lower priority, replace `it` with the new + // packet. + PacketList::iterator it = rit.base(); + if (it != buffer_.end() && packet.timestamp == it->timestamp) { + LogPacketDiscarded(it->priority.codec_level, stats); + it = buffer_.erase(it); + } + buffer_.insert(it, std::move(packet)); // Insert the packet at that position. 
+ + return return_val; +} + +int PacketBuffer::InsertPacketList( + PacketList* packet_list, + const DecoderDatabase& decoder_database, + absl::optional<uint8_t>* current_rtp_payload_type, + absl::optional<uint8_t>* current_cng_rtp_payload_type, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms) { + RTC_DCHECK(stats); + bool flushed = false; + for (auto& packet : *packet_list) { + if (decoder_database.IsComfortNoise(packet.payload_type)) { + if (*current_cng_rtp_payload_type && + **current_cng_rtp_payload_type != packet.payload_type) { + // New CNG payload type implies new codec type. + *current_rtp_payload_type = absl::nullopt; + Flush(stats); + flushed = true; + } + *current_cng_rtp_payload_type = packet.payload_type; + } else if (!decoder_database.IsDtmf(packet.payload_type)) { + // This must be speech. + if ((*current_rtp_payload_type && + **current_rtp_payload_type != packet.payload_type) || + (*current_cng_rtp_payload_type && + !EqualSampleRates(packet.payload_type, + **current_cng_rtp_payload_type, + decoder_database))) { + *current_cng_rtp_payload_type = absl::nullopt; + Flush(stats); + flushed = true; + } + *current_rtp_payload_type = packet.payload_type; + } + int return_val = + InsertPacket(std::move(packet), stats, last_decoded_length, sample_rate, + target_level_ms, decoder_database); + if (return_val == kFlushed) { + // The buffer flushed, but this is not an error. We can still continue. + flushed = true; + } else if (return_val != kOK) { + // An error occurred. Delete remaining packets in list and return. + packet_list->clear(); + return return_val; + } + } + packet_list->clear(); + return flushed ? 
kFlushed : kOK; +} + +int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const { + if (Empty()) { + return kBufferEmpty; + } + if (!next_timestamp) { + return kInvalidPointer; + } + *next_timestamp = buffer_.front().timestamp; + return kOK; +} + +int PacketBuffer::NextHigherTimestamp(uint32_t timestamp, + uint32_t* next_timestamp) const { + if (Empty()) { + return kBufferEmpty; + } + if (!next_timestamp) { + return kInvalidPointer; + } + PacketList::const_iterator it; + for (it = buffer_.begin(); it != buffer_.end(); ++it) { + if (it->timestamp >= timestamp) { + // Found a packet matching the search. + *next_timestamp = it->timestamp; + return kOK; + } + } + return kNotFound; +} + +const Packet* PacketBuffer::PeekNextPacket() const { + return buffer_.empty() ? nullptr : &buffer_.front(); +} + +absl::optional<Packet> PacketBuffer::GetNextPacket() { + if (Empty()) { + // Buffer is empty. + return absl::nullopt; + } + + absl::optional<Packet> packet(std::move(buffer_.front())); + // Assert that the packet sanity checks in InsertPacket method works. + RTC_DCHECK(!packet->empty()); + buffer_.pop_front(); + + return packet; +} + +int PacketBuffer::DiscardNextPacket(StatisticsCalculator* stats) { + if (Empty()) { + return kBufferEmpty; + } + // Assert that the packet sanity checks in InsertPacket method works. 
+ const Packet& packet = buffer_.front(); + RTC_DCHECK(!packet.empty()); + LogPacketDiscarded(packet.priority.codec_level, stats); + buffer_.pop_front(); + return kOK; +} + +void PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit, + uint32_t horizon_samples, + StatisticsCalculator* stats) { + buffer_.remove_if([timestamp_limit, horizon_samples, stats](const Packet& p) { + if (timestamp_limit == p.timestamp || + !IsObsoleteTimestamp(p.timestamp, timestamp_limit, horizon_samples)) { + return false; + } + LogPacketDiscarded(p.priority.codec_level, stats); + return true; + }); +} + +void PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit, + StatisticsCalculator* stats) { + DiscardOldPackets(timestamp_limit, 0, stats); +} + +void PacketBuffer::DiscardPacketsWithPayloadType(uint8_t payload_type, + StatisticsCalculator* stats) { + buffer_.remove_if([payload_type, stats](const Packet& p) { + if (p.payload_type != payload_type) { + return false; + } + LogPacketDiscarded(p.priority.codec_level, stats); + return true; + }); +} + +size_t PacketBuffer::NumPacketsInBuffer() const { + return buffer_.size(); +} + +size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const { + size_t num_samples = 0; + size_t last_duration = last_decoded_length; + for (const Packet& packet : buffer_) { + if (packet.frame) { + // TODO(hlundin): Verify that it's fine to count all packets and remove + // this check. + if (packet.priority != Packet::Priority(0, 0)) { + continue; + } + size_t duration = packet.frame->Duration(); + if (duration > 0) { + last_duration = duration; // Save the most up-to-date (valid) duration. 
+ } + } + num_samples += last_duration; + } + return num_samples; +} + +size_t PacketBuffer::GetSpanSamples(size_t last_decoded_length, + size_t sample_rate, + bool count_dtx_waiting_time) const { + if (buffer_.size() == 0) { + return 0; + } + + size_t span = buffer_.back().timestamp - buffer_.front().timestamp; + if (buffer_.back().frame && buffer_.back().frame->Duration() > 0) { + size_t duration = buffer_.back().frame->Duration(); + if (count_dtx_waiting_time && buffer_.back().frame->IsDtxPacket()) { + size_t waiting_time_samples = rtc::dchecked_cast<size_t>( + buffer_.back().waiting_time->ElapsedMs() * (sample_rate / 1000)); + duration = std::max(duration, waiting_time_samples); + } + span += duration; + } else { + span += last_decoded_length; + } + return span; +} + +bool PacketBuffer::ContainsDtxOrCngPacket( + const DecoderDatabase* decoder_database) const { + RTC_DCHECK(decoder_database); + for (const Packet& packet : buffer_) { + if ((packet.frame && packet.frame->IsDtxPacket()) || + decoder_database->IsComfortNoise(packet.payload_type)) { + return true; + } + } + return false; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h new file mode 100644 index 0000000000..c6fb47ffbf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_ +#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_ + +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/packet.h" +#include "modules/include/module_common_types_public.h" // IsNewerTimestamp + +namespace webrtc { + +class DecoderDatabase; +class StatisticsCalculator; +class TickTimer; +struct SmartFlushingConfig { + // When calculating the flushing threshold, the maximum between the target + // level and this value is used. + int target_level_threshold_ms = 500; + // A smart flush is triggered when the packet buffer contains a multiple of + // the target level. + int target_level_multiplier = 3; +}; + +// This is the actual buffer holding the packets before decoding. +class PacketBuffer { + public: + enum BufferReturnCodes { + kOK = 0, + kFlushed, + kPartialFlush, + kNotFound, + kBufferEmpty, + kInvalidPacket, + kInvalidPointer + }; + + // Constructor creates a buffer which can hold a maximum of + // `max_number_of_packets` packets. + PacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer); + + // Deletes all packets in the buffer before destroying the buffer. + virtual ~PacketBuffer(); + + PacketBuffer(const PacketBuffer&) = delete; + PacketBuffer& operator=(const PacketBuffer&) = delete; + + // Flushes the buffer and deletes all packets in it. + virtual void Flush(StatisticsCalculator* stats); + + // Partial flush. Flush packets but leave some packets behind. + virtual void PartialFlush(int target_level_ms, + size_t sample_rate, + size_t last_decoded_length, + StatisticsCalculator* stats); + + // Returns true for an empty buffer. + virtual bool Empty() const; + + // Inserts `packet` into the buffer. The buffer will take over ownership of + // the packet object. + // Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer + // was flushed due to overfilling. 
+ virtual int InsertPacket(Packet&& packet, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms, + const DecoderDatabase& decoder_database); + + // Inserts a list of packets into the buffer. The buffer will take over + // ownership of the packet objects. + // Returns PacketBuffer::kOK if all packets were inserted successfully. + // If the buffer was flushed due to overfilling, only a subset of the list is + // inserted, and PacketBuffer::kFlushed is returned. + // The last three parameters are included for legacy compatibility. + // TODO(hlundin): Redesign to not use current_*_payload_type and + // decoder_database. + virtual int InsertPacketList( + PacketList* packet_list, + const DecoderDatabase& decoder_database, + absl::optional<uint8_t>* current_rtp_payload_type, + absl::optional<uint8_t>* current_cng_rtp_payload_type, + StatisticsCalculator* stats, + size_t last_decoded_length, + size_t sample_rate, + int target_level_ms); + + // Gets the timestamp for the first packet in the buffer and writes it to the + // output variable `next_timestamp`. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int NextTimestamp(uint32_t* next_timestamp) const; + + // Gets the timestamp for the first packet in the buffer with a timestamp no + // lower than the input limit `timestamp`. The result is written to the output + // variable `next_timestamp`. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int NextHigherTimestamp(uint32_t timestamp, + uint32_t* next_timestamp) const; + + // Returns a (constant) pointer to the first packet in the buffer. Returns + // NULL if the buffer is empty. + virtual const Packet* PeekNextPacket() const; + + // Extracts the first packet in the buffer and returns it. + // Returns an empty optional if the buffer is empty. 
+ virtual absl::optional<Packet> GetNextPacket(); + + // Discards the first packet in the buffer. The packet is deleted. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int DiscardNextPacket(StatisticsCalculator* stats); + + // Discards all packets that are (strictly) older than timestamp_limit, + // but newer than timestamp_limit - horizon_samples. Setting horizon_samples + // to zero implies that the horizon is set to half the timestamp range. That + // is, if a packet is more than 2^31 timestamps into the future compared with + // timestamp_limit (including wrap-around), it is considered old. + virtual void DiscardOldPackets(uint32_t timestamp_limit, + uint32_t horizon_samples, + StatisticsCalculator* stats); + + // Discards all packets that are (strictly) older than timestamp_limit. + virtual void DiscardAllOldPackets(uint32_t timestamp_limit, + StatisticsCalculator* stats); + + // Removes all packets with a specific payload type from the buffer. + virtual void DiscardPacketsWithPayloadType(uint8_t payload_type, + StatisticsCalculator* stats); + + // Returns the number of packets in the buffer, including duplicates and + // redundant packets. + virtual size_t NumPacketsInBuffer() const; + + // Returns the number of samples in the buffer, including samples carried in + // duplicate and redundant packets. + virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const; + + // Returns the total duration in samples that the packets in the buffer spans + // across. + virtual size_t GetSpanSamples(size_t last_decoded_length, + size_t sample_rate, + bool count_dtx_waiting_time) const; + + // Returns true if the packet buffer contains any DTX or CNG packets. + virtual bool ContainsDtxOrCngPacket( + const DecoderDatabase* decoder_database) const; + + // Static method returning true if `timestamp` is older than `timestamp_limit` + // but less than `horizon_samples` behind `timestamp_limit`. 
For instance, + // with timestamp_limit = 100 and horizon_samples = 10, a timestamp in the + // range (90, 100) is considered obsolete, and will yield true. + // Setting `horizon_samples` to 0 is the same as setting it to 2^31, i.e., + // half the 32-bit timestamp range. + static bool IsObsoleteTimestamp(uint32_t timestamp, + uint32_t timestamp_limit, + uint32_t horizon_samples) { + return IsNewerTimestamp(timestamp_limit, timestamp) && + (horizon_samples == 0 || + IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples)); + } + + private: + absl::optional<SmartFlushingConfig> smart_flushing_config_; + size_t max_number_of_packets_; + PacketList buffer_; + const TickTimer* tick_timer_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc new file mode 100644 index 0000000000..1a054daca3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer_unittest.cc @@ -0,0 +1,989 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for PacketBuffer class. 
+ +#include "modules/audio_coding/neteq/packet_buffer.h" + +#include <memory> + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/mock/mock_decoder_database.h" +#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h" +#include "modules/audio_coding/neteq/packet.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::InSequence; +using ::testing::MockFunction; +using ::testing::Return; +using ::testing::StrictMock; + +namespace { +class MockEncodedAudioFrame : public webrtc::AudioDecoder::EncodedAudioFrame { + public: + MOCK_METHOD(size_t, Duration, (), (const, override)); + + MOCK_METHOD(bool, IsDtxPacket, (), (const, override)); + + MOCK_METHOD(absl::optional<DecodeResult>, + Decode, + (rtc::ArrayView<int16_t> decoded), + (const, override)); +}; + +// Helper class to generate packets. Packets must be deleted by the user. 
+class PacketGenerator { + public: + PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size); + virtual ~PacketGenerator() {} + void Reset(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size); + webrtc::Packet NextPacket( + int payload_size_bytes, + std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame); + + uint16_t seq_no_; + uint32_t ts_; + uint8_t pt_; + int frame_size_; +}; + +PacketGenerator::PacketGenerator(uint16_t seq_no, + uint32_t ts, + uint8_t pt, + int frame_size) { + Reset(seq_no, ts, pt, frame_size); +} + +void PacketGenerator::Reset(uint16_t seq_no, + uint32_t ts, + uint8_t pt, + int frame_size) { + seq_no_ = seq_no; + ts_ = ts; + pt_ = pt; + frame_size_ = frame_size; +} + +webrtc::Packet PacketGenerator::NextPacket( + int payload_size_bytes, + std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame) { + webrtc::Packet packet; + packet.sequence_number = seq_no_; + packet.timestamp = ts_; + packet.payload_type = pt_; + packet.payload.SetSize(payload_size_bytes); + ++seq_no_; + ts_ += frame_size_; + packet.frame = std::move(audio_frame); + return packet; +} + +struct PacketsToInsert { + uint16_t sequence_number; + uint32_t timestamp; + uint8_t payload_type; + bool primary; + // Order of this packet to appear upon extraction, after inserting a series + // of packets. A negative number means that it should have been discarded + // before extraction. + int extract_order; +}; + +} // namespace + +namespace webrtc { + +// Start of test definitions. + +TEST(PacketBuffer, CreateAndDestroy) { + TickTimer tick_timer; + PacketBuffer* buffer = new PacketBuffer(10, &tick_timer); // 10 packets. + EXPECT_TRUE(buffer->Empty()); + delete buffer; +} + +TEST(PacketBuffer, InsertPacket) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. 
+ PacketGenerator gen(17u, 4711u, 0, 10); + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + const int payload_len = 100; + const Packet packet = gen.NextPacket(payload_len, nullptr); + EXPECT_EQ(0, buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/10000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(4711u, next_ts); + EXPECT_FALSE(buffer.Empty()); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + const Packet* next_packet = buffer.PeekNextPacket(); + EXPECT_EQ(packet, *next_packet); // Compare contents. + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. + + // Do not explicitly flush buffer or delete packet to test that it is deleted + // with the buffer. (Tested with Valgrind or similar tool.) +} + +// Test to flush buffer. +TEST(PacketBuffer, FlushBuffer) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + const int payload_len = 10; + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + for (int i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10); + buffer.Flush(&mock_stats); + // Buffer should delete the payloads itself. + EXPECT_EQ(0u, buffer.NumPacketsInBuffer()); + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. 
+} + +// Test to fill the buffer over the limits, and verify that it flushes. +TEST(PacketBuffer, OverfillBuffer) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + const int payload_len = 10; + int i; + for (i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line. + + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10); + const Packet packet = gen.NextPacket(payload_len, nullptr); + // Insert 11th packet; should flush the buffer and insert it after flushing. + EXPECT_EQ(PacketBuffer::kFlushed, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + // Expect last inserted packet to be first in line. + EXPECT_EQ(packet.timestamp, next_ts); + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test a partial buffer flush. +TEST(PacketBuffer, PartialFlush) { + // Use a field trial to configure smart flushing. + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqSmartFlushing/enabled:true," + "target_level_threshold_ms:0,target_level_multiplier:2/"); + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. 
+ PacketGenerator gen(0, 0, 0, 10); + const int payload_len = 10; + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. + for (int i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/100, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(7); + buffer.PartialFlush(/*target_level_ms=*/30, + /*sample_rate=*/1000, + /*last_decoded_length=*/payload_len, + /*stats=*/&mock_stats); + // There should still be some packets left in the buffer. + EXPECT_EQ(3u, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test to fill the buffer over the limits, and verify that the smart flush +// functionality works as expected. +TEST(PacketBuffer, SmartFlushOverfillBuffer) { + // Use a field trial to configure smart flushing. + test::ScopedFieldTrials field_trials( + "WebRTC-Audio-NetEqSmartFlushing/enabled:true," + "target_level_threshold_ms:0,target_level_multiplier:2/"); + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + // Insert 10 small packets; should be ok. 
+ const int payload_len = 10; + int i; + for (i = 0; i < 10; ++i) { + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/100, + /*decoder_database=*/decoder_database)); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line. + + const Packet packet = gen.NextPacket(payload_len, nullptr); + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(6); + // Insert 11th packet; should cause a partial flush and insert the packet + // after flushing. + EXPECT_EQ(PacketBuffer::kPartialFlush, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/40, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(5u, buffer.NumPacketsInBuffer()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a list of packets. +TEST(PacketBuffer, InsertPacketList) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. 
+ for (int i = 0; i < 10; ++i) { + list.push_back(gen.NextPacket(payload_len, nullptr)); + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + + StrictMock<MockStatisticsCalculator> mock_stats; + + absl::optional<uint8_t> current_pt; + absl::optional<uint8_t> current_cng_pt; + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + EXPECT_EQ(0, current_pt); // Current payload type changed to 0. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type not changed. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a list of packets. Last packet is of a different payload type. +// Expecting the buffer to flush. +// TODO(hlundin): Remove this test when legacy operation is no longer needed. +TEST(PacketBuffer, InsertPacketListChangePayloadType) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + list.push_back(gen.NextPacket(payload_len, nullptr)); + } + // Insert 11th packet of another payload type (not CNG). 
+ { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload_type = 1; + list.push_back(std::move(packet)); + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info0(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info0)); + const DecoderDatabase::DecoderInfo info1(SdpAudioFormat("pcma", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(1)) + .WillRepeatedly(Return(&info1)); + + StrictMock<MockStatisticsCalculator> mock_stats; + + absl::optional<uint8_t> current_pt; + absl::optional<uint8_t> current_cng_pt; + EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10); + EXPECT_EQ( + PacketBuffer::kFlushed, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); // Only the last packet. + EXPECT_EQ(1, current_pt); // Current payload type changed to 1. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type not changed. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, ExtractOrderRedundancy) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. 
+ const int kPackets = 18; + const int kFrameSize = 10; + const int kPayloadLength = 10; + + PacketsToInsert packet_facts[kPackets] = { + {0xFFFD, 0xFFFFFFD7, 0, true, 0}, {0xFFFE, 0xFFFFFFE1, 0, true, 1}, + {0xFFFE, 0xFFFFFFD7, 1, false, -1}, {0xFFFF, 0xFFFFFFEB, 0, true, 2}, + {0xFFFF, 0xFFFFFFE1, 1, false, -1}, {0x0000, 0xFFFFFFF5, 0, true, 3}, + {0x0000, 0xFFFFFFEB, 1, false, -1}, {0x0001, 0xFFFFFFFF, 0, true, 4}, + {0x0001, 0xFFFFFFF5, 1, false, -1}, {0x0002, 0x0000000A, 0, true, 5}, + {0x0002, 0xFFFFFFFF, 1, false, -1}, {0x0003, 0x0000000A, 1, false, -1}, + {0x0004, 0x0000001E, 0, true, 7}, {0x0004, 0x00000014, 1, false, 6}, + {0x0005, 0x0000001E, 0, true, -1}, {0x0005, 0x00000014, 1, false, -1}, + {0x0006, 0x00000028, 0, true, 8}, {0x0006, 0x0000001E, 1, false, -1}, + }; + MockDecoderDatabase decoder_database; + + const size_t kExpectPacketsInBuffer = 9; + + std::vector<Packet> expect_order(kExpectPacketsInBuffer); + + PacketGenerator gen(0, 0, 0, kFrameSize); + + StrictMock<MockStatisticsCalculator> mock_stats; + + // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction + // check ensures that exactly one call to PacketsDiscarded happens in each + // DiscardNextPacket call. + InSequence s; + MockFunction<void(int check_point_id)> check; + for (int i = 0; i < kPackets; ++i) { + gen.Reset(packet_facts[i].sequence_number, packet_facts[i].timestamp, + packet_facts[i].payload_type, kFrameSize); + Packet packet = gen.NextPacket(kPayloadLength, nullptr); + packet.priority.codec_level = packet_facts[i].primary ? 
0 : 1; + if (packet_facts[i].extract_order < 0) { + if (packet.priority.codec_level > 0) { + EXPECT_CALL(mock_stats, SecondaryPacketsDiscarded(1)); + } else { + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + } + } + EXPECT_CALL(check, Call(i)); + EXPECT_EQ(PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/packet.Clone(), + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLength, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + if (packet_facts[i].extract_order >= 0) { + expect_order[packet_facts[i].extract_order] = std::move(packet); + } + check.Call(i); + } + + EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer()); + + for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) { + const absl::optional<Packet> packet = buffer.GetNextPacket(); + EXPECT_EQ(packet, expect_order[i]); // Compare contents. + } + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, DiscardPackets) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + PacketList list; + const int payload_len = 10; + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + constexpr int kTotalPackets = 10; + // Insert 10 small packets. + for (int i = 0; i < kTotalPackets; ++i) { + buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database); + } + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + + uint32_t current_ts = start_ts; + + // Discard them one by one and make sure that the right packets are at the + // front of the buffer. 
+ constexpr int kDiscardPackets = 5; + + // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction + // check ensures that exactly one call to PacketsDiscarded happens in each + // DiscardNextPacket call. + InSequence s; + MockFunction<void(int check_point_id)> check; + for (int i = 0; i < kDiscardPackets; ++i) { + uint32_t ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts)); + EXPECT_EQ(current_ts, ts); + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + EXPECT_CALL(check, Call(i)); + EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket(&mock_stats)); + current_ts += ts_increment; + check.Call(i); + } + + constexpr int kRemainingPackets = kTotalPackets - kDiscardPackets; + // This will discard all remaining packets but one. The oldest packet is older + // than the indicated horizon_samples, and will thus be left in the buffer. + constexpr size_t kSkipPackets = 1; + EXPECT_CALL(mock_stats, PacketsDiscarded(1)) + .Times(kRemainingPackets - kSkipPackets); + EXPECT_CALL(check, Call(17)); // Arbitrary id number. + buffer.DiscardOldPackets(start_ts + kTotalPackets * ts_increment, + kRemainingPackets * ts_increment, &mock_stats); + check.Call(17); // Same arbitrary id number. + + EXPECT_EQ(kSkipPackets, buffer.NumPacketsInBuffer()); + uint32_t ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts)); + EXPECT_EQ(current_ts, ts); + + // Discard all remaining packets. + EXPECT_CALL(mock_stats, PacketsDiscarded(kSkipPackets)); + buffer.DiscardAllOldPackets(start_ts + kTotalPackets * ts_increment, + &mock_stats); + + EXPECT_TRUE(buffer.Empty()); + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, Reordering) { + TickTimer tick_timer; + PacketBuffer buffer(100, &tick_timer); // 100 packets. 
+ const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + const int payload_len = 10; + + // Generate 10 small packets and insert them into a PacketList. Insert every + // odd packet to the front, and every even packet to the back, thus creating + // a (rather strange) reordering. + PacketList list; + for (int i = 0; i < 10; ++i) { + Packet packet = gen.NextPacket(payload_len, nullptr); + if (i % 2) { + list.push_front(std::move(packet)); + } else { + list.push_back(std::move(packet)); + } + } + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + absl::optional<uint8_t> current_pt; + absl::optional<uint8_t> current_cng_pt; + + StrictMock<MockStatisticsCalculator> mock_stats; + + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_EQ(10u, buffer.NumPacketsInBuffer()); + + // Extract them and make sure that they come out in the right order. + uint32_t current_ts = start_ts; + for (int i = 0; i < 10; ++i) { + const absl::optional<Packet> packet = buffer.GetNextPacket(); + ASSERT_TRUE(packet); + EXPECT_EQ(current_ts, packet->timestamp); + current_ts += ts_increment; + } + EXPECT_TRUE(buffer.Empty()); + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// The test first inserts a packet with narrow-band CNG, then a packet with +// wide-band speech. 
The expected behavior of the packet buffer is to detect a +// change in sample rate, even though no speech packet has been inserted before, +// and flush out the CNG packet. +TEST(PacketBuffer, CngFirstThenSpeechWithNewSampleRate) { + TickTimer tick_timer; + PacketBuffer buffer(10, &tick_timer); // 10 packets. + const uint8_t kCngPt = 13; + const int kPayloadLen = 10; + const uint8_t kSpeechPt = 100; + + MockDecoderDatabase decoder_database; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info_cng(SdpAudioFormat("cn", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(kCngPt)) + .WillRepeatedly(Return(&info_cng)); + const DecoderDatabase::DecoderInfo info_speech( + SdpAudioFormat("l16", 16000, 1), absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(kSpeechPt)) + .WillRepeatedly(Return(&info_speech)); + + // Insert first packet, which is narrow-band CNG. + PacketGenerator gen(0, 0, kCngPt, 10); + PacketList list; + list.push_back(gen.NextPacket(kPayloadLen, nullptr)); + absl::optional<uint8_t> current_pt; + absl::optional<uint8_t> current_cng_pt; + + StrictMock<MockStatisticsCalculator> mock_stats; + + EXPECT_EQ( + PacketBuffer::kOK, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLen, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + ASSERT_TRUE(buffer.PeekNextPacket()); + EXPECT_EQ(kCngPt, buffer.PeekNextPacket()->payload_type); + EXPECT_EQ(current_pt, absl::nullopt); // Current payload type not set. + EXPECT_EQ(kCngPt, current_cng_pt); // CNG payload type set. + + // Insert second packet, which is wide-band speech. 
+ { + Packet packet = gen.NextPacket(kPayloadLen, nullptr); + packet.payload_type = kSpeechPt; + list.push_back(std::move(packet)); + } + // Expect the buffer to flush out the CNG packet, since it does not match the + // new speech sample rate. + EXPECT_CALL(mock_stats, PacketsDiscarded(1)); + EXPECT_EQ( + PacketBuffer::kFlushed, + buffer.InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/kPayloadLen, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); + EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); + ASSERT_TRUE(buffer.PeekNextPacket()); + EXPECT_EQ(kSpeechPt, buffer.PeekNextPacket()->payload_type); + + EXPECT_EQ(kSpeechPt, current_pt); // Current payload type set. + EXPECT_EQ(absl::nullopt, current_cng_pt); // CNG payload type reset. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, Failures) { + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + int payload_len = 100; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + TickTimer tick_timer; + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + PacketBuffer* buffer = new PacketBuffer(100, &tick_timer); // 100 packets. + { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload.Clear(); + EXPECT_EQ(PacketBuffer::kInvalidPacket, + buffer->InsertPacket(/*packet=*/std::move(packet), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + } + // Buffer should still be empty. Test all empty-checks. 
+ uint32_t temp_ts; + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts)); + EXPECT_EQ(PacketBuffer::kBufferEmpty, + buffer->NextHigherTimestamp(0, &temp_ts)); + EXPECT_EQ(NULL, buffer->PeekNextPacket()); + EXPECT_FALSE(buffer->GetNextPacket()); + + // Discarding packets will not invoke mock_stats.PacketDiscarded() because the + // packet buffer is empty. + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket(&mock_stats)); + buffer->DiscardAllOldPackets(0, &mock_stats); + + // Insert one packet to make the buffer non-empty. + EXPECT_EQ( + PacketBuffer::kOK, + buffer->InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr), + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(NULL)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, + buffer->NextHigherTimestamp(0, NULL)); + delete buffer; + + // Insert packet list of three packets, where the second packet has an invalid + // payload. Expect first packet to be inserted, and the remaining two to be + // discarded. + buffer = new PacketBuffer(100, &tick_timer); // 100 packets. + PacketList list; + list.push_back(gen.NextPacket(payload_len, nullptr)); // Valid packet. + { + Packet packet = gen.NextPacket(payload_len, nullptr); + packet.payload.Clear(); // Invalid. + list.push_back(std::move(packet)); + } + list.push_back(gen.NextPacket(payload_len, nullptr)); // Valid packet. 
+ auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(&info)); + absl::optional<uint8_t> current_pt; + absl::optional<uint8_t> current_cng_pt; + EXPECT_EQ( + PacketBuffer::kInvalidPacket, + buffer->InsertPacketList(/*packet_list=*/&list, + /*decoder_database=*/decoder_database, + /*current_rtp_payload_type=*/&current_pt, + /*current_cng_rtp_payload_type=*/&current_cng_pt, + /*stats=*/&mock_stats, + /*last_decoded_length=*/payload_len, + /*sample_rate=*/1000, + /*target_level_ms=*/30)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1u, buffer->NumPacketsInBuffer()); + delete buffer; + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test packet comparison function. +// The function should return true if the first packet "goes before" the second. +TEST(PacketBuffer, ComparePackets) { + PacketGenerator gen(0, 0, 0, 10); + Packet a(gen.NextPacket(10, nullptr)); // SN = 0, TS = 0. + Packet b(gen.NextPacket(10, nullptr)); // SN = 1, TS = 10. + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Testing wrap-around case; 'a' is earlier but has a larger timestamp value. + a.timestamp = 0xFFFFFFFF - 10; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal packets. + EXPECT_TRUE(a == a); + EXPECT_FALSE(a != a); + EXPECT_FALSE(a < a); + EXPECT_FALSE(a > a); + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a >= a); + + // Test equal timestamps but different sequence numbers (0 and 1). 
+ a.timestamp = b.timestamp; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal timestamps but different sequence numbers (65535 and 1). + a.sequence_number = 0xFFFF; + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a < b); + EXPECT_FALSE(a > b); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(a >= b); + + // Test equal timestamps and sequence numbers, but differing priorities. + a.sequence_number = b.sequence_number; + a.priority = {1, 0}; + b.priority = {0, 0}; + // a after b + EXPECT_FALSE(a == b); + EXPECT_TRUE(a != b); + EXPECT_FALSE(a < b); + EXPECT_TRUE(a > b); + EXPECT_FALSE(a <= b); + EXPECT_TRUE(a >= b); + + Packet c(gen.NextPacket(0, nullptr)); // SN = 2, TS = 20. + Packet d(gen.NextPacket(0, nullptr)); // SN = 3, TS = 20. + c.timestamp = b.timestamp; + d.timestamp = b.timestamp; + c.sequence_number = b.sequence_number; + d.sequence_number = b.sequence_number; + c.priority = {1, 1}; + d.priority = {0, 1}; + // c after d + EXPECT_FALSE(c == d); + EXPECT_TRUE(c != d); + EXPECT_FALSE(c < d); + EXPECT_TRUE(c > d); + EXPECT_FALSE(c <= d); + EXPECT_TRUE(c >= d); + + // c after a + EXPECT_FALSE(c == a); + EXPECT_TRUE(c != a); + EXPECT_FALSE(c < a); + EXPECT_TRUE(c > a); + EXPECT_FALSE(c <= a); + EXPECT_TRUE(c >= a); + + // c after b + EXPECT_FALSE(c == b); + EXPECT_TRUE(c != b); + EXPECT_FALSE(c < b); + EXPECT_TRUE(c > b); + EXPECT_FALSE(c <= b); + EXPECT_TRUE(c >= b); + + // a after d + EXPECT_FALSE(a == d); + EXPECT_TRUE(a != d); + EXPECT_FALSE(a < d); + EXPECT_TRUE(a > d); + EXPECT_FALSE(a <= d); + EXPECT_TRUE(a >= d); + + // d after b + EXPECT_FALSE(d == b); + EXPECT_TRUE(d != b); + EXPECT_FALSE(d < b); + EXPECT_TRUE(d > b); + EXPECT_FALSE(d <= b); + EXPECT_TRUE(d >= b); +} + +TEST(PacketBuffer, GetSpanSamples) { + constexpr size_t kFrameSizeSamples = 10; + constexpr int kPayloadSizeBytes = 1; // Does not matter to this test. + constexpr uint32_t 
kStartTimeStamp = 0xFFFFFFFE; // Close to wrap around. + constexpr int kSampleRateHz = 48000; + constexpr bool KCountDtxWaitingTime = false; + TickTimer tick_timer; + PacketBuffer buffer(3, &tick_timer); + PacketGenerator gen(0, kStartTimeStamp, 0, kFrameSizeSamples); + StrictMock<MockStatisticsCalculator> mock_stats; + MockDecoderDatabase decoder_database; + + Packet packet_1 = gen.NextPacket(kPayloadSizeBytes, nullptr); + + std::unique_ptr<MockEncodedAudioFrame> mock_audio_frame = + std::make_unique<MockEncodedAudioFrame>(); + EXPECT_CALL(*mock_audio_frame, Duration()) + .WillRepeatedly(Return(kFrameSizeSamples)); + Packet packet_2 = + gen.NextPacket(kPayloadSizeBytes, std::move(mock_audio_frame)); + + RTC_DCHECK_GT(packet_1.timestamp, + packet_2.timestamp); // Timestamp wrapped around. + + EXPECT_EQ(PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/std::move(packet_1), + /*stats=*/&mock_stats, + /*last_decoded_length=*/kFrameSizeSamples, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + + constexpr size_t kLastDecodedSizeSamples = 2; + // packet_1 has no access to duration, and relies on last decoded duration as + // input. + EXPECT_EQ(kLastDecodedSizeSamples, + buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz, + KCountDtxWaitingTime)); + + EXPECT_EQ(PacketBuffer::kOK, + buffer.InsertPacket(/*packet=*/std::move(packet_2), + /*stats=*/&mock_stats, + /*last_decoded_length=*/kFrameSizeSamples, + /*sample_rate=*/1000, + /*target_level_ms=*/60, + /*decoder_database=*/decoder_database)); + + EXPECT_EQ(kFrameSizeSamples * 2, + buffer.GetSpanSamples(0, kSampleRateHz, KCountDtxWaitingTime)); + + // packet_2 has access to duration, and ignores last decoded duration as + // input. 
+ EXPECT_EQ(kFrameSizeSamples * 2, + buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz, + KCountDtxWaitingTime)); +} + +namespace { +void TestIsObsoleteTimestamp(uint32_t limit_timestamp) { + // Check with zero horizon, which implies that the horizon is at 2^31, i.e., + // half the timestamp range. + static const uint32_t kZeroHorizon = 0; + static const uint32_t k2Pow31Minus1 = 0x7FFFFFFF; + // Timestamp on the limit is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp, limit_timestamp, kZeroHorizon)); + // 1 sample behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1, + limit_timestamp, kZeroHorizon)); + // 2^31 - 1 samples behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - k2Pow31Minus1, + limit_timestamp, kZeroHorizon)); + // 1 sample ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp + 1, limit_timestamp, kZeroHorizon)); + // If |t1-t2|=2^31 and t1>t2, t2 is older than t1 but not the opposite. + uint32_t other_timestamp = limit_timestamp + (1 << 31); + uint32_t lowest_timestamp = std::min(limit_timestamp, other_timestamp); + uint32_t highest_timestamp = std::max(limit_timestamp, other_timestamp); + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp( + lowest_timestamp, highest_timestamp, kZeroHorizon)); + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + highest_timestamp, lowest_timestamp, kZeroHorizon)); + + // Fixed horizon at 10 samples. + static const uint32_t kHorizon = 10; + // Timestamp on the limit is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp, + limit_timestamp, kHorizon)); + // 1 sample behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1, + limit_timestamp, kHorizon)); + // 9 samples behind is old. + EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 9, + limit_timestamp, kHorizon)); + // 10 samples behind is not old. 
+ EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 10, + limit_timestamp, kHorizon)); + // 2^31 - 1 samples behind is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp( + limit_timestamp - k2Pow31Minus1, limit_timestamp, kHorizon)); + // 1 sample ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + 1, + limit_timestamp, kHorizon)); + // 2^31 samples ahead is not old. + EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + (1 << 31), + limit_timestamp, kHorizon)); +} +} // namespace + +// Test the IsObsoleteTimestamp method with different limit timestamps. +TEST(PacketBuffer, IsObsoleteTimestamp) { + TestIsObsoleteTimestamp(0); + TestIsObsoleteTimestamp(1); + TestIsObsoleteTimestamp(0xFFFFFFFF); // -1 in uint32_t. + TestIsObsoleteTimestamp(0x80000000); // 2^31. + TestIsObsoleteTimestamp(0x80000001); // 2^31 + 1. + TestIsObsoleteTimestamp(0x7FFFFFFF); // 2^31 - 1. +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc new file mode 100644 index 0000000000..9999d6764b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/post_decode_vad.h" + +namespace webrtc { + +PostDecodeVad::~PostDecodeVad() { + if (vad_instance_) + WebRtcVad_Free(vad_instance_); +} + +void PostDecodeVad::Enable() { + if (!vad_instance_) { + // Create the instance. 
+ vad_instance_ = WebRtcVad_Create(); + if (vad_instance_ == nullptr) { + // Failed to create instance. + Disable(); + return; + } + } + Init(); + enabled_ = true; +} + +void PostDecodeVad::Disable() { + enabled_ = false; + running_ = false; +} + +void PostDecodeVad::Init() { + running_ = false; + if (vad_instance_) { + WebRtcVad_Init(vad_instance_); + WebRtcVad_set_mode(vad_instance_, kVadMode); + running_ = true; + } +} + +void PostDecodeVad::Update(int16_t* signal, + size_t length, + AudioDecoder::SpeechType speech_type, + bool sid_frame, + int fs_hz) { + if (!vad_instance_ || !enabled_) { + return; + } + + if (speech_type == AudioDecoder::kComfortNoise || sid_frame || + fs_hz > 16000) { + // TODO(hlundin): Remove restriction on fs_hz. + running_ = false; + active_speech_ = true; + sid_interval_counter_ = 0; + } else if (!running_) { + ++sid_interval_counter_; + } + + if (sid_interval_counter_ >= kVadAutoEnable) { + Init(); + } + + if (length > 0 && running_) { + size_t vad_sample_index = 0; + active_speech_ = false; + // Loop through frame sizes 30, 20, and 10 ms. + for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10; + vad_frame_size_ms -= 10) { + size_t vad_frame_size_samples = + static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000); + while (length - vad_sample_index >= vad_frame_size_samples) { + int vad_return = + WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index], + vad_frame_size_samples); + active_speech_ |= (vad_return == 1); + vad_sample_index += vad_frame_size_samples; + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h new file mode 100644 index 0000000000..3bd91b9edb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ +#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "api/audio_codecs/audio_decoder.h" +#include "common_audio/vad/include/webrtc_vad.h" + +namespace webrtc { + +class PostDecodeVad { + public: + PostDecodeVad() + : enabled_(false), + running_(false), + active_speech_(true), + sid_interval_counter_(0), + vad_instance_(NULL) {} + + virtual ~PostDecodeVad(); + + PostDecodeVad(const PostDecodeVad&) = delete; + PostDecodeVad& operator=(const PostDecodeVad&) = delete; + + // Enables post-decode VAD. + void Enable(); + + // Disables post-decode VAD. + void Disable(); + + // Initializes post-decode VAD. + void Init(); + + // Updates post-decode VAD with the audio data in `signal` having `length` + // samples. The data is of type `speech_type`, at the sample rate `fs_hz`. + void Update(int16_t* signal, + size_t length, + AudioDecoder::SpeechType speech_type, + bool sid_frame, + int fs_hz); + + // Accessors. + bool enabled() const { return enabled_; } + bool running() const { return running_; } + bool active_speech() const { return active_speech_; } + + private: + static const int kVadMode = 0; // Sets aggressiveness to "Normal". + // Number of Update() calls without CNG/SID before re-enabling VAD. 
+ static const int kVadAutoEnable = 3000; + + bool enabled_; + bool running_; + bool active_speech_; + int sid_interval_counter_; + ::VadInst* vad_instance_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc new file mode 100644 index 0000000000..da3e4e864e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for PostDecodeVad class. + +#include "modules/audio_coding/neteq/post_decode_vad.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(PostDecodeVad, CreateAndDestroy) { + PostDecodeVad vad; +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc new file mode 100644 index 0000000000..232170b177 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/preemptive_expand.h" + +#include <algorithm> + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/time_stretch.h" + +namespace webrtc { + +PreemptiveExpand::ReturnCodes PreemptiveExpand::Process( + const int16_t* input, + size_t input_length, + size_t old_data_length, + AudioMultiVector* output, + size_t* length_change_samples) { + old_data_length_per_channel_ = old_data_length; + // Input length must be (almost) 30 ms. + // Also, the new part must be at least `overlap_samples_` elements. + static const size_t k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate. + if (num_channels_ == 0 || + input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ || + old_data_length >= input_length / num_channels_ - overlap_samples_) { + // Length of input data too short to do preemptive expand. Simply move all + // data from input to output. + output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kError; + } + const bool kFastMode = false; // Fast mode is not available for PE Expand. + return TimeStretch::Process(input, input_length, kFastMode, output, + length_change_samples); +} + +void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len, + int16_t* best_correlation, + size_t* peak_index) const { + // When the signal does not contain any active speech, the correlation does + // not matter. Simply set it to zero. + *best_correlation = 0; + + // For low energy expansion, the new data can be less than 15 ms, + // but we must ensure that best_correlation is not larger than the length of + // the new data. + // but we must ensure that best_correlation is not larger than the new data. 
+ *peak_index = std::min(*peak_index, len - old_data_length_per_channel_); +} + +PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch( + const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool /*fast_mode*/, + AudioMultiVector* output) const { + // Pre-calculate common multiplication with `fs_mult_`. + // 120 corresponds to 15 ms. + size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120); + // Check for strong correlation (>0.9 in Q14) and at least 15 ms new data, + // or passive speech. + if (((best_correlation > kCorrelationThreshold) && + (old_data_length_per_channel_ <= fs_mult_120)) || + !active_speech) { + // Do accelerate operation by overlap add. + + // Set length of the first part, not to be modified. + size_t unmodified_length = + std::max(old_data_length_per_channel_, fs_mult_120); + // Copy first part, including cross-fade region. + output->PushBackInterleaved(rtc::ArrayView<const int16_t>( + input, (unmodified_length + peak_index) * num_channels_)); + // Copy the last `peak_index` samples up to 15 ms to `temp_vector`. + AudioMultiVector temp_vector(num_channels_); + temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[(unmodified_length - peak_index) * num_channels_], + peak_index * num_channels_)); + // Cross-fade `temp_vector` onto the end of `output`. + output->CrossFade(temp_vector, peak_index); + // Copy the last unmodified part, 15 ms + pitch period until the end. + output->PushBackInterleaved(rtc::ArrayView<const int16_t>( + &input[unmodified_length * num_channels_], + input_length - unmodified_length * num_channels_)); + + if (active_speech) { + return kSuccess; + } else { + return kSuccessLowEnergy; + } + } else { + // Accelerate not allowed. Simply move all data from decoded to outData. 
+ output->PushBackInterleaved( + rtc::ArrayView<const int16_t>(input, input_length)); + return kNoStretch; + } +} + +PreemptiveExpand* PreemptiveExpandFactory::Create( + int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise, + size_t overlap_samples) const { + return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise, + overlap_samples); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h new file mode 100644 index 0000000000..6338b993fd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_ +#define MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/neteq/time_stretch.h" + +namespace webrtc { + +class AudioMultiVector; +class BackgroundNoise; + +// This class implements the PreemptiveExpand operation. Most of the work is +// done in the base class TimeStretch, which is shared with the Accelerate +// operation. In the PreemptiveExpand class, the operations that are specific to +// PreemptiveExpand are implemented. 
+class PreemptiveExpand : public TimeStretch {
+ public:
+  // `overlap_samples` is the minimum amount of new data, in samples per
+  // channel, that Process() requires beyond the old data.
+  PreemptiveExpand(int sample_rate_hz,
+                   size_t num_channels,
+                   const BackgroundNoise& background_noise,
+                   size_t overlap_samples)
+      : TimeStretch(sample_rate_hz, num_channels, background_noise),
+        old_data_length_per_channel_(0),
+        overlap_samples_(overlap_samples) {}
+
+  PreemptiveExpand(const PreemptiveExpand&) = delete;
+  PreemptiveExpand& operator=(const PreemptiveExpand&) = delete;
+
+  // This method performs the actual PreemptiveExpand operation. The samples are
+  // read from `pw16_decoded`, of length `len` elements, and are written to
+  // `output`. `old_data_len` is the per-channel length of the old data at the
+  // start of the input. The number of samples added through time-stretching
+  // is provided in the output `length_change_samples`. The method returns
+  // the outcome of the operation as an enumerator value.
+  ReturnCodes Process(const int16_t* pw16_decoded,
+                      size_t len,
+                      size_t old_data_len,
+                      AudioMultiVector* output,
+                      size_t* length_change_samples);
+
+ protected:
+  // Sets the parameters `best_correlation` and `peak_index` to suitable
+  // values when the signal contains no active speech.
+  void SetParametersForPassiveSpeech(size_t input_length,
+                                     int16_t* best_correlation,
+                                     size_t* peak_index) const override;
+
+  // Checks the criteria for performing the time-stretching operation and,
+  // if possible, performs the time-stretching.
+  ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
+                                      size_t input_length,
+                                      size_t peak_index,
+                                      int16_t best_correlation,
+                                      bool active_speech,
+                                      bool /*fast_mode*/,
+                                      AudioMultiVector* output) const override;
+
+ private:
+  // Value of `old_data_len` from the most recent Process() call.
+  size_t old_data_length_per_channel_;
+  // Minimum amount of new data required, as given to the constructor.
+  size_t overlap_samples_;
+};
+
+// Creates PreemptiveExpand objects. Create() is virtual so that a subclass can
+// substitute its own object.
+struct PreemptiveExpandFactory {
+  PreemptiveExpandFactory() {}
+  virtual ~PreemptiveExpandFactory() {}
+
+  // Returns a newly allocated PreemptiveExpand built from the arguments.
+  virtual PreemptiveExpand* Create(int sample_rate_hz,
+                                   size_t num_channels,
+                                   const BackgroundNoise& background_noise,
+                                   size_t overlap_samples) const;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc
new file mode 100644
index 0000000000..ada175831c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/neteq/random_vector.h" + +namespace webrtc { + +const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = { + 2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115, + 9598, -10380, -4959, -1280, -21716, 7133, -1522, 13458, -3902, + 2789, -675, 3441, 5016, -13599, -4003, -2739, 3922, -7209, + 13352, -11617, -7241, 12905, -2314, 5426, 10121, -9702, 11207, + -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112, -613, + 201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552, + -1650, -480, -1237, 18720, -11858, -8303, -8212, 865, -2890, + -16968, 12052, -5845, -5912, 9777, -5665, -6294, 5426, -4737, + -6335, 1652, 761, 3832, 641, -8552, -9084, -5753, 8146, + 12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403, 11407, + 6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212, + 2891, -866, -404, -4807, -2324, -1917, -2388, -6470, -3895, + -10300, 5323, -5403, 2205, 4640, 7022, -21186, -6244, -882, + -10031, -3395, -12885, 7155, -5339, 5079, -2645, -9515, 6622, + 14651, 15852, 359, 122, 8246, -3502, -6696, -3679, -13535, + -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219, + 1141, 3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123, + -8969, 4152, 4117, 13792, 5742, 16168, 8661, -1609, -6095, + 1881, 14380, -5588, 6758, -6425, -22969, -7269, 7031, 1119, + -1611, -5850, -11281, 3559, -8952, -10146, -4667, -16251, -1538, + 2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559, + 4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036, + 13144, -1588, -5304, -2344, -449, -5705, -8894, 5205, -17904, + -11188, -1022, 4852, 10101, -5255, -4200, -752, 7941, -1543, + 5959, 14719, 13346, 17045, -15605, -1678, -1600, -9230, 68, + 23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947, + 4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298, + 2784, -3317, -6612, -20541, 4166, 4181, -8625, 3562, 12890, + 4761, 3205, -12259, -8579}; + +void RandomVector::Reset() { + seed_ = 777; + seed_increment_ = 
1; +} + +void RandomVector::Generate(size_t length, int16_t* output) { + for (size_t i = 0; i < length; i++) { + seed_ += seed_increment_; + size_t position = seed_ & (kRandomTableSize - 1); + output[i] = kRandomTable[position]; + } +} + +void RandomVector::IncreaseSeedIncrement(int16_t increase_by) { + seed_increment_ += increase_by; + seed_increment_ &= kRandomTableSize - 1; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h new file mode 100644 index 0000000000..4a782f1116 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace webrtc { + +// This class generates pseudo-random samples. +class RandomVector { + public: + static const size_t kRandomTableSize = 256; + static const int16_t kRandomTable[kRandomTableSize]; + + RandomVector() : seed_(777), seed_increment_(1) {} + + RandomVector(const RandomVector&) = delete; + RandomVector& operator=(const RandomVector&) = delete; + + void Reset(); + + void Generate(size_t length, int16_t* output); + + void IncreaseSeedIncrement(int16_t increase_by); + + // Accessors and mutators. 
+ int16_t seed_increment() { return seed_increment_; } + void set_seed_increment(int16_t value) { seed_increment_ = value; } + + private: + uint32_t seed_; + int16_t seed_increment_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc new file mode 100644 index 0000000000..44479a6dd6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/random_vector_unittest.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for RandomVector class. + +#include "modules/audio_coding/neteq/random_vector.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(RandomVector, CreateAndDestroy) { + RandomVector random_vector; +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc new file mode 100644 index 0000000000..7438f25301 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/red_payload_splitter.h" + +#include <stddef.h> + +#include <cstdint> +#include <list> +#include <utility> +#include <vector> + +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/packet.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +// The method loops through a list of packets {A, B, C, ...}. Each packet is +// split into its corresponding RED payloads, {A1, A2, ...}, which is +// temporarily held in the list `new_packets`. +// When the first packet in `packet_list` has been processed, the original +// packet is replaced by the new ones in `new_packets`, so that `packet_list` +// becomes: {A1, A2, ..., B, C, ...}. The method then continues with B, and C, +// until all the original packets have been replaced by their split payloads. +bool RedPayloadSplitter::SplitRed(PacketList* packet_list) { + // Too many RED blocks indicates that something is wrong. Clamp it at some + // reasonable value. 
+ const size_t kMaxRedBlocks = 32; + bool ret = true; + PacketList::iterator it = packet_list->begin(); + while (it != packet_list->end()) { + const Packet& red_packet = *it; + RTC_DCHECK(!red_packet.payload.empty()); + const uint8_t* payload_ptr = red_packet.payload.data(); + size_t payload_length = red_packet.payload.size(); + + // Read RED headers (according to RFC 2198): + // + // 0 1 2 3 + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // |F| block PT | timestamp offset | block length | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // Last RED header: + // 0 1 2 3 4 5 6 7 + // +-+-+-+-+-+-+-+-+ + // |0| Block PT | + // +-+-+-+-+-+-+-+-+ + + struct RedHeader { + uint8_t payload_type; + uint32_t timestamp; + size_t payload_length; + }; + + std::vector<RedHeader> new_headers; + bool last_block = false; + size_t sum_length = 0; + while (!last_block) { + if (payload_length == 0) { + RTC_LOG(LS_WARNING) << "SplitRed header too short"; + return false; + } + RedHeader new_header; + // Check the F bit. If F == 0, this was the last block. + last_block = ((*payload_ptr & 0x80) == 0); + // Bits 1 through 7 are payload type. + new_header.payload_type = payload_ptr[0] & 0x7F; + if (last_block) { + // No more header data to read. + sum_length += kRedLastHeaderLength; // Account for RED header size. + new_header.timestamp = red_packet.timestamp; + new_header.payload_length = red_packet.payload.size() - sum_length; + payload_ptr += kRedLastHeaderLength; // Advance to first payload byte. + payload_length -= kRedLastHeaderLength; + } else { + if (payload_length < kRedHeaderLength) { + RTC_LOG(LS_WARNING) << "SplitRed header too short"; + return false; + } + // Bits 8 through 21 are timestamp offset. 
+ int timestamp_offset = + (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2); + new_header.timestamp = red_packet.timestamp - timestamp_offset; + // Bits 22 through 31 are payload length. + new_header.payload_length = + ((payload_ptr[2] & 0x03) << 8) + payload_ptr[3]; + + sum_length += new_header.payload_length; + sum_length += kRedHeaderLength; // Account for RED header size. + + payload_ptr += kRedHeaderLength; // Advance to next RED header. + payload_length -= kRedHeaderLength; + } + // Store in new list of packets. + if (new_header.payload_length > 0) { + new_headers.push_back(new_header); + } + } + + if (new_headers.size() <= kMaxRedBlocks) { + // Populate the new packets with payload data. + // `payload_ptr` now points at the first payload byte. + PacketList new_packets; // An empty list to store the split packets in. + for (size_t i = 0; i != new_headers.size(); ++i) { + const auto& new_header = new_headers[i]; + size_t payload_length = new_header.payload_length; + if (payload_ptr + payload_length > + red_packet.payload.data() + red_packet.payload.size()) { + // The block lengths in the RED headers do not match the overall + // packet length. Something is corrupt. Discard this and the remaining + // payloads from this packet. 
+ RTC_LOG(LS_WARNING) << "SplitRed length mismatch"; + ret = false; + break; + } + + Packet new_packet; + new_packet.timestamp = new_header.timestamp; + new_packet.payload_type = new_header.payload_type; + new_packet.sequence_number = red_packet.sequence_number; + new_packet.priority.red_level = + rtc::dchecked_cast<int>((new_headers.size() - 1) - i); + new_packet.payload.SetData(payload_ptr, payload_length); + new_packet.packet_info = RtpPacketInfo( + /*ssrc=*/red_packet.packet_info.ssrc(), + /*csrcs=*/std::vector<uint32_t>(), + /*rtp_timestamp=*/new_packet.timestamp, + red_packet.packet_info.audio_level(), + /*absolute_capture_time=*/absl::nullopt, + /*receive_time=*/red_packet.packet_info.receive_time()); + new_packets.push_front(std::move(new_packet)); + payload_ptr += payload_length; + } + // Insert new packets into original list, before the element pointed to by + // iterator `it`. + packet_list->splice(it, std::move(new_packets)); + } else { + RTC_LOG(LS_WARNING) << "SplitRed too many blocks: " << new_headers.size(); + ret = false; + } + // Remove `it` from the packet list. This operation effectively moves the + // iterator `it` to the next packet in the list. Thus, we do not have to + // increment it manually. + it = packet_list->erase(it); + } + return ret; +} + +void RedPayloadSplitter::CheckRedPayloads( + PacketList* packet_list, + const DecoderDatabase& decoder_database) { + int main_payload_type = -1; + for (auto it = packet_list->begin(); it != packet_list->end(); /* */) { + uint8_t this_payload_type = it->payload_type; + if (decoder_database.IsRed(this_payload_type)) { + it = packet_list->erase(it); + continue; + } + if (!decoder_database.IsDtmf(this_payload_type) && + !decoder_database.IsComfortNoise(this_payload_type)) { + if (main_payload_type == -1) { + // This is the first packet in the list which is non-DTMF non-CNG. 
+ main_payload_type = this_payload_type; + } else { + if (this_payload_type != main_payload_type) { + // We do not allow redundant payloads of a different type. + // Remove `it` from the packet list. This operation effectively + // moves the iterator `it` to the next packet in the list. Thus, we + // do not have to increment it manually. + it = packet_list->erase(it); + continue; + } + } + } + ++it; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h new file mode 100644 index 0000000000..2f48e4b7d4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_ + +#include "modules/audio_coding/neteq/packet.h" + +namespace webrtc { + +class DecoderDatabase; + +static const size_t kRedHeaderLength = 4; // 4 bytes RED header. +static const size_t kRedLastHeaderLength = + 1; // reduced size for last RED header. +// This class handles splitting of RED payloads into smaller parts. +// Codec-specific packet splitting can be performed by +// AudioDecoder::ParsePayload. 
+class RedPayloadSplitter {
+ public:
+  RedPayloadSplitter() {}
+
+  virtual ~RedPayloadSplitter() {}
+
+  RedPayloadSplitter(const RedPayloadSplitter&) = delete;
+  RedPayloadSplitter& operator=(const RedPayloadSplitter&) = delete;
+
+  // Splits each packet in `packet_list` into its separate RED payloads. Each
+  // RED payload is packetized into a Packet. The original elements in
+  // `packet_list` are properly deleted, and replaced by the new packets.
+  // Note that all packets in `packet_list` must be RED payloads, i.e., have
+  // RED headers according to RFC 2198 at the very beginning of the payload.
+  // Returns true if all packets were split successfully, and false if any
+  // packet had a truncated header, inconsistent block lengths or too many
+  // blocks.
+  virtual bool SplitRed(PacketList* packet_list);
+
+  // Checks all packets in `packet_list`. Packets that are DTMF events or
+  // comfort noise payloads are kept. Except that, only one single payload type
+  // is accepted. Any packet with another payload type is discarded. Packets
+  // whose payload type is RED are always discarded.
+  virtual void CheckRedPayloads(PacketList* packet_list,
+                                const DecoderDatabase& decoder_database);
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc
new file mode 100644
index 0000000000..a0ba5414ea
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter_unittest.cc
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for RedPayloadSplitter class.
+ +#include "modules/audio_coding/neteq/red_payload_splitter.h" + + +#include <memory> +#include <utility> // pair + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "modules/audio_coding/neteq/packet.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "test/mock_audio_decoder_factory.h" + +using ::testing::Return; +using ::testing::ReturnNull; + +namespace webrtc { + +static const int kRedPayloadType = 100; +static const size_t kPayloadLength = 10; +static const uint16_t kSequenceNumber = 0; +static const uint32_t kBaseTimestamp = 0x12345678; + +// A possible Opus packet that contains FEC is the following. +// The frame is 20 ms in duration. +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |0|0|0|0|1|0|0|0|x|1|x|x|x|x|x|x|x| | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +// | Compressed frame 1 (N-2 bytes)... : +// : | +// | | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +void CreateOpusFecPayload(uint8_t* payload, + size_t payload_length, + uint8_t payload_value) { + if (payload_length < 2) { + return; + } + payload[0] = 0x08; + payload[1] = 0x40; + memset(&payload[2], payload_value, payload_length - 2); +} + +// RED headers (according to RFC 2198): +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |F| block PT | timestamp offset | block length | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// Last RED header: +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |0| Block PT | +// +-+-+-+-+-+-+-+-+ + +// Creates a RED packet, with `num_payloads` payloads, with payload types given +// by the values in array `payload_types` (which must be of length +// `num_payloads`). 
Each redundant payload is `timestamp_offset` samples +// "behind" the the previous payload. +Packet CreateRedPayload(size_t num_payloads, + uint8_t* payload_types, + int timestamp_offset, + bool embed_opus_fec = false) { + Packet packet; + packet.payload_type = kRedPayloadType; + packet.timestamp = kBaseTimestamp; + packet.sequence_number = kSequenceNumber; + packet.payload.SetSize((kPayloadLength + 1) + + (num_payloads - 1) * + (kPayloadLength + kRedHeaderLength)); + uint8_t* payload_ptr = packet.payload.data(); + for (size_t i = 0; i < num_payloads; ++i) { + // Write the RED headers. + if (i == num_payloads - 1) { + // Special case for last payload. + *payload_ptr = payload_types[i] & 0x7F; // F = 0; + ++payload_ptr; + break; + } + *payload_ptr = payload_types[i] & 0x7F; + // Not the last block; set F = 1. + *payload_ptr |= 0x80; + ++payload_ptr; + int this_offset = + rtc::checked_cast<int>((num_payloads - i - 1) * timestamp_offset); + *payload_ptr = this_offset >> 6; + ++payload_ptr; + RTC_DCHECK_LE(kPayloadLength, 1023); // Max length described by 10 bits. + *payload_ptr = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8); + ++payload_ptr; + *payload_ptr = kPayloadLength & 0xFF; + ++payload_ptr; + } + for (size_t i = 0; i < num_payloads; ++i) { + // Write `i` to all bytes in each payload. + if (embed_opus_fec) { + CreateOpusFecPayload(payload_ptr, kPayloadLength, + static_cast<uint8_t>(i)); + } else { + memset(payload_ptr, static_cast<int>(i), kPayloadLength); + } + payload_ptr += kPayloadLength; + } + return packet; +} + +// Create a packet with all payload bytes set to `payload_value`. 
+Packet CreatePacket(uint8_t payload_type,
+                    size_t payload_length,
+                    uint8_t payload_value,
+                    bool opus_fec = false) {
+  Packet result;
+  result.payload_type = payload_type;
+  result.timestamp = kBaseTimestamp;
+  result.sequence_number = kSequenceNumber;
+  result.payload.SetSize(payload_length);
+  // Fill the freshly sized buffer, either plainly or with the Opus FEC layout.
+  uint8_t* const bytes = result.payload.data();
+  const size_t num_bytes = result.payload.size();
+  if (!opus_fec) {
+    memset(bytes, payload_value, num_bytes);
+  } else {
+    CreateOpusFecPayload(bytes, num_bytes, payload_value);
+  }
+  return result;
+}
+
+// Compares every attribute of `packet` with the expected values passed in the
+// other parameters, and requires each payload byte to equal `payload_value`.
+void VerifyPacket(const Packet& packet,
+                  size_t payload_length,
+                  uint8_t payload_type,
+                  uint16_t sequence_number,
+                  uint32_t timestamp,
+                  uint8_t payload_value,
+                  Packet::Priority priority) {
+  EXPECT_EQ(payload_length, packet.payload.size());
+  EXPECT_EQ(payload_type, packet.payload_type);
+  EXPECT_EQ(sequence_number, packet.sequence_number);
+  EXPECT_EQ(timestamp, packet.timestamp);
+  EXPECT_EQ(priority, packet.priority);
+  ASSERT_FALSE(packet.payload.empty());
+  const uint8_t* const bytes = packet.payload.data();
+  const size_t num_bytes = packet.payload.size();
+  for (size_t pos = 0; pos != num_bytes; ++pos) {
+    ASSERT_EQ(payload_value, bytes[pos]);
+  }
+}
+
+// Convenience overload: `primary` selects between the priority of a primary
+// payload, {0, 0}, and that of a first-level redundant payload, {0, 1}.
+void VerifyPacket(const Packet& packet,
+                  size_t payload_length,
+                  uint8_t payload_type,
+                  uint16_t sequence_number,
+                  uint32_t timestamp,
+                  uint8_t payload_value,
+                  bool primary) {
+  const Packet::Priority expected_priority{0, primary ? 0 : 1};
+  VerifyPacket(packet, payload_length, payload_type, sequence_number, timestamp,
+               payload_value, expected_priority);
+}
+
+// Start of test definitions.
+
+// Construction and destruction on the heap must work without any other calls.
+TEST(RedPayloadSplitter, CreateAndDestroy) {
+  RedPayloadSplitter* const heap_splitter = new RedPayloadSplitter;
+  delete heap_splitter;
+}
+
+// Packet A is split into A1 and A2.
+TEST(RedPayloadSplitter, OnePacketTwoPayloads) { + uint8_t payload_types[] = {0, 0}; + const int kTimestampOffset = 160; + PacketList packet_list; + packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset)); + RedPayloadSplitter splitter; + EXPECT_TRUE(splitter.SplitRed(&packet_list)); + ASSERT_EQ(2u, packet_list.size()); + // Check first packet. The first in list should always be the primary payload. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1], + kSequenceNumber, kBaseTimestamp, 1, true); + packet_list.pop_front(); + // Check second packet. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber, kBaseTimestamp - kTimestampOffset, 0, false); +} + +// Packets A and B are not split at all. Only the RED header in each packet is +// removed. +TEST(RedPayloadSplitter, TwoPacketsOnePayload) { + uint8_t payload_types[] = {0}; + const int kTimestampOffset = 160; + // Create first packet, with a single RED payload. + PacketList packet_list; + packet_list.push_back(CreateRedPayload(1, payload_types, kTimestampOffset)); + // Create second packet, with a single RED payload. + { + Packet packet = CreateRedPayload(1, payload_types, kTimestampOffset); + // Manually change timestamp and sequence number of second packet. + packet.timestamp += kTimestampOffset; + packet.sequence_number++; + packet_list.push_back(std::move(packet)); + } + RedPayloadSplitter splitter; + EXPECT_TRUE(splitter.SplitRed(&packet_list)); + ASSERT_EQ(2u, packet_list.size()); + // Check first packet. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber, kBaseTimestamp, 0, true); + packet_list.pop_front(); + // Check second packet. 
+ VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0, true); +} + +// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with +// attributes as follows: +// +// A1* A2 A3 B1* B2 B3 +// Payload type 0 1 2 0 1 2 +// Timestamp b b-o b-2o b+o b b-o +// Sequence number 0 0 0 1 1 1 +// +// b = kBaseTimestamp, o = kTimestampOffset, * = primary. +TEST(RedPayloadSplitter, TwoPacketsThreePayloads) { + uint8_t payload_types[] = {2, 1, 0}; // Primary is the last one. + const int kTimestampOffset = 160; + // Create first packet, with 3 RED payloads. + PacketList packet_list; + packet_list.push_back(CreateRedPayload(3, payload_types, kTimestampOffset)); + // Create first packet, with 3 RED payloads. + { + Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset); + // Manually change timestamp and sequence number of second packet. + packet.timestamp += kTimestampOffset; + packet.sequence_number++; + packet_list.push_back(std::move(packet)); + } + RedPayloadSplitter splitter; + EXPECT_TRUE(splitter.SplitRed(&packet_list)); + ASSERT_EQ(6u, packet_list.size()); + // Check first packet, A1. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2], + kSequenceNumber, kBaseTimestamp, 2, {0, 0}); + packet_list.pop_front(); + // Check second packet, A2. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1], + kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1, {0, 1}); + packet_list.pop_front(); + // Check third packet, A3. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0, + {0, 2}); + packet_list.pop_front(); + // Check fourth packet, B1. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2], + kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 2, + {0, 0}); + packet_list.pop_front(); + // Check fifth packet, B2. 
+ VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1], + kSequenceNumber + 1, kBaseTimestamp, 1, {0, 1}); + packet_list.pop_front(); + // Check sixth packet, B3. + VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset, 0, + {0, 2}); +} + +// Creates a list with 4 packets with these payload types: +// 0 = CNGnb +// 1 = PCMu +// 2 = DTMF (AVT) +// 3 = iLBC +// We expect the method CheckRedPayloads to discard the iLBC packet, since it +// is a non-CNG, non-DTMF payload of another type than the first speech payload +// found in the list (which is PCMu). +TEST(RedPayloadSplitter, CheckRedPayloads) { + PacketList packet_list; + for (uint8_t i = 0; i <= 3; ++i) { + // Create packet with payload type `i`, payload length 10 bytes, all 0. + packet_list.push_back(CreatePacket(i, 10, 0)); + } + + // Use a real DecoderDatabase object here instead of a mock, since it is + // easier to just register the payload types and let the actual implementation + // do its job. + DecoderDatabase decoder_database( + rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt); + decoder_database.RegisterPayload(0, SdpAudioFormat("cn", 8000, 1)); + decoder_database.RegisterPayload(1, SdpAudioFormat("pcmu", 8000, 1)); + decoder_database.RegisterPayload(2, + SdpAudioFormat("telephone-event", 8000, 1)); + decoder_database.RegisterPayload(3, SdpAudioFormat("ilbc", 8000, 1)); + + RedPayloadSplitter splitter; + splitter.CheckRedPayloads(&packet_list, decoder_database); + + ASSERT_EQ(3u, packet_list.size()); // Should have dropped the last packet. + // Verify packets. The loop verifies that payload types 0, 1, and 2 are in the + // list. 
+ for (int i = 0; i <= 2; ++i) { + VerifyPacket(packet_list.front(), 10, i, kSequenceNumber, kBaseTimestamp, 0, + true); + packet_list.pop_front(); + } + EXPECT_TRUE(packet_list.empty()); +} + +// This test creates a RED packet where the payloads also have the payload type +// for RED. That is, some kind of weird nested RED packet. This is not supported +// and the splitter should discard all packets. +TEST(RedPayloadSplitter, CheckRedPayloadsRecursiveRed) { + PacketList packet_list; + for (uint8_t i = 0; i <= 3; ++i) { + // Create packet with RED payload type, payload length 10 bytes, all 0. + packet_list.push_back(CreatePacket(kRedPayloadType, 10, 0)); + } + + // Use a real DecoderDatabase object here instead of a mock, since it is + // easier to just register the payload types and let the actual implementation + // do its job. + DecoderDatabase decoder_database( + rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt); + decoder_database.RegisterPayload(kRedPayloadType, + SdpAudioFormat("red", 8000, 1)); + + RedPayloadSplitter splitter; + splitter.CheckRedPayloads(&packet_list, decoder_database); + + EXPECT_TRUE(packet_list.empty()); // Should have dropped all packets. +} + +// Packet A is split into A1, A2 and A3. But the length parameter is off, so +// the last payloads should be discarded. +TEST(RedPayloadSplitter, WrongPayloadLength) { + uint8_t payload_types[] = {0, 0, 0}; + const int kTimestampOffset = 160; + PacketList packet_list; + { + Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset); + // Manually tamper with the payload length of the packet. + // This is one byte too short for the second payload (out of three). + // We expect only the first payload to be returned. + packet.payload.SetSize(packet.payload.size() - (kPayloadLength + 1)); + packet_list.push_back(std::move(packet)); + } + RedPayloadSplitter splitter; + EXPECT_FALSE(splitter.SplitRed(&packet_list)); + ASSERT_EQ(1u, packet_list.size()); + // Check first packet. 
+ VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0], + kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0, + {0, 2}); + packet_list.pop_front(); +} + +// Test that we reject packets too short to contain a RED header. +TEST(RedPayloadSplitter, RejectsIncompleteHeaders) { + RedPayloadSplitter splitter; + + uint8_t payload_types[] = {0, 0}; + const int kTimestampOffset = 160; + + PacketList packet_list; + + // Truncate the packet such that the first block can not be parsed. + packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset)); + packet_list.front().payload.SetSize(4); + EXPECT_FALSE(splitter.SplitRed(&packet_list)); + EXPECT_FALSE(packet_list.empty()); + + // Truncate the packet such that the first block can not be parsed. + packet_list.front().payload.SetSize(3); + EXPECT_FALSE(splitter.SplitRed(&packet_list)); + EXPECT_FALSE(packet_list.empty()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc new file mode 100644 index 0000000000..f6e073fc88 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/reorder_optimizer.h" + +#include <algorithm> +#include <limits> +#include <vector> + +namespace webrtc { + +namespace { + +constexpr int kDelayBuckets = 100; +constexpr int kBucketSizeMs = 20; + +} // namespace + +ReorderOptimizer::ReorderOptimizer(int forget_factor, + int ms_per_loss_percent, + absl::optional<int> start_forget_weight) + : histogram_(kDelayBuckets, forget_factor, start_forget_weight), + ms_per_loss_percent_(ms_per_loss_percent) {} + +void ReorderOptimizer::Update(int relative_delay_ms, + bool reordered, + int base_delay_ms) { + const int index = reordered ? relative_delay_ms / kBucketSizeMs : 0; + if (index < histogram_.NumBuckets()) { + // Maximum delay to register is 2000 ms. + histogram_.Add(index); + } + int bucket_index = MinimizeCostFunction(base_delay_ms); + optimal_delay_ms_ = (1 + bucket_index) * kBucketSizeMs; +} + +void ReorderOptimizer::Reset() { + histogram_.Reset(); + optimal_delay_ms_.reset(); +} + +int ReorderOptimizer::MinimizeCostFunction(int base_delay_ms) const { + const std::vector<int>& buckets = histogram_.buckets(); + + // Values are calculated in Q30. 
+ int64_t loss_probability = 1 << 30; + int64_t min_cost = std::numeric_limits<int64_t>::max(); + int min_bucket = 0; + for (int i = 0; i < static_cast<int>(buckets.size()); ++i) { + loss_probability -= buckets[i]; + int64_t delay_ms = + static_cast<int64_t>(std::max(0, i * kBucketSizeMs - base_delay_ms)) + << 30; + int64_t cost = delay_ms + 100 * ms_per_loss_percent_ * loss_probability; + + if (cost < min_cost) { + min_cost = cost; + min_bucket = i; + } + if (loss_probability == 0) { + break; + } + } + + return min_bucket; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h new file mode 100644 index 0000000000..06f6bc7e50 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_ +#define MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_ + +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/histogram.h" + +namespace webrtc { + +// Calculates an optimal delay to reduce the chance of missing reordered +// packets. The delay/loss trade-off can be tune using the `ms_per_loss_percent` +// parameter. 
+class ReorderOptimizer { + public: + ReorderOptimizer(int forget_factor, + int ms_per_loss_percent, + absl::optional<int> start_forget_weight); + + void Update(int relative_delay_ms, bool reordered, int base_delay_ms); + + absl::optional<int> GetOptimalDelayMs() const { return optimal_delay_ms_; } + + void Reset(); + + private: + int MinimizeCostFunction(int base_delay_ms) const; + + Histogram histogram_; + const int ms_per_loss_percent_; + absl::optional<int> optimal_delay_ms_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc new file mode 100644 index 0000000000..aaa1062560 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/reorder_optimizer_unittest.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/reorder_optimizer.h" + +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr int kForgetFactor = 32745; // 0.9993 in Q15. +constexpr int kMsPerLossPercent = 20; +constexpr int kStartForgetWeight = 1; + +} // namespace + +TEST(ReorderOptimizerTest, OnlyIncreaseDelayForReorderedPackets) { + ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent, + kStartForgetWeight); + EXPECT_FALSE(reorder_optimizer.GetOptimalDelayMs()); + + // Delay should not increase for in-order packets. 
+ reorder_optimizer.Update(60, /*reordered=*/false, 0); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20); + + reorder_optimizer.Update(100, /*reordered=*/false, 0); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20); + + reorder_optimizer.Update(80, /*reordered=*/true, 0); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 100); +} + +TEST(ReorderOptimizerTest, AvoidIncreasingDelayWhenProbabilityIsLow) { + ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent, + kStartForgetWeight); + + reorder_optimizer.Update(40, /*reordered=*/true, 0); + reorder_optimizer.Update(40, /*reordered=*/true, 0); + reorder_optimizer.Update(40, /*reordered=*/true, 0); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60); + + // The cost of the delay is too high relative the probability. + reorder_optimizer.Update(600, /*reordered=*/true, 0); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60); +} + +TEST(ReorderOptimizerTest, BaseDelayIsSubtractedFromCost) { + constexpr int kBaseDelayMs = 200; + ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent, + kStartForgetWeight); + + reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs); + reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs); + reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60); + + // The cost of the delay is too high relative the probability. + reorder_optimizer.Update(600, /*reordered=*/true, kBaseDelayMs); + EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 620); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc new file mode 100644 index 0000000000..52d3fa90f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.cc @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/statistics_calculator.h" + +#include <string.h> // memset + +#include <algorithm> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/delay_manager.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { +size_t AddIntToSizeTWithLowerCap(int a, size_t b) { + const size_t ret = b + a; + // If a + b is negative, resulting in a negative wrap, cap it to zero instead. + static_assert(sizeof(size_t) >= sizeof(int), + "int must not be wider than size_t for this to work"); + return (a < 0 && ret > b) ? 0 : ret; +} + +constexpr int kInterruptionLenMs = 150; +} // namespace + +// Allocating the static const so that it can be passed by reference to +// RTC_DCHECK. 
+const size_t StatisticsCalculator::kLenWaitingTimes; + +StatisticsCalculator::PeriodicUmaLogger::PeriodicUmaLogger( + absl::string_view uma_name, + int report_interval_ms, + int max_value) + : uma_name_(uma_name), + report_interval_ms_(report_interval_ms), + max_value_(max_value), + timer_(0) {} + +StatisticsCalculator::PeriodicUmaLogger::~PeriodicUmaLogger() = default; + +void StatisticsCalculator::PeriodicUmaLogger::AdvanceClock(int step_ms) { + timer_ += step_ms; + if (timer_ < report_interval_ms_) { + return; + } + LogToUma(Metric()); + Reset(); + timer_ -= report_interval_ms_; + RTC_DCHECK_GE(timer_, 0); +} + +void StatisticsCalculator::PeriodicUmaLogger::LogToUma(int value) const { + RTC_HISTOGRAM_COUNTS_SPARSE(uma_name_, value, 1, max_value_, 50); +} + +StatisticsCalculator::PeriodicUmaCount::PeriodicUmaCount( + absl::string_view uma_name, + int report_interval_ms, + int max_value) + : PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {} + +StatisticsCalculator::PeriodicUmaCount::~PeriodicUmaCount() { + // Log the count for the current (incomplete) interval. + LogToUma(Metric()); +} + +void StatisticsCalculator::PeriodicUmaCount::RegisterSample() { + ++counter_; +} + +int StatisticsCalculator::PeriodicUmaCount::Metric() const { + return counter_; +} + +void StatisticsCalculator::PeriodicUmaCount::Reset() { + counter_ = 0; +} + +StatisticsCalculator::PeriodicUmaAverage::PeriodicUmaAverage( + absl::string_view uma_name, + int report_interval_ms, + int max_value) + : PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {} + +StatisticsCalculator::PeriodicUmaAverage::~PeriodicUmaAverage() { + // Log the average for the current (incomplete) interval. + LogToUma(Metric()); +} + +void StatisticsCalculator::PeriodicUmaAverage::RegisterSample(int value) { + sum_ += value; + ++counter_; +} + +int StatisticsCalculator::PeriodicUmaAverage::Metric() const { + return counter_ == 0 ? 
0 : static_cast<int>(sum_ / counter_); +} + +void StatisticsCalculator::PeriodicUmaAverage::Reset() { + sum_ = 0.0; + counter_ = 0; +} + +StatisticsCalculator::StatisticsCalculator() + : preemptive_samples_(0), + accelerate_samples_(0), + expanded_speech_samples_(0), + expanded_noise_samples_(0), + timestamps_since_last_report_(0), + secondary_decoded_samples_(0), + discarded_secondary_packets_(0), + delayed_packet_outage_counter_( + "WebRTC.Audio.DelayedPacketOutageEventsPerMinute", + 60000, // 60 seconds report interval. + 100), + excess_buffer_delay_("WebRTC.Audio.AverageExcessBufferDelayMs", + 60000, // 60 seconds report interval. + 1000), + buffer_full_counter_("WebRTC.Audio.JitterBufferFullPerMinute", + 60000, // 60 seconds report interval. + 100) {} + +StatisticsCalculator::~StatisticsCalculator() = default; + +void StatisticsCalculator::Reset() { + preemptive_samples_ = 0; + accelerate_samples_ = 0; + expanded_speech_samples_ = 0; + expanded_noise_samples_ = 0; + secondary_decoded_samples_ = 0; + discarded_secondary_packets_ = 0; + waiting_times_.clear(); +} + +void StatisticsCalculator::ResetMcu() { + timestamps_since_last_report_ = 0; +} + +void StatisticsCalculator::ExpandedVoiceSamples(size_t num_samples, + bool is_new_concealment_event) { + expanded_speech_samples_ += num_samples; + ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), true); + lifetime_stats_.concealment_events += is_new_concealment_event; +} + +void StatisticsCalculator::ExpandedNoiseSamples(size_t num_samples, + bool is_new_concealment_event) { + expanded_noise_samples_ += num_samples; + ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), false); + lifetime_stats_.concealment_events += is_new_concealment_event; +} + +void StatisticsCalculator::ExpandedVoiceSamplesCorrection(int num_samples) { + expanded_speech_samples_ = + AddIntToSizeTWithLowerCap(num_samples, expanded_speech_samples_); + ConcealedSamplesCorrection(num_samples, true); +} + +void 
StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) { + expanded_noise_samples_ = + AddIntToSizeTWithLowerCap(num_samples, expanded_noise_samples_); + ConcealedSamplesCorrection(num_samples, false); +} + +void StatisticsCalculator::DecodedOutputPlayed() { + decoded_output_played_ = true; +} + +void StatisticsCalculator::EndExpandEvent(int fs_hz) { + RTC_DCHECK_GE(lifetime_stats_.concealed_samples, + concealed_samples_at_event_end_); + const int event_duration_ms = + 1000 * + (lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) / + fs_hz; + if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) { + lifetime_stats_.interruption_count++; + lifetime_stats_.total_interruption_duration_ms += event_duration_ms; + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AudioInterruptionMs", event_duration_ms, + /*min=*/150, /*max=*/5000, /*bucket_count=*/50); + } + concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples; +} + +void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples, + bool is_voice) { + if (num_samples < 0) { + // Store negative correction to subtract from future positive additions. + // See also the function comment in the header file. 
+ concealed_samples_correction_ -= num_samples; + if (!is_voice) { + silent_concealed_samples_correction_ -= num_samples; + } + return; + } + + const size_t canceled_out = + std::min(static_cast<size_t>(num_samples), concealed_samples_correction_); + concealed_samples_correction_ -= canceled_out; + lifetime_stats_.concealed_samples += num_samples - canceled_out; + + if (!is_voice) { + const size_t silent_canceled_out = std::min( + static_cast<size_t>(num_samples), silent_concealed_samples_correction_); + silent_concealed_samples_correction_ -= silent_canceled_out; + lifetime_stats_.silent_concealed_samples += + num_samples - silent_canceled_out; + } +} + +void StatisticsCalculator::PreemptiveExpandedSamples(size_t num_samples) { + preemptive_samples_ += num_samples; + operations_and_state_.preemptive_samples += num_samples; + lifetime_stats_.inserted_samples_for_deceleration += num_samples; +} + +void StatisticsCalculator::AcceleratedSamples(size_t num_samples) { + accelerate_samples_ += num_samples; + operations_and_state_.accelerate_samples += num_samples; + lifetime_stats_.removed_samples_for_acceleration += num_samples; +} + +void StatisticsCalculator::GeneratedNoiseSamples(size_t num_samples) { + lifetime_stats_.generated_noise_samples += num_samples; +} + +void StatisticsCalculator::PacketsDiscarded(size_t num_packets) { + lifetime_stats_.packets_discarded += num_packets; +} + +void StatisticsCalculator::SecondaryPacketsDiscarded(size_t num_packets) { + discarded_secondary_packets_ += num_packets; + lifetime_stats_.fec_packets_discarded += num_packets; +} + +void StatisticsCalculator::SecondaryPacketsReceived(size_t num_packets) { + lifetime_stats_.fec_packets_received += num_packets; +} + +void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) { + const int time_step_ms = + rtc::CheckedDivExact(static_cast<int>(1000 * num_samples), fs_hz); + delayed_packet_outage_counter_.AdvanceClock(time_step_ms); + 
excess_buffer_delay_.AdvanceClock(time_step_ms); + buffer_full_counter_.AdvanceClock(time_step_ms); + timestamps_since_last_report_ += static_cast<uint32_t>(num_samples); + if (timestamps_since_last_report_ > + static_cast<uint32_t>(fs_hz * kMaxReportPeriod)) { + timestamps_since_last_report_ = 0; + } + lifetime_stats_.total_samples_received += num_samples; +} + +void StatisticsCalculator::JitterBufferDelay( + size_t num_samples, + uint64_t waiting_time_ms, + uint64_t target_delay_ms, + uint64_t unlimited_target_delay_ms) { + lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples; + lifetime_stats_.jitter_buffer_target_delay_ms += + target_delay_ms * num_samples; + lifetime_stats_.jitter_buffer_minimum_delay_ms += + unlimited_target_delay_ms * num_samples; + lifetime_stats_.jitter_buffer_emitted_count += num_samples; +} + +void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) { + secondary_decoded_samples_ += num_samples; +} + +void StatisticsCalculator::FlushedPacketBuffer() { + operations_and_state_.packet_buffer_flushes++; + buffer_full_counter_.RegisterSample(); +} + +void StatisticsCalculator::ReceivedPacket() { + ++lifetime_stats_.jitter_buffer_packets_received; +} + +void StatisticsCalculator::RelativePacketArrivalDelay(size_t delay_ms) { + lifetime_stats_.relative_packet_arrival_delay_ms += delay_ms; +} + +void StatisticsCalculator::LogDelayedPacketOutageEvent(int num_samples, + int fs_hz) { + int outage_duration_ms = num_samples / (fs_hz / 1000); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.DelayedPacketOutageEventMs", + outage_duration_ms, 1 /* min */, 2000 /* max */, + 100 /* bucket count */); + delayed_packet_outage_counter_.RegisterSample(); + lifetime_stats_.delayed_packet_outage_samples += num_samples; +} + +void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) { + excess_buffer_delay_.RegisterSample(waiting_time_ms); + RTC_DCHECK_LE(waiting_times_.size(), kLenWaitingTimes); + if (waiting_times_.size() == 
kLenWaitingTimes) { + // Erase first value. + waiting_times_.pop_front(); + } + waiting_times_.push_back(waiting_time_ms); + operations_and_state_.last_waiting_time_ms = waiting_time_ms; +} + +void StatisticsCalculator::GetNetworkStatistics(size_t samples_per_packet, + NetEqNetworkStatistics* stats) { + RTC_DCHECK(stats); + + stats->accelerate_rate = + CalculateQ14Ratio(accelerate_samples_, timestamps_since_last_report_); + + stats->preemptive_rate = + CalculateQ14Ratio(preemptive_samples_, timestamps_since_last_report_); + + stats->expand_rate = + CalculateQ14Ratio(expanded_speech_samples_ + expanded_noise_samples_, + timestamps_since_last_report_); + + stats->speech_expand_rate = CalculateQ14Ratio(expanded_speech_samples_, + timestamps_since_last_report_); + + stats->secondary_decoded_rate = CalculateQ14Ratio( + secondary_decoded_samples_, timestamps_since_last_report_); + + const size_t discarded_secondary_samples = + discarded_secondary_packets_ * samples_per_packet; + stats->secondary_discarded_rate = + CalculateQ14Ratio(discarded_secondary_samples, + static_cast<uint32_t>(discarded_secondary_samples + + secondary_decoded_samples_)); + + if (waiting_times_.size() == 0) { + stats->mean_waiting_time_ms = -1; + stats->median_waiting_time_ms = -1; + stats->min_waiting_time_ms = -1; + stats->max_waiting_time_ms = -1; + } else { + std::sort(waiting_times_.begin(), waiting_times_.end()); + // Find mid-point elements. If the size is odd, the two values + // `middle_left` and `middle_right` will both be the one middle element; if + // the size is even, they will be the the two neighboring elements at the + // middle of the list. + const int middle_left = waiting_times_[(waiting_times_.size() - 1) / 2]; + const int middle_right = waiting_times_[waiting_times_.size() / 2]; + // Calculate the average of the two. (Works also for odd sizes.) 
+ stats->median_waiting_time_ms = (middle_left + middle_right) / 2; + stats->min_waiting_time_ms = waiting_times_.front(); + stats->max_waiting_time_ms = waiting_times_.back(); + double sum = 0; + for (auto time : waiting_times_) { + sum += time; + } + stats->mean_waiting_time_ms = static_cast<int>(sum / waiting_times_.size()); + } + + // Reset counters. + ResetMcu(); + Reset(); +} + +NetEqLifetimeStatistics StatisticsCalculator::GetLifetimeStatistics() const { + return lifetime_stats_; +} + +NetEqOperationsAndState StatisticsCalculator::GetOperationsAndState() const { + return operations_and_state_; +} + +uint16_t StatisticsCalculator::CalculateQ14Ratio(size_t numerator, + uint32_t denominator) { + if (numerator == 0) { + return 0; + } else if (numerator < denominator) { + // Ratio must be smaller than 1 in Q14. + RTC_DCHECK_LT((numerator << 14) / denominator, (1 << 14)); + return static_cast<uint16_t>((numerator << 14) / denominator); + } else { + // Will not produce a ratio larger than 1, since this is probably an error. + return 1 << 14; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h new file mode 100644 index 0000000000..33a22d02dd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_ + +#include <deque> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/neteq/neteq.h" + +namespace webrtc { + +class DelayManager; + +// This class handles various network statistics in NetEq. +class StatisticsCalculator { + public: + StatisticsCalculator(); + + virtual ~StatisticsCalculator(); + + StatisticsCalculator(const StatisticsCalculator&) = delete; + StatisticsCalculator& operator=(const StatisticsCalculator&) = delete; + + // Resets most of the counters. + void Reset(); + + // Resets the counters that are not handled by Reset(). + void ResetMcu(); + + // Reports that `num_samples` samples were produced through expansion, and + // that the expansion produced other than just noise samples. + void ExpandedVoiceSamples(size_t num_samples, bool is_new_concealment_event); + + // Reports that `num_samples` samples were produced through expansion, and + // that the expansion produced only noise samples. + void ExpandedNoiseSamples(size_t num_samples, bool is_new_concealment_event); + + // Corrects the statistics for number of samples produced through non-noise + // expansion by adding `num_samples` (negative or positive) to the current + // value. The result is capped to zero to avoid negative values. + void ExpandedVoiceSamplesCorrection(int num_samples); + + // Same as ExpandedVoiceSamplesCorrection but for noise samples. + void ExpandedNoiseSamplesCorrection(int num_samples); + + void DecodedOutputPlayed(); + + // Mark end of expand event; triggers some stats to be reported. + void EndExpandEvent(int fs_hz); + + // Reports that `num_samples` samples were produced through preemptive + // expansion. + void PreemptiveExpandedSamples(size_t num_samples); + + // Reports that `num_samples` samples were removed through accelerate. 
+ void AcceleratedSamples(size_t num_samples); + + // Reports that `num_samples` comfort noise samples were generated. + void GeneratedNoiseSamples(size_t num_samples); + + // Reports that `num_packets` packets were discarded. + virtual void PacketsDiscarded(size_t num_packets); + + // Reports that `num_packets` secondary (FEC) packets were discarded. + virtual void SecondaryPacketsDiscarded(size_t num_packets); + + // Reports that `num_packets` secondary (FEC) packets were received. + virtual void SecondaryPacketsReceived(size_t num_packets); + + // Increases the report interval counter with `num_samples` at a sample rate + // of `fs_hz`. This is how the StatisticsCalculator gets notified that current + // time is increasing. + void IncreaseCounter(size_t num_samples, int fs_hz); + + // Update jitter buffer delay counter. + void JitterBufferDelay(size_t num_samples, + uint64_t waiting_time_ms, + uint64_t target_delay_ms, + uint64_t unlimited_target_delay_ms); + + // Stores new packet waiting time in waiting time statistics. + void StoreWaitingTime(int waiting_time_ms); + + // Reports that `num_samples` samples were decoded from secondary packets. + void SecondaryDecodedSamples(int num_samples); + + // Reports that the packet buffer was flushed. + void FlushedPacketBuffer(); + + // Reports that the jitter buffer received a packet. + void ReceivedPacket(); + + // Reports that a received packet was delayed by `delay_ms` milliseconds. + virtual void RelativePacketArrivalDelay(size_t delay_ms); + + // Logs a delayed packet outage event of `num_samples` expanded at a sample + // rate of `fs_hz`. A delayed packet outage event is defined as an expand + // period caused not by an actual packet loss, but by a delayed packet. + virtual void LogDelayedPacketOutageEvent(int num_samples, int fs_hz); + + // Returns the current network statistics in `stats`. The number of samples + // per packet is `samples_per_packet`. 
The method does not populate + // `preferred_buffer_size_ms`, `jitter_peaks_found` or `clockdrift_ppm`; use + // the PopulateDelayManagerStats method for those. + void GetNetworkStatistics(size_t samples_per_packet, + NetEqNetworkStatistics* stats); + + // Returns a copy of this class's lifetime statistics. These statistics are + // never reset. + NetEqLifetimeStatistics GetLifetimeStatistics() const; + + NetEqOperationsAndState GetOperationsAndState() const; + + private: + static const int kMaxReportPeriod = 60; // Seconds before auto-reset. + static const size_t kLenWaitingTimes = 100; + + class PeriodicUmaLogger { + public: + PeriodicUmaLogger(absl::string_view uma_name, + int report_interval_ms, + int max_value); + virtual ~PeriodicUmaLogger(); + void AdvanceClock(int step_ms); + + protected: + void LogToUma(int value) const; + virtual int Metric() const = 0; + virtual void Reset() = 0; + + const std::string uma_name_; + const int report_interval_ms_; + const int max_value_; + int timer_ = 0; + }; + + class PeriodicUmaCount final : public PeriodicUmaLogger { + public: + PeriodicUmaCount(absl::string_view uma_name, + int report_interval_ms, + int max_value); + ~PeriodicUmaCount() override; + void RegisterSample(); + + protected: + int Metric() const override; + void Reset() override; + + private: + int counter_ = 0; + }; + + class PeriodicUmaAverage final : public PeriodicUmaLogger { + public: + PeriodicUmaAverage(absl::string_view uma_name, + int report_interval_ms, + int max_value); + ~PeriodicUmaAverage() override; + void RegisterSample(int value); + + protected: + int Metric() const override; + void Reset() override; + + private: + double sum_ = 0.0; + int counter_ = 0; + }; + + // Corrects the concealed samples counter in lifetime_stats_. The value of + // num_samples_ is added directly to the stat if the correction is positive. + // If the correction is negative, it is cached and will be subtracted against + // future additions to the counter. 
This is meant to be called from + // Expanded{Voice,Noise}Samples{Correction}. + void ConcealedSamplesCorrection(int num_samples, bool is_voice); + + // Calculates numerator / denominator, and returns the value in Q14. + static uint16_t CalculateQ14Ratio(size_t numerator, uint32_t denominator); + + NetEqLifetimeStatistics lifetime_stats_; + NetEqOperationsAndState operations_and_state_; + size_t concealed_samples_correction_ = 0; + size_t silent_concealed_samples_correction_ = 0; + size_t preemptive_samples_; + size_t accelerate_samples_; + size_t expanded_speech_samples_; + size_t expanded_noise_samples_; + size_t concealed_samples_at_event_end_ = 0; + uint32_t timestamps_since_last_report_; + std::deque<int> waiting_times_; + uint32_t secondary_decoded_samples_; + size_t discarded_secondary_packets_; + PeriodicUmaCount delayed_packet_outage_counter_; + PeriodicUmaAverage excess_buffer_delay_; + PeriodicUmaCount buffer_full_counter_; + bool decoded_output_played_ = false; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc new file mode 100644 index 0000000000..491cd83dc4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/statistics_calculator_unittest.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/statistics_calculator.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(LifetimeStatistics, TotalSamplesReceived) { + StatisticsCalculator stats; + for (int i = 0; i < 10; ++i) { + stats.IncreaseCounter(480, 48000); // 10 ms at 48 kHz. + } + EXPECT_EQ(10 * 480u, stats.GetLifetimeStatistics().total_samples_received); +} + +TEST(LifetimeStatistics, SamplesConcealed) { + StatisticsCalculator stats; + stats.ExpandedVoiceSamples(100, false); + stats.ExpandedNoiseSamples(17, false); + EXPECT_EQ(100u + 17u, stats.GetLifetimeStatistics().concealed_samples); +} + +// This test verifies that a negative correction of concealed_samples does not +// result in a decrease in the stats value (because stats-consuming applications +// would not expect the value to decrease). Instead, the correction should be +// made to future increments to the stat. +TEST(LifetimeStatistics, SamplesConcealedCorrection) { + StatisticsCalculator stats; + stats.ExpandedVoiceSamples(100, false); + EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples); + stats.ExpandedVoiceSamplesCorrection(-10); + // Do not subtract directly, but keep the correction for later. + EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples); + stats.ExpandedVoiceSamplesCorrection(20); + // The total correction is 20 - 10. + EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples); + + // Also test correction done to the next ExpandedVoiceSamples call. + stats.ExpandedVoiceSamplesCorrection(-17); + EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples); + stats.ExpandedVoiceSamples(100, false); + EXPECT_EQ(110u + 100u - 17u, stats.GetLifetimeStatistics().concealed_samples); +} + +// This test verifies that neither "accelerate" nor "pre-emptive expand" results +// in a modification to concealed_samples stats. Only PLC operations (i.e., +// "expand" and "merge") should affect the stat. 
+TEST(LifetimeStatistics, NoUpdateOnTimeStretch) { + StatisticsCalculator stats; + stats.ExpandedVoiceSamples(100, false); + stats.AcceleratedSamples(4711); + stats.PreemptiveExpandedSamples(17); + stats.ExpandedVoiceSamples(100, false); + EXPECT_EQ(200u, stats.GetLifetimeStatistics().concealed_samples); +} + +TEST(StatisticsCalculator, ExpandedSamplesCorrection) { + StatisticsCalculator stats; + NetEqNetworkStatistics stats_output; + constexpr int kSampleRateHz = 48000; + constexpr int k10MsSamples = kSampleRateHz / 100; + constexpr int kPacketSizeMs = 20; + constexpr size_t kSamplesPerPacket = kPacketSizeMs * kSampleRateHz / 1000; + + // Advance time by 10 ms. + stats.IncreaseCounter(k10MsSamples, kSampleRateHz); + + stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output); + + EXPECT_EQ(0u, stats_output.expand_rate); + EXPECT_EQ(0u, stats_output.speech_expand_rate); + + // Correct with a negative value. + stats.ExpandedVoiceSamplesCorrection(-100); + stats.ExpandedNoiseSamplesCorrection(-100); + stats.IncreaseCounter(k10MsSamples, kSampleRateHz); + stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output); + // Expect no change, since negative values are disallowed. + EXPECT_EQ(0u, stats_output.expand_rate); + EXPECT_EQ(0u, stats_output.speech_expand_rate); + + // Correct with a positive value. + stats.ExpandedVoiceSamplesCorrection(50); + stats.ExpandedNoiseSamplesCorrection(200); + stats.IncreaseCounter(k10MsSamples, kSampleRateHz); + stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output); + // Calculate expected rates in Q14. Expand rate is noise + voice, while + // speech expand rate is only voice. 
+ EXPECT_EQ(((50u + 200u) << 14) / k10MsSamples, stats_output.expand_rate); + EXPECT_EQ((50u << 14) / k10MsSamples, stats_output.speech_expand_rate); +} + +TEST(StatisticsCalculator, RelativePacketArrivalDelay) { + StatisticsCalculator stats; + + stats.RelativePacketArrivalDelay(50); + NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics(); + EXPECT_EQ(50u, stats_output.relative_packet_arrival_delay_ms); + + stats.RelativePacketArrivalDelay(20); + stats_output = stats.GetLifetimeStatistics(); + EXPECT_EQ(70u, stats_output.relative_packet_arrival_delay_ms); +} + +TEST(StatisticsCalculator, ReceivedPacket) { + StatisticsCalculator stats; + + stats.ReceivedPacket(); + NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics(); + EXPECT_EQ(1u, stats_output.jitter_buffer_packets_received); + + stats.ReceivedPacket(); + stats_output = stats.GetLifetimeStatistics(); + EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received); +} + +TEST(StatisticsCalculator, InterruptionCounter) { + constexpr int fs_khz = 48; + constexpr int fs_hz = fs_khz * 1000; + StatisticsCalculator stats; + stats.DecodedOutputPlayed(); + stats.EndExpandEvent(fs_hz); + auto lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(0, lts.interruption_count); + EXPECT_EQ(0, lts.total_interruption_duration_ms); + + // Add an event that is shorter than 150 ms. Should not be logged. + stats.ExpandedVoiceSamples(10 * fs_khz, false); // 10 ms. + stats.ExpandedNoiseSamples(139 * fs_khz, false); // 139 ms. + stats.EndExpandEvent(fs_hz); + lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(0, lts.interruption_count); + + // Add an event that is longer than 150 ms. Should be logged. + stats.ExpandedVoiceSamples(140 * fs_khz, false); // 140 ms. + stats.ExpandedNoiseSamples(11 * fs_khz, false); // 11 ms. + stats.EndExpandEvent(fs_hz); + lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(1, lts.interruption_count); + EXPECT_EQ(151, lts.total_interruption_duration_ms); + + // Add one more long event. 
+ stats.ExpandedVoiceSamples(100 * fs_khz, false); // 100 ms. + stats.ExpandedNoiseSamples(5000 * fs_khz, false); // 5000 ms. + stats.EndExpandEvent(fs_hz); + lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(2, lts.interruption_count); + EXPECT_EQ(5100 + 151, lts.total_interruption_duration_ms); +} + +TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) { + constexpr int fs_khz = 48; + constexpr int fs_hz = fs_khz * 1000; + StatisticsCalculator stats; + + // Add an event that is longer than 150 ms. Should normally be logged, but we + // have not called DecodedOutputPlayed() yet, so it shouldn't this time. + stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms. + stats.EndExpandEvent(fs_hz); + auto lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(0, lts.interruption_count); + + // Call DecodedOutputPlayed(). Logging should happen after this. + stats.DecodedOutputPlayed(); + + // Add one more long event. + stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms. + stats.EndExpandEvent(fs_hz); + lts = stats.GetLifetimeStatistics(); + EXPECT_EQ(1, lts.interruption_count); +} + +TEST(StatisticsCalculator, DiscardedPackets) { + StatisticsCalculator statistics_calculator; + EXPECT_EQ(0u, + statistics_calculator.GetLifetimeStatistics().packets_discarded); + + statistics_calculator.PacketsDiscarded(1); + EXPECT_EQ(1u, + statistics_calculator.GetLifetimeStatistics().packets_discarded); + + statistics_calculator.PacketsDiscarded(10); + EXPECT_EQ(11u, + statistics_calculator.GetLifetimeStatistics().packets_discarded); + + // Calling `SecondaryPacketsDiscarded` does not modify `packets_discarded`. + statistics_calculator.SecondaryPacketsDiscarded(1); + EXPECT_EQ(11u, + statistics_calculator.GetLifetimeStatistics().packets_discarded); + + // Calling `FlushedPacketBuffer` does not modify `packets_discarded`. 
+ statistics_calculator.FlushedPacketBuffer(); + EXPECT_EQ(11u, + statistics_calculator.GetLifetimeStatistics().packets_discarded); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc new file mode 100644 index 0000000000..7d7cac7157 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/sync_buffer.h" + +#include <algorithm> // Access to min. + +#include "rtc_base/checks.h" + +namespace webrtc { + +size_t SyncBuffer::FutureLength() const { + return Size() - next_index_; +} + +void SyncBuffer::PushBack(const AudioMultiVector& append_this) { + size_t samples_added = append_this.Size(); + AudioMultiVector::PushBack(append_this); + AudioMultiVector::PopFront(samples_added); + if (samples_added <= next_index_) { + next_index_ -= samples_added; + } else { + // This means that we are pushing out future data that was never used. + // RTC_DCHECK_NOTREACHED(); + // TODO(hlundin): This assert must be disabled to support 60 ms frames. + // This should not happen even for 60 ms frames, but it does. Investigate + // why. 
+ next_index_ = 0; + } + dtmf_index_ -= std::min(dtmf_index_, samples_added); +} + +void SyncBuffer::PushBackInterleaved(const rtc::BufferT<int16_t>& append_this) { + const size_t size_before_adding = Size(); + AudioMultiVector::PushBackInterleaved(append_this); + const size_t samples_added_per_channel = Size() - size_before_adding; + RTC_DCHECK_EQ(samples_added_per_channel * Channels(), append_this.size()); + AudioMultiVector::PopFront(samples_added_per_channel); + next_index_ -= std::min(next_index_, samples_added_per_channel); + dtmf_index_ -= std::min(dtmf_index_, samples_added_per_channel); +} + +void SyncBuffer::PushFrontZeros(size_t length) { + InsertZerosAtIndex(length, 0); +} + +void SyncBuffer::InsertZerosAtIndex(size_t length, size_t position) { + position = std::min(position, Size()); + length = std::min(length, Size() - position); + AudioMultiVector::PopBack(length); + for (size_t channel = 0; channel < Channels(); ++channel) { + channels_[channel]->InsertZerosAt(length, position); + } + if (next_index_ >= position) { + // We are moving the `next_index_` sample. + set_next_index(next_index_ + length); // Overflow handled by subfunction. + } + if (dtmf_index_ > 0 && dtmf_index_ >= position) { + // We are moving the `dtmf_index_` sample. + set_dtmf_index(dtmf_index_ + length); // Overflow handled by subfunction. + } +} + +void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t length, + size_t position) { + position = std::min(position, Size()); // Cap `position` in the valid range. 
+ length = std::min(length, Size() - position); + AudioMultiVector::OverwriteAt(insert_this, length, position); +} + +void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t position) { + ReplaceAtIndex(insert_this, insert_this.Size(), position); +} + +void SyncBuffer::GetNextAudioInterleaved(size_t requested_len, + AudioFrame* output) { + RTC_DCHECK(output); + const size_t samples_to_read = std::min(FutureLength(), requested_len); + output->ResetWithoutMuting(); + const size_t tot_samples_read = ReadInterleavedFromIndex( + next_index_, samples_to_read, output->mutable_data()); + const size_t samples_read_per_channel = tot_samples_read / Channels(); + next_index_ += samples_read_per_channel; + output->num_channels_ = Channels(); + output->samples_per_channel_ = samples_read_per_channel; +} + +void SyncBuffer::IncreaseEndTimestamp(uint32_t increment) { + end_timestamp_ += increment; +} + +void SyncBuffer::Flush() { + Zeros(Size()); + next_index_ = Size(); + end_timestamp_ = 0; + dtmf_index_ = 0; +} + +void SyncBuffer::set_next_index(size_t value) { + // Cannot set `next_index_` larger than the size of the buffer. + next_index_ = std::min(value, Size()); +} + +void SyncBuffer::set_dtmf_index(size_t value) { + // Cannot set `dtmf_index_` larger than the size of the buffer. + dtmf_index_ = std::min(value, Size()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h new file mode 100644 index 0000000000..cf56c432e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_ +#define MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "api/audio/audio_frame.h" +#include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "modules/audio_coding/neteq/audio_vector.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class SyncBuffer : public AudioMultiVector { + public: + SyncBuffer(size_t channels, size_t length) + : AudioMultiVector(channels, length), + next_index_(length), + end_timestamp_(0), + dtmf_index_(0) {} + + SyncBuffer(const SyncBuffer&) = delete; + SyncBuffer& operator=(const SyncBuffer&) = delete; + + // Returns the number of samples yet to play out from the buffer. + size_t FutureLength() const; + + // Adds the contents of `append_this` to the back of the SyncBuffer. Removes + // the same number of samples from the beginning of the SyncBuffer, to + // maintain a constant buffer size. The `next_index_` is updated to reflect + // the move of the beginning of "future" data. + void PushBack(const AudioMultiVector& append_this) override; + + // Like PushBack, but reads the samples channel-interleaved from the input. + void PushBackInterleaved(const rtc::BufferT<int16_t>& append_this); + + // Adds `length` zeros to the beginning of each channel. Removes + // the same number of samples from the end of the SyncBuffer, to + // maintain a constant buffer size. The `next_index_` is updated to reflect + // the move of the beginning of "future" data. + // Note that this operation may delete future samples that are waiting to + // be played. + void PushFrontZeros(size_t length); + + // Inserts `length` zeros into each channel at index `position`. The size of + // the SyncBuffer is kept constant, which means that the last `length` + // elements in each channel will be purged. 
+ virtual void InsertZerosAtIndex(size_t length, size_t position); + + // Overwrites each channel in this SyncBuffer with values taken from + // `insert_this`. The values are taken from the beginning of `insert_this` and + // are inserted starting at `position`. `length` values are written into each + // channel. The size of the SyncBuffer is kept constant. That is, if `length` + // and `position` are selected such that the new data would extend beyond the + // end of the current SyncBuffer, the buffer is not extended. + // The `next_index_` is not updated. + virtual void ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t length, + size_t position); + + // Same as the above method, but where all of `insert_this` is written (with + // the same constraints as above, that the SyncBuffer is not extended). + virtual void ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t position); + + // Reads `requested_len` samples from each channel and writes them interleaved + // into `output`. The `next_index_` is updated to point to the sample to read + // next time. The AudioFrame `output` is first reset, and the `data_`, + // `num_channels_`, and `samples_per_channel_` fields are updated. + void GetNextAudioInterleaved(size_t requested_len, AudioFrame* output); + + // Adds `increment` to `end_timestamp_`. + void IncreaseEndTimestamp(uint32_t increment); + + // Flushes the buffer. The buffer will contain only zeros after the flush, and + // `next_index_` will point to the end, like when the buffer was first + // created. + void Flush(); + + const AudioVector& Channel(size_t n) const { return *channels_[n]; } + AudioVector& Channel(size_t n) { return *channels_[n]; } + + // Accessors and mutators. 
+ size_t next_index() const { return next_index_; } + void set_next_index(size_t value); + uint32_t end_timestamp() const { return end_timestamp_; } + void set_end_timestamp(uint32_t value) { end_timestamp_ = value; } + size_t dtmf_index() const { return dtmf_index_; } + void set_dtmf_index(size_t value); + + private: + size_t next_index_; + uint32_t end_timestamp_; // The timestamp of the last sample in the buffer. + size_t dtmf_index_; // Index to the first non-DTMF sample in the buffer. +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_SYNC_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc new file mode 100644 index 0000000000..bdcd92446b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/sync_buffer.h" + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(SyncBuffer, CreateAndDestroy) { + // Create a SyncBuffer with two channels and 10 samples each. + static const size_t kLen = 10; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + EXPECT_EQ(kChannels, sync_buffer.Channels()); + EXPECT_EQ(kLen, sync_buffer.Size()); + // When the buffer is empty, the next index to play out is at the end. + EXPECT_EQ(kLen, sync_buffer.next_index()); + // Verify that all elements are zero. 
+ for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kLen; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } +} + +TEST(SyncBuffer, SetNextIndex) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + sync_buffer.set_next_index(0); + EXPECT_EQ(0u, sync_buffer.next_index()); + sync_buffer.set_next_index(kLen / 2); + EXPECT_EQ(kLen / 2, sync_buffer.next_index()); + sync_buffer.set_next_index(kLen); + EXPECT_EQ(kLen, sync_buffer.next_index()); + // Try to set larger than the buffer size; should cap at buffer size. + sync_buffer.set_next_index(kLen + 1); + EXPECT_EQ(kLen, sync_buffer.next_index()); +} + +TEST(SyncBuffer, PushBackAndFlush) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate `new_data`. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = rtc::checked_cast<int16_t>(i); + } + } + // Push back `new_data` into `sync_buffer`. This operation should pop out + // data from the front of `sync_buffer`, so that the size of the buffer + // remains the same. The `next_index_` should also move with the same length. + sync_buffer.PushBack(new_data); + ASSERT_EQ(kLen, sync_buffer.Size()); + // Verify that `next_index_` moved accordingly. + EXPECT_EQ(kLen - kNewLen, sync_buffer.next_index()); + // Verify the new contents. 
+ for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + EXPECT_EQ(new_data[channel][i], + sync_buffer[channel][sync_buffer.next_index() + i]); + } + } + + // Now flush the buffer, and verify that it is all zeros, and that next_index + // points to the end. + sync_buffer.Flush(); + ASSERT_EQ(kLen, sync_buffer.Size()); + EXPECT_EQ(kLen, sync_buffer.next_index()); + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kLen; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } +} + +TEST(SyncBuffer, PushFrontZeros) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate `new_data`. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = rtc::checked_cast<int16_t>(1000 + i); + } + } + sync_buffer.PushBack(new_data); + EXPECT_EQ(kLen, sync_buffer.Size()); + + // Push `kNewLen` - 1 zeros into each channel in the front of the SyncBuffer. + sync_buffer.PushFrontZeros(kNewLen - 1); + EXPECT_EQ(kLen, sync_buffer.Size()); // Size should remain the same. + // Verify that `next_index_` moved accordingly. Should be at the end - 1. + EXPECT_EQ(kLen - 1, sync_buffer.next_index()); + // Verify the zeros. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen - 1; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } + // Verify that the correct data is at the end of the SyncBuffer. + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, sync_buffer[channel][sync_buffer.next_index()]); + } +} + +TEST(SyncBuffer, GetNextAudioInterleaved) { + // Create a SyncBuffer with two channels and 100 samples each. 
+ static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate `new_data`. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = rtc::checked_cast<int16_t>(i); + } + } + // Push back `new_data` into `sync_buffer`. This operation should pop out + // data from the front of `sync_buffer`, so that the size of the buffer + // remains the same. The `next_index_` should also move with the same length. + sync_buffer.PushBack(new_data); + + // Read to interleaved output. Read in two batches, where each read operation + // should automatically update the `next_index_` in the SyncBuffer. + // Note that `samples_per_channel_` is the number of samples read from each + // channel. That is, the number of samples written to each output frame is + // `samples_per_channel_` * `kChannels`. + AudioFrame output1; + sync_buffer.GetNextAudioInterleaved(kNewLen / 2, &output1); + EXPECT_EQ(kChannels, output1.num_channels_); + EXPECT_EQ(kNewLen / 2, output1.samples_per_channel_); + + AudioFrame output2; + sync_buffer.GetNextAudioInterleaved(kNewLen / 2, &output2); + EXPECT_EQ(kChannels, output2.num_channels_); + EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_); + + // Verify the data. 
+ const int16_t* output_ptr = output1.data(); + for (size_t i = 0; i < kNewLen / 2; ++i) { + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(new_data[channel][i], *output_ptr); + ++output_ptr; + } + } + output_ptr = output2.data(); + for (size_t i = kNewLen / 2; i < kNewLen; ++i) { + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(new_data[channel][i], *output_ptr); + ++output_ptr; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m new file mode 100644 index 0000000000..031d8a39ee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/parse_delay_file.m @@ -0,0 +1,201 @@ +% +% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function outStruct = parse_delay_file(file) +% PARSE_DELAY_FILE Parse a NetEQ delay-logging file (format version 2.0). +% outStruct = parse_delay_file(file) reads the binary log `file` and returns +% a struct with per-packet column vectors (ts, sn, pt, plen, arrival, decode, +% playout_delay, optbuf), the sample-rate change log (fs, fschange_ts; both +% empty when the log has no CHANGE_FS event) and the accumulated totals +% tot_expand, tot_accelerate and tot_preemptive. +% Raises an error if the file cannot be opened, or if its header, version +% or 'End of file' trailer is not as expected. + +fid = fopen(file, 'rb'); +if fid == -1 + error('Cannot open file %s', file); +end +% Close the file on every exit path, including the error() calls below. +closer = onCleanup(@() fclose(fid)); + +textline = fgetl(fid); +if ~strncmp(textline, '#!NetEQ_Delay_Logging', 21) + error('Wrong file format'); +end + +ver = sscanf(textline, '#!NetEQ_Delay_Logging%d.%d'); +if ~all(ver == [2; 0]) + error('Wrong version of delay logging function') +end + + +start_pos = ftell(fid); +fseek(fid, -12, 'eof'); +textline = fgetl(fid); +if ~strncmp(textline, 'End of file', 21) + error('File ending is not correct. 
Seems like the simulation ended abnormally.'); +end + +fseek(fid,-12-4, 'eof'); +Npackets = fread(fid, 1, 'int32'); +fseek(fid, start_pos, 'bof'); + +rtpts = zeros(Npackets, 1); +seqno = zeros(Npackets, 1); +pt = zeros(Npackets, 1); +plen = zeros(Npackets, 1); +recin_t = nan*ones(Npackets, 1); +decode_t = nan*ones(Npackets, 1); +playout_delay = zeros(Npackets, 1); +optbuf = zeros(Npackets, 1); + +fs_ix = 1; +clock = 0; +ts_ix = 1; +ended = 0; +late_packets = 0; +fs_now = 8000; +last_decode_k = 0; +tot_expand = 0; +tot_accelerate = 0; +tot_preemptive = 0; +% Sample-rate change log. Must exist even when the file contains no +% CHANGE_FS event, because both vectors are read when building outStruct. +fsvec = []; +fschange_ts = []; + +while not(ended) + signal = fread(fid, 1, '*int32'); + + switch signal + case 3 % NETEQ_DELAY_LOGGING_SIGNAL_CLOCK + clock = fread(fid, 1, '*float32'); + + % keep on reading batches of M until the signal is no longer "3" + % read int32 + float32 in one go + % this is to save execution time + temp = [3; 0]; + M = 120; + while all(temp(1,:) == 3) + fp = ftell(fid); + temp = fread(fid, [2 M], '*int32'); + end + + % back up to last clock event + fseek(fid, fp - ftell(fid) + ... + (find(temp(1,:) ~= 3, 1 ) - 2) * 2 * 4 + 4, 'cof'); + % read the last clock value + clock = fread(fid, 1, '*float32'); + + case 1 % NETEQ_DELAY_LOGGING_SIGNAL_RECIN + temp_ts = fread(fid, 1, 'uint32'); + + if late_packets > 0 + temp_ix = ts_ix - 1; + while (temp_ix >= 1) && (rtpts(temp_ix) ~= temp_ts) + % TODO(hlundin): use matlab vector search instead? 
+ temp_ix = temp_ix - 1; + end + + if temp_ix >= 1 + % the ts was found in the vector + late_packets = late_packets - 1; + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + + rtpts(temp_ix) = temp_ts; + seqno(temp_ix) = fread(fid, 1, 'uint16'); + pt(temp_ix) = fread(fid, 1, 'int32'); + plen(temp_ix) = fread(fid, 1, 'int16'); + recin_t(temp_ix) = clock; + + case 2 % NETEQ_DELAY_LOGGING_SIGNAL_FLUSH + % do nothing + + case 4 % NETEQ_DELAY_LOGGING_SIGNAL_EOF + ended = 1; + + case 5 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE + last_decode_ts = fread(fid, 1, 'uint32'); + temp_delay = fread(fid, 1, 'uint16'); + + k = find(rtpts(1:(ts_ix - 1))==last_decode_ts,1,'last'); + if ~isempty(k) + decode_t(k) = clock; + playout_delay(k) = temp_delay + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + end + + case 6 % NETEQ_DELAY_LOGGING_SIGNAL_CHANGE_FS + fsvec(fs_ix) = fread(fid, 1, 'uint16'); + fschange_ts(fs_ix) = last_decode_ts; + fs_now = fsvec(fs_ix); + fs_ix = fs_ix + 1; + + case 7 % NETEQ_DELAY_LOGGING_SIGNAL_MERGE_INFO + % always consume the value to keep the stream position; only apply it + % once a packet has been decoded (index 0 is not a valid subscript) + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + playout_delay(last_decode_k) = playout_delay(last_decode_k) ... 
+ + temp; + end + + case 8 % NETEQ_DELAY_LOGGING_SIGNAL_EXPAND_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_expand = tot_expand + temp / (fs_now / 1000); + end + + case 9 % NETEQ_DELAY_LOGGING_SIGNAL_ACCELERATE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_accelerate = tot_accelerate + temp / (fs_now / 1000); + end + + case 10 % NETEQ_DELAY_LOGGING_SIGNAL_PREEMPTIVE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_preemptive = tot_preemptive + temp / (fs_now / 1000); + end + + case 11 % NETEQ_DELAY_LOGGING_SIGNAL_OPTBUF + % same guard as the cases above: skip the store before the first decode + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + optbuf(last_decode_k) = temp; + end + + case 12 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE_ONE_DESC + last_decode_ts = fread(fid, 1, 'uint32'); + k = ts_ix - 1; + + while (k >= 1) && (rtpts(k) ~= last_decode_ts) + % TODO(hlundin): use matlab vector search instead? + k = k - 1; + end + + if k < 1 + % packet not received yet + k = ts_ix; + rtpts(ts_ix) = last_decode_ts; + late_packets = late_packets + 1; + end + + decode_t(k) = clock; + playout_delay(k) = fread(fid, 1, 'uint16') + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + + end + +end + + +% the file is closed by the onCleanup object when this function returns + +outStruct = struct(... + 'ts', rtpts, ... + 'sn', seqno, ... + 'pt', pt,... + 'plen', plen,... + 'arrival', recin_t,... + 'decode', decode_t,... + 'fs', fsvec(:),... + 'fschange_ts', fschange_ts(:),... + 'playout_delay', playout_delay,... + 'tot_expand', tot_expand,... + 'tot_accelerate', tot_accelerate,... + 'tot_preemptive', tot_preemptive,... + 'optbuf', optbuf); diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m new file mode 100644 index 0000000000..86d533fbeb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/delay_tool/plot_neteq_delay.m @@ -0,0 +1,197 @@ +% +% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS. All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function [delay_struct, delayvalues] = plot_neteq_delay(delayfile, varargin)
+
+% InfoStruct = plot_neteq_delay(delayfile)
+% InfoStruct = plot_neteq_delay(delayfile, 'skipdelay', skip_seconds)
+% InfoStruct = plot_neteq_delay(delayfile, 'noplot')
+% [InfoStruct, delayvalues] = plot_neteq_delay(delayfile, 'get_delay', points)
+%
+% Parses DELAYFILE with parse_delay_file, optionally plots the network delay
+% and the total (decode + playout) delay against send time, and returns
+% summary statistics.
+%
+% Options (case-insensitive):
+%   'skipdelay' / 'delayskip', skip_seconds
+%       Ignore the first skip_seconds seconds when computing the statistics.
+%   'noplot'
+%       Do not produce the figure.
+%   'get_delay' / 'getdelay', points
+%       Return in DELAYVALUES the delay of the non-CNG packet nearest (in
+%       send time) to each entry of POINTS; NaN outside the data range.
+%
+% InfoStruct fields: mean_delay, neteq_delay, nw_lossrate, neteq_lossrate,
+% tot_expand, tot_accelerate, tot_preemptive, tot_time, filename, units, fs.
+%
+% Henrik Lundin, 2006-11-17
+% Henrik Lundin, 2011-05-17
+%
+
+try
+    s = parse_delay_file(delayfile);
+catch err
+    % Re-raise unchanged. error(lasterr) would re-interpret the message as a
+    % format string and mangle any '%' or '\' it contains.
+    rethrow(err);
+end
+
+delayskip=0;
+noplot=0;
+arg_ptr=1;
+delaypoints=[];
+
+s.sn=unwrap_seqno(s.sn);
+
+% nargin counts delayfile as well, so this is arg_ptr <= numel(varargin)
+while arg_ptr+1 <= nargin
+    switch lower(varargin{arg_ptr})
+    case {'skipdelay', 'delayskip'}
+        % skip a number of seconds in the beginning when calculating delays
+        delayskip = varargin{arg_ptr+1};
+        arg_ptr = arg_ptr + 2;
+    case 'noplot'
+        noplot=1;
+        arg_ptr = arg_ptr + 1;
+    case {'get_delay', 'getdelay'}
+        % return a vector of delay values for the points in the given vector
+        delaypoints = varargin{arg_ptr+1};
+        arg_ptr = arg_ptr + 2;
+    otherwise
+        warning('Unknown switch %s\n', varargin{arg_ptr});
+        arg_ptr = arg_ptr + 1;
+    end
+end
+
+% find lost frames that were covered by one-descriptor decoding
+one_desc_ix=find(isnan(s.arrival));
+for k=1:length(one_desc_ix)
+    % ix: the packet with the largest timestamp below the lost frame's one
+    ix=find(s.ts==max(s.ts(s.ts(one_desc_ix(k))>s.ts)));
+    s.sn(one_desc_ix(k))=s.sn(ix)+1;
+    s.pt(one_desc_ix(k))=s.pt(ix);
+    s.arrival(one_desc_ix(k))=s.arrival(ix)+s.decode(one_desc_ix(k))-s.decode(ix);
+end
+
+% remove duplicate received frames that were never decoded (RED codec)
+if length(unique(s.ts(isfinite(s.ts)))) < length(s.ts(isfinite(s.ts)))
+    ix=find(isfinite(s.decode));
+    s.sn=s.sn(ix);
+    s.ts=s.ts(ix);
+    s.arrival=s.arrival(ix);
+    s.playout_delay=s.playout_delay(ix);
+    s.pt=s.pt(ix);
+    s.optbuf=s.optbuf(ix);
+    % was "plen=plen(ix)", which reads an undefined variable; the packet
+    % lengths live in the struct returned by parse_delay_file
+    s.plen=s.plen(ix);
+    s.decode=s.decode(ix);
+end
+
+% find non-unique sequence numbers
+[~,un_ix]=unique(s.sn);
+nonun_ix=setdiff(1:length(s.sn),un_ix);
+if ~isempty(nonun_ix)
+    warning('RTP sequence numbers are in error');
+end
+
+% sort vectors
+[s.sn,sort_ix]=sort(s.sn);
+s.ts=s.ts(sort_ix);
+s.arrival=s.arrival(sort_ix);
+s.decode=s.decode(sort_ix);
+s.playout_delay=s.playout_delay(sort_ix);
+s.pt=s.pt(sort_ix);
+% optbuf is indexed below with indices derived from the sorted vectors
+% (cng_ix, start_ix:end_ix), so it must follow the same permutation
+s.optbuf=s.optbuf(sort_ix);
+
+send_t=s.ts-s.ts(1);
+if length(s.fs)<1
+    warning('No info about sample rate found in file. Using default 8000.');
+    s.fs(1)=8000;
+    s.fschange_ts(1)=min(s.ts);
+elseif s.fschange_ts(1)>min(s.ts)
+    s.fschange_ts(1)=min(s.ts);
+end
+
+% convert from samples to ms, one sample-rate segment at a time (last first)
+end_ix=length(send_t);
+for k=length(s.fs):-1:1
+    start_ix=find(s.ts==s.fschange_ts(k));
+    send_t(start_ix:end_ix)=send_t(start_ix:end_ix)/s.fs(k)*1000;
+    s.playout_delay(start_ix:end_ix)=s.playout_delay(start_ix:end_ix)/s.fs(k)*1000;
+    s.optbuf(start_ix:end_ix)=s.optbuf(start_ix:end_ix)/s.fs(k)*1000;
+    end_ix=start_ix-1;
+end
+
+tot_time=max(send_t)-min(send_t);
+
+seq_ix=s.sn-min(s.sn)+1;
+% shift send times so that no packet arrives before it was sent
+send_t=send_t+max(min(s.arrival-send_t),0);
+
+plot_send_t=nan*ones(max(seq_ix),1);
+plot_send_t(seq_ix)=send_t;
+plot_nw_delay=nan*ones(max(seq_ix),1);
+plot_nw_delay(seq_ix)=s.arrival-send_t;
+
+cng_ix=find(s.pt~=13); % find those packets that are not CNG/SID
+
+if noplot==0
+    h=plot(plot_send_t/1000,plot_nw_delay);
+    set(h,'color',0.75*[1 1 1]);
+    hold on
+    if any(s.optbuf~=0)
+        peak_ix=find(s.optbuf(cng_ix)<0); % peak mode is labeled with negative values
+        no_peak_ix=find(s.optbuf(cng_ix)>0); %setdiff(1:length(cng_ix),peak_ix);
+        h1=plot(send_t(cng_ix(peak_ix))/1000,...
+            s.arrival(cng_ix(peak_ix))+abs(s.optbuf(cng_ix(peak_ix)))-send_t(cng_ix(peak_ix)),...
+            'r.');
+        h2=plot(send_t(cng_ix(no_peak_ix))/1000,...
+            s.arrival(cng_ix(no_peak_ix))+abs(s.optbuf(cng_ix(no_peak_ix)))-send_t(cng_ix(no_peak_ix)),...
+            'g.');
+        set([h1, h2],'markersize',1)
+    end
+    %h=plot(send_t(seq_ix)/1000,s.decode+s.playout_delay-send_t(seq_ix));
+    h=plot(send_t(cng_ix)/1000,s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix));
+    set(h,'linew',1.5);
+    hold off
+    % keep the original upper y-limit but tighten the other three limits
+    ax1=axis;
+    axis tight
+    ax2=axis;
+    axis([ax2(1:3) ax1(4)])
+end
+
+
+% calculate delays and other parameters
+
+delayskip_ix = find(send_t-send_t(1)>=delayskip*1000, 1 );
+
+use_ix = intersect(cng_ix,... % use those that are not CNG/SID frames...
+    intersect(find(isfinite(s.decode)),... % ... that did arrive ...
+    (delayskip_ix:length(s.decode))')); % ... and are sent after delayskip seconds
+
+mean_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-send_t(use_ix));
+neteq_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-s.arrival(use_ix));
+
+Npack=max(s.sn(delayskip_ix:end))-min(s.sn(delayskip_ix:end))+1;
+nw_lossrate=(Npack-length(s.sn(delayskip_ix:end)))/Npack;
+neteq_lossrate=(length(s.sn(delayskip_ix:end))-length(use_ix))/Npack;
+
+delay_struct=struct('mean_delay',mean_delay,'neteq_delay',neteq_delay,...
+    'nw_lossrate',nw_lossrate,'neteq_lossrate',neteq_lossrate,...
+    'tot_expand',round(s.tot_expand),'tot_accelerate',round(s.tot_accelerate),...
+    'tot_preemptive',round(s.tot_preemptive),'tot_time',tot_time,...
+    'filename',delayfile,'units','ms','fs',unique(s.fs));
+
+if not(isempty(delaypoints))
+    delayvalues=interp1(send_t(cng_ix),...
+        s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix),...
+        delaypoints,'nearest',NaN);
+else
+    delayvalues=[];
+end
+
+
+
+% SUBFUNCTIONS %
+
+function y=unwrap_seqno(x)
+% Removes 16-bit wrap-arounds from the sequence number vector x by adding or
+% subtracting 65536 from everything after each jump larger than 65000.
+
+jumps=find(abs((diff(x)-1))>65000);
+
+while ~isempty(jumps)
+    n=jumps(1);
+    if x(n+1)-x(n) < 0
+        % negative jump
+        x(n+1:end)=x(n+1:end)+65536;
+    else
+        % positive jump
+        x(n+1:end)=x(n+1:end)-65536;
+    end
+
+    % Search only the remainder, but convert the result back to absolute
+    % indices into x: n is used as an absolute index at the top of the loop.
+    jumps=n+find(abs((diff(x(n+1:end))-1))>65000);
+end
+
+y=x;
+
+return;
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc
new file mode 100644
index 0000000000..e6c1809fb6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.cc
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/neteq/test/neteq_decoding_test.h" + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/rtp_headers.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/test/result_sink.h" +#include "rtc_base/strings/string_builder.h" +#include "test/testsupport/file_utils.h" + +#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h" +#else +#include "modules/audio_coding/neteq/neteq_unittest.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() +#endif + +namespace webrtc { + +namespace { + +void LoadDecoders(webrtc::NetEq* neteq) { + ASSERT_EQ(true, + neteq->RegisterPayloadType(0, SdpAudioFormat("pcmu", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(8, SdpAudioFormat("pcma", 8000, 1))); +#ifdef WEBRTC_CODEC_ILBC + ASSERT_EQ(true, + neteq->RegisterPayloadType(102, SdpAudioFormat("ilbc", 8000, 1))); +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + ASSERT_EQ(true, + neteq->RegisterPayloadType(103, SdpAudioFormat("isac", 16000, 1))); +#endif +#ifdef WEBRTC_CODEC_ISAC + ASSERT_EQ(true, + neteq->RegisterPayloadType(104, SdpAudioFormat("isac", 32000, 1))); +#endif +#ifdef WEBRTC_CODEC_OPUS + ASSERT_EQ(true, + neteq->RegisterPayloadType( + 111, SdpAudioFormat("opus", 48000, 2, {{"stereo", "0"}}))); +#endif + ASSERT_EQ(true, + neteq->RegisterPayloadType(93, SdpAudioFormat("L16", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(94, SdpAudioFormat("L16", 16000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(95, SdpAudioFormat("L16", 32000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(13, SdpAudioFormat("cn", 8000, 1))); + ASSERT_EQ(true, + neteq->RegisterPayloadType(98, SdpAudioFormat("cn", 16000, 1))); +} + +} // namespace + +const int 
NetEqDecodingTest::kTimeStepMs; +const size_t NetEqDecodingTest::kBlockSize8kHz; +const size_t NetEqDecodingTest::kBlockSize16kHz; +const size_t NetEqDecodingTest::kBlockSize32kHz; +const int NetEqDecodingTest::kInitSampleRateHz; + +NetEqDecodingTest::NetEqDecodingTest() + : clock_(0), + config_(), + output_sample_rate_(kInitSampleRateHz), + algorithmic_delay_ms_(0) { + config_.sample_rate_hz = kInitSampleRateHz; +} + +void NetEqDecodingTest::SetUp() { + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + neteq_ = DefaultNetEqFactory().CreateNetEq(config_, decoder_factory, &clock_); + NetEqNetworkStatistics stat; + ASSERT_EQ(0, neteq_->NetworkStatistics(&stat)); + algorithmic_delay_ms_ = stat.current_buffer_size_ms; + ASSERT_TRUE(neteq_); + LoadDecoders(neteq_.get()); +} + +void NetEqDecodingTest::TearDown() {} + +void NetEqDecodingTest::OpenInputFile(absl::string_view rtp_file) { + rtp_source_.reset(test::RtpFileSource::Create(rtp_file)); +} + +void NetEqDecodingTest::Process() { + // Check if time to receive. + while (packet_ && clock_.TimeInMilliseconds() >= packet_->time_ms()) { + if (packet_->payload_length_bytes() > 0) { +#ifndef WEBRTC_CODEC_ISAC + // Ignore payload type 104 (iSAC-swb) if ISAC is not supported. + if (packet_->header().payloadType != 104) +#endif + ASSERT_EQ( + 0, neteq_->InsertPacket( + packet_->header(), + rtc::ArrayView<const uint8_t>( + packet_->payload(), packet_->payload_length_bytes()))); + } + // Get next packet. + packet_ = rtp_source_->NextPacket(); + } + + // Get audio from NetEq. 
+ bool muted; + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_FALSE(muted); + ASSERT_TRUE((out_frame_.samples_per_channel_ == kBlockSize8kHz) || + (out_frame_.samples_per_channel_ == kBlockSize16kHz) || + (out_frame_.samples_per_channel_ == kBlockSize32kHz) || + (out_frame_.samples_per_channel_ == kBlockSize48kHz)); + output_sample_rate_ = out_frame_.sample_rate_hz_; + EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz()); + + // Increase time. + clock_.AdvanceTimeMilliseconds(kTimeStepMs); +} + +void NetEqDecodingTest::DecodeAndCompare( + absl::string_view rtp_file, + absl::string_view output_checksum, + absl::string_view network_stats_checksum, + bool gen_ref) { + OpenInputFile(rtp_file); + + std::string ref_out_file = + gen_ref ? webrtc::test::OutputPath() + "neteq_universal_ref.pcm" : ""; + ResultSink output(ref_out_file); + + std::string stat_out_file = + gen_ref ? webrtc::test::OutputPath() + "neteq_network_stats.dat" : ""; + ResultSink network_stats(stat_out_file); + + packet_ = rtp_source_->NextPacket(); + int i = 0; + uint64_t last_concealed_samples = 0; + uint64_t last_total_samples_received = 0; + while (packet_) { + rtc::StringBuilder ss; + ss << "Lap number " << i++ << " in DecodeAndCompare while loop"; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + ASSERT_NO_FATAL_FAILURE(Process()); + ASSERT_NO_FATAL_FAILURE( + output.AddResult(out_frame_.data(), out_frame_.samples_per_channel_)); + + // Query the network statistics API once per second + if (clock_.TimeInMilliseconds() % 1000 == 0) { + // Process NetworkStatistics. + NetEqNetworkStatistics current_network_stats; + ASSERT_EQ(0, neteq_->NetworkStatistics(¤t_network_stats)); + ASSERT_NO_FATAL_FAILURE(network_stats.AddResult(current_network_stats)); + + // Verify that liftime stats and network stats report similar loss + // concealment rates. 
+ auto lifetime_stats = neteq_->GetLifetimeStatistics(); + const uint64_t delta_concealed_samples = + lifetime_stats.concealed_samples - last_concealed_samples; + last_concealed_samples = lifetime_stats.concealed_samples; + const uint64_t delta_total_samples_received = + lifetime_stats.total_samples_received - last_total_samples_received; + last_total_samples_received = lifetime_stats.total_samples_received; + // The tolerance is 1% but expressed in Q14. + EXPECT_NEAR( + (delta_concealed_samples << 14) / delta_total_samples_received, + current_network_stats.expand_rate, (2 << 14) / 100.0); + } + } + + SCOPED_TRACE("Check output audio."); + output.VerifyChecksum(output_checksum); + SCOPED_TRACE("Check network stats."); + network_stats.VerifyChecksum(network_stats_checksum); +} + +void NetEqDecodingTest::PopulateRtpInfo(int frame_index, + int timestamp, + RTPHeader* rtp_info) { + rtp_info->sequenceNumber = frame_index; + rtp_info->timestamp = timestamp; + rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info->payloadType = 94; // PCM16b WB codec. + rtp_info->markerBit = false; +} + +void NetEqDecodingTest::PopulateCng(int frame_index, + int timestamp, + RTPHeader* rtp_info, + uint8_t* payload, + size_t* payload_len) { + rtp_info->sequenceNumber = frame_index; + rtp_info->timestamp = timestamp; + rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info->payloadType = 98; // WB CNG. + rtp_info->markerBit = false; + payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen. + *payload_len = 1; // Only noise level, no spectral parameters. +} + +void NetEqDecodingTest::WrapTest(uint16_t start_seq_no, + uint32_t start_timestamp, + const std::set<uint16_t>& drop_seq_numbers, + bool expect_seq_no_wrap, + bool expect_timestamp_wrap) { + uint16_t seq_no = start_seq_no; + uint32_t timestamp = start_timestamp; + const int kBlocksPerFrame = 3; // Number of 10 ms blocks per frame. 
+ const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs; + const int kSamples = kBlockSize16kHz * kBlocksPerFrame; + const size_t kPayloadBytes = kSamples * sizeof(int16_t); + double next_input_time_ms = 0.0; + + // Insert speech for 2 seconds. + const int kSpeechDurationMs = 2000; + uint16_t last_seq_no; + uint32_t last_timestamp; + bool timestamp_wrapped = false; + bool seq_no_wrapped = false; + for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) { + // This sequence number was not in the set to drop. Insert it. + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + } + NetEqNetworkStatistics network_stats; + ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats)); + + EXPECT_LE(network_stats.preferred_buffer_size_ms, 80); + EXPECT_LE(network_stats.current_buffer_size_ms, + 80 + algorithmic_delay_ms_); + last_seq_no = seq_no; + last_timestamp = timestamp; + + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast<double>(kFrameSizeMs); + + seq_no_wrapped |= seq_no < last_seq_no; + timestamp_wrapped |= timestamp < last_timestamp; + } + // Pull out data once. + AudioFrame output; + bool muted; + ASSERT_EQ(0, neteq_->GetAudio(&output, &muted)); + ASSERT_EQ(kBlockSize16kHz, output.samples_per_channel_); + ASSERT_EQ(1u, output.num_channels_); + + // Expect delay (in samples) to be less than 2 packets. + absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp(); + ASSERT_TRUE(playout_timestamp); + EXPECT_LE(timestamp - *playout_timestamp, + static_cast<uint32_t>(kSamples * 2)); + } + // Make sure we have actually tested wrap-around. 
+ ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped); + ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped); +} + +void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor, + double network_freeze_ms, + bool pull_audio_during_freeze, + int delay_tolerance_ms, + int max_time_to_speech_ms) { + uint16_t seq_no = 0; + uint32_t timestamp = 0; + const int kFrameSizeMs = 30; + const size_t kSamples = kFrameSizeMs * 16; + const size_t kPayloadBytes = kSamples * 2; + double next_input_time_ms = 0.0; + double t_ms; + bool muted; + + // Insert speech for 5 seconds. + const int kSpeechDurationMs = 5000; + for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor; + } + // Pull out data once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + } + + EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_); + absl::optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp(); + ASSERT_TRUE(playout_timestamp); + int32_t delay_before = timestamp - *playout_timestamp; + + // Insert CNG for 1 minute (= 60000 ms). + const int kCngPeriodMs = 100; + const int kCngPeriodSamples = kCngPeriodMs * 16; // Period in 16 kHz samples. + const int kCngDurationMs = 60000; + for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one CNG frame each 100 ms. 
+ uint8_t payload[kPayloadBytes]; + size_t payload_len; + RTPHeader rtp_info; + PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>( + payload, payload_len))); + ++seq_no; + timestamp += kCngPeriodSamples; + next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor; + } + // Pull out data once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + } + + EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); + + if (network_freeze_ms > 0) { + // First keep pulling audio for `network_freeze_ms` without inserting + // any data, then insert CNG data corresponding to `network_freeze_ms` + // without pulling any output audio. + const double loop_end_time = t_ms + network_freeze_ms; + for (; t_ms < loop_end_time; t_ms += 10) { + // Pull out data once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); + } + bool pull_once = pull_audio_during_freeze; + // If `pull_once` is true, GetAudio will be called once half-way through + // the network recovery period. + double pull_time_ms = (t_ms + next_input_time_ms) / 2; + while (next_input_time_ms <= t_ms) { + if (pull_once && next_input_time_ms >= pull_time_ms) { + pull_once = false; + // Pull out data once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_); + t_ms += 10; + } + // Insert one CNG frame each 100 ms. 
+ uint8_t payload[kPayloadBytes]; + size_t payload_len; + RTPHeader rtp_info; + PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, rtc::ArrayView<const uint8_t>( + payload, payload_len))); + ++seq_no; + timestamp += kCngPeriodSamples; + next_input_time_ms += kCngPeriodMs * drift_factor; + } + } + + // Insert speech again until output type is speech. + double speech_restart_time_ms = t_ms; + while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + RTPHeader rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += kFrameSizeMs * drift_factor; + } + // Pull out data once. + ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); + ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_); + // Increase clock. + t_ms += 10; + } + + // Check that the speech starts again within reasonable time. + double time_until_speech_returns_ms = t_ms - speech_restart_time_ms; + EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms); + playout_timestamp = neteq_->GetPlayoutTimestamp(); + ASSERT_TRUE(playout_timestamp); + int32_t delay_after = timestamp - *playout_timestamp; + // Compare delay before and after, and make sure it differs less than 20 ms. 
+ EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16); + EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16); +} + +void NetEqDecodingTestTwoInstances::SetUp() { + NetEqDecodingTest::SetUp(); + config2_ = config_; +} + +void NetEqDecodingTestTwoInstances::CreateSecondInstance() { + auto decoder_factory = CreateBuiltinAudioDecoderFactory(); + neteq2_ = + DefaultNetEqFactory().CreateNetEq(config2_, decoder_factory, &clock_); + ASSERT_TRUE(neteq2_); + LoadDecoders(neteq2_.get()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h new file mode 100644 index 0000000000..456c397fdd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_decoding_test.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_ +#define MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_ + +#include <memory> +#include <set> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/audio/audio_frame.h" +#include "api/neteq/neteq.h" +#include "api/rtp_headers.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "modules/audio_coding/neteq/tools/rtp_file_source.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" + +namespace webrtc { + +class NetEqDecodingTest : public ::testing::Test { + protected: + // NetEQ must be polled for data once every 10 ms. + // Thus, none of the constants below can be changed. 
+ static constexpr int kTimeStepMs = 10; + static constexpr size_t kBlockSize8kHz = kTimeStepMs * 8; + static constexpr size_t kBlockSize16kHz = kTimeStepMs * 16; + static constexpr size_t kBlockSize32kHz = kTimeStepMs * 32; + static constexpr size_t kBlockSize48kHz = kTimeStepMs * 48; + static constexpr int kInitSampleRateHz = 8000; + + NetEqDecodingTest(); + virtual void SetUp(); + virtual void TearDown(); + void OpenInputFile(absl::string_view rtp_file); + void Process(); + + void DecodeAndCompare(absl::string_view rtp_file, + absl::string_view output_checksum, + absl::string_view network_stats_checksum, + bool gen_ref); + + static void PopulateRtpInfo(int frame_index, + int timestamp, + RTPHeader* rtp_info); + static void PopulateCng(int frame_index, + int timestamp, + RTPHeader* rtp_info, + uint8_t* payload, + size_t* payload_len); + + void WrapTest(uint16_t start_seq_no, + uint32_t start_timestamp, + const std::set<uint16_t>& drop_seq_numbers, + bool expect_seq_no_wrap, + bool expect_timestamp_wrap); + + void LongCngWithClockDrift(double drift_factor, + double network_freeze_ms, + bool pull_audio_during_freeze, + int delay_tolerance_ms, + int max_time_to_speech_ms); + + SimulatedClock clock_; + std::unique_ptr<NetEq> neteq_; + NetEq::Config config_; + std::unique_ptr<test::RtpFileSource> rtp_source_; + std::unique_ptr<test::Packet> packet_; + AudioFrame out_frame_; + int output_sample_rate_; + int algorithmic_delay_ms_; +}; + +class NetEqDecodingTestTwoInstances : public NetEqDecodingTest { + public: + NetEqDecodingTestTwoInstances() : NetEqDecodingTest() {} + + void SetUp() override; + + void CreateSecondInstance(); + + protected: + std::unique_ptr<NetEq> neteq2_; + NetEq::Config config2_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TEST_NETEQ_DECODING_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc 
new file mode 100644 index 0000000000..1004141f16 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_ilbc_quality_test.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> + +#include "absl/flags/flag.h" +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/testsupport/file_utils.h" + +ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds)."); + +using ::testing::InitGoogleTest; + +namespace webrtc { +namespace test { +namespace { +static const int kInputSampleRateKhz = 8; +static const int kOutputSampleRateKhz = 8; +} // namespace + +class NetEqIlbcQualityTest : public NetEqQualityTest { + protected: + NetEqIlbcQualityTest() + : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms), + kInputSampleRateKhz, + kOutputSampleRateKhz, + SdpAudioFormat("ilbc", 8000, 1)) { + // Flag validation + RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) == 20 || + absl::GetFlag(FLAGS_frame_size_ms) == 30 || + absl::GetFlag(FLAGS_frame_size_ms) == 40 || + absl::GetFlag(FLAGS_frame_size_ms) == 60) + << "Invalid frame size, should be 20, 30, 40, or 60 ms."; + } + + void SetUp() override { + ASSERT_EQ(1u, channels_) << "iLBC supports only mono audio."; + AudioEncoderIlbcConfig config; + config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms); + encoder_.reset(new AudioEncoderIlbcImpl(config, 102)); + NetEqQualityTest::SetUp(); + } + + int EncodeBlock(int16_t* in_data, + size_t 
block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) override { + const size_t kFrameSizeSamples = 80; // Samples per 10 ms. + size_t encoded_samples = 0; + uint32_t dummy_timestamp = 0; + AudioEncoder::EncodedInfo info; + do { + info = encoder_->Encode(dummy_timestamp, + rtc::ArrayView<const int16_t>( + in_data + encoded_samples, kFrameSizeSamples), + payload); + encoded_samples += kFrameSizeSamples; + } while (info.encoded_bytes == 0); + return rtc::checked_cast<int>(info.encoded_bytes); + } + + private: + std::unique_ptr<AudioEncoderIlbcImpl> encoder_; +}; + +TEST_F(NetEqIlbcQualityTest, Test) { + Simulate(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_isac_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_isac_quality_test.cc new file mode 100644 index 0000000000..6a096c307c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_isac_quality_test.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "absl/flags/flag.h" +#include "modules/audio_coding/codecs/isac/fix/include/isacfix.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" + +ABSL_FLAG(int, bit_rate_kbps, 32, "Target bit rate (kbps)."); + +using ::testing::InitGoogleTest; + +namespace webrtc { +namespace test { +namespace { +static const int kIsacBlockDurationMs = 30; +static const int kIsacInputSamplingKhz = 16; +static const int kIsacOutputSamplingKhz = 16; +} // namespace + +class NetEqIsacQualityTest : public NetEqQualityTest { + protected: + NetEqIsacQualityTest(); + void SetUp() override; + void TearDown() override; + int EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) override; + + private: + ISACFIX_MainStruct* isac_encoder_; + int bit_rate_kbps_; +}; + +NetEqIsacQualityTest::NetEqIsacQualityTest() + : NetEqQualityTest(kIsacBlockDurationMs, + kIsacInputSamplingKhz, + kIsacOutputSamplingKhz, + SdpAudioFormat("isac", 16000, 1)), + isac_encoder_(NULL), + bit_rate_kbps_(absl::GetFlag(FLAGS_bit_rate_kbps)) { + // Flag validation + RTC_CHECK(absl::GetFlag(FLAGS_bit_rate_kbps) >= 10 && + absl::GetFlag(FLAGS_bit_rate_kbps) <= 32) + << "Invalid bit rate, should be between 10 and 32 kbps."; +} + +void NetEqIsacQualityTest::SetUp() { + ASSERT_EQ(1u, channels_) << "iSAC supports only mono audio."; + // Create encoder memory. + WebRtcIsacfix_Create(&isac_encoder_); + ASSERT_TRUE(isac_encoder_ != NULL); + EXPECT_EQ(0, WebRtcIsacfix_EncoderInit(isac_encoder_, 1)); + // Set bitrate and block length. + EXPECT_EQ(0, WebRtcIsacfix_Control(isac_encoder_, bit_rate_kbps_ * 1000, + kIsacBlockDurationMs)); + NetEqQualityTest::SetUp(); +} + +void NetEqIsacQualityTest::TearDown() { + // Free memory. 
+ EXPECT_EQ(0, WebRtcIsacfix_Free(isac_encoder_)); + NetEqQualityTest::TearDown(); +} + +int NetEqIsacQualityTest::EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) { + // ISAC takes 10 ms for every call. + const int subblocks = kIsacBlockDurationMs / 10; + const int subblock_length = 10 * kIsacInputSamplingKhz; + int value = 0; + + int pointer = 0; + for (int idx = 0; idx < subblocks; idx++, pointer += subblock_length) { + // The Isac encoder does not perform encoding (and returns 0) until it + // receives a sequence of sub-blocks that amount to the frame duration. + EXPECT_EQ(0, value); + payload->AppendData(max_bytes, [&](rtc::ArrayView<uint8_t> payload) { + value = WebRtcIsacfix_Encode(isac_encoder_, &in_data[pointer], + payload.data()); + return (value >= 0) ? static_cast<size_t>(value) : 0; + }); + } + EXPECT_GT(value, 0); + return value; +} + +TEST_F(NetEqIsacQualityTest, Test) { + Simulate(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc new file mode 100644 index 0000000000..5a2df24ef6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_opus_quality_test.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "absl/flags/flag.h" +#include "modules/audio_coding/codecs/opus/opus_inst.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" + +ABSL_FLAG(int, bit_rate_kbps, 32, "Target bit rate (kbps)."); + +ABSL_FLAG(int, + complexity, + 10, + "Complexity: 0 ~ 10 -- defined as in Opus" + "specification."); + +ABSL_FLAG(int, maxplaybackrate, 48000, "Maximum playback rate (Hz)."); + +ABSL_FLAG(int, application, 0, "Application mode: 0 -- VOIP, 1 -- Audio."); + +ABSL_FLAG(int, reported_loss_rate, 10, "Reported percentile of packet loss."); + +ABSL_FLAG(bool, fec, false, "Enable FEC for encoding (-nofec to disable)."); + +ABSL_FLAG(bool, dtx, false, "Enable DTX for encoding (-nodtx to disable)."); + +ABSL_FLAG(int, sub_packets, 1, "Number of sub packets to repacketize."); + +using ::testing::InitGoogleTest; + +namespace webrtc { +namespace test { +namespace { + +static const int kOpusBlockDurationMs = 20; +static const int kOpusSamplingKhz = 48; +} // namespace + +class NetEqOpusQualityTest : public NetEqQualityTest { + protected: + NetEqOpusQualityTest(); + void SetUp() override; + void TearDown() override; + int EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) override; + + private: + WebRtcOpusEncInst* opus_encoder_; + OpusRepacketizer* repacketizer_; + size_t sub_block_size_samples_; + int bit_rate_kbps_; + bool fec_; + bool dtx_; + int complexity_; + int maxplaybackrate_; + int target_loss_rate_; + int sub_packets_; + int application_; +}; + +NetEqOpusQualityTest::NetEqOpusQualityTest() + : NetEqQualityTest(kOpusBlockDurationMs * absl::GetFlag(FLAGS_sub_packets), + kOpusSamplingKhz, + kOpusSamplingKhz, + SdpAudioFormat("opus", 48000, 2)), + opus_encoder_(NULL), + repacketizer_(NULL), + sub_block_size_samples_( + static_cast<size_t>(kOpusBlockDurationMs * kOpusSamplingKhz)), + bit_rate_kbps_(absl::GetFlag(FLAGS_bit_rate_kbps)), + 
fec_(absl::GetFlag(FLAGS_fec)), + dtx_(absl::GetFlag(FLAGS_dtx)), + complexity_(absl::GetFlag(FLAGS_complexity)), + maxplaybackrate_(absl::GetFlag(FLAGS_maxplaybackrate)), + target_loss_rate_(absl::GetFlag(FLAGS_reported_loss_rate)), + sub_packets_(absl::GetFlag(FLAGS_sub_packets)) { + // Flag validation + RTC_CHECK(absl::GetFlag(FLAGS_bit_rate_kbps) >= 6 && + absl::GetFlag(FLAGS_bit_rate_kbps) <= 510) + << "Invalid bit rate, should be between 6 and 510 kbps."; + + RTC_CHECK(absl::GetFlag(FLAGS_complexity) >= -1 && + absl::GetFlag(FLAGS_complexity) <= 10) + << "Invalid complexity setting, should be between 0 and 10."; + + RTC_CHECK(absl::GetFlag(FLAGS_application) == 0 || + absl::GetFlag(FLAGS_application) == 1) + << "Invalid application mode, should be 0 or 1."; + + RTC_CHECK(absl::GetFlag(FLAGS_reported_loss_rate) >= 0 && + absl::GetFlag(FLAGS_reported_loss_rate) <= 100) + << "Invalid packet loss percentile, should be between 0 and 100."; + + RTC_CHECK(absl::GetFlag(FLAGS_sub_packets) >= 1 && + absl::GetFlag(FLAGS_sub_packets) <= 3) + << "Invalid number of sub packets, should be between 1 and 3."; + + // Redefine decoder type if input is stereo. + if (channels_ > 1) { + audio_format_ = SdpAudioFormat("opus", 48000, 2, + SdpAudioFormat::Parameters{{"stereo", "1"}}); + } + application_ = absl::GetFlag(FLAGS_application); +} + +void NetEqOpusQualityTest::SetUp() { + // Create encoder memory. + WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, application_, 48000); + ASSERT_TRUE(opus_encoder_); + + // Create repacketizer. + repacketizer_ = opus_repacketizer_create(); + ASSERT_TRUE(repacketizer_); + + // Set bitrate. 
+ EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_kbps_ * 1000)); + if (fec_) { + EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_)); + } + if (dtx_) { + EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_)); + } + EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity_)); + EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, maxplaybackrate_)); + EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, target_loss_rate_)); + NetEqQualityTest::SetUp(); +} + +void NetEqOpusQualityTest::TearDown() { + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + opus_repacketizer_destroy(repacketizer_); + NetEqQualityTest::TearDown(); +} + +int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) { + EXPECT_EQ(block_size_samples, sub_block_size_samples_ * sub_packets_); + int16_t* pointer = in_data; + int value; + opus_repacketizer_init(repacketizer_); + for (int idx = 0; idx < sub_packets_; idx++) { + payload->AppendData(max_bytes, [&](rtc::ArrayView<uint8_t> payload) { + value = WebRtcOpus_Encode(opus_encoder_, pointer, sub_block_size_samples_, + max_bytes, payload.data()); + + Log() << "Encoded a frame with Opus mode " + << (value == 0 ? 0 : payload[0] >> 3) << std::endl; + + return (value >= 0) ? static_cast<size_t>(value) : 0; + }); + + if (OPUS_OK != + opus_repacketizer_cat(repacketizer_, payload->data(), value)) { + opus_repacketizer_init(repacketizer_); + // If the repacketization fails, we discard this frame. 
+ return 0; + } + pointer += sub_block_size_samples_ * channels_; + } + value = opus_repacketizer_out(repacketizer_, payload->data(), + static_cast<opus_int32>(max_bytes)); + EXPECT_GE(value, 0); + return value; +} + +TEST_F(NetEqOpusQualityTest, Test) { + Simulate(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc new file mode 100644 index 0000000000..c3e160cb66 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcm16b_quality_test.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> + +#include "absl/flags/flag.h" +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/testsupport/file_utils.h" + +ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds)."); + +using ::testing::InitGoogleTest; + +namespace webrtc { +namespace test { +namespace { +static const int kInputSampleRateKhz = 48; +static const int kOutputSampleRateKhz = 48; +} // namespace + +class NetEqPcm16bQualityTest : public NetEqQualityTest { + protected: + NetEqPcm16bQualityTest() + : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms), + kInputSampleRateKhz, + kOutputSampleRateKhz, + SdpAudioFormat("l16", 48000, 1)) { + // Flag validation + RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) >= 10 && + absl::GetFlag(FLAGS_frame_size_ms) <= 60 && + (absl::GetFlag(FLAGS_frame_size_ms) % 10) == 0) + << "Invalid frame size, should be 10, 20, ..., 60 ms."; + } + + void SetUp() override { + AudioEncoderPcm16B::Config config; + config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms); + config.sample_rate_hz = 48000; + config.num_channels = channels_; + encoder_.reset(new AudioEncoderPcm16B(config)); + NetEqQualityTest::SetUp(); + } + + int EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) override { + const size_t kFrameSizeSamples = 480; // Samples per 10 ms. 
+ size_t encoded_samples = 0; + uint32_t dummy_timestamp = 0; + AudioEncoder::EncodedInfo info; + do { + info = encoder_->Encode(dummy_timestamp, + rtc::ArrayView<const int16_t>( + in_data + encoded_samples, kFrameSizeSamples), + payload); + encoded_samples += kFrameSizeSamples; + } while (info.encoded_bytes == 0); + return rtc::checked_cast<int>(info.encoded_bytes); + } + + private: + std::unique_ptr<AudioEncoderPcm16B> encoder_; +}; + +TEST_F(NetEqPcm16bQualityTest, Test) { + Simulate(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc new file mode 100644 index 0000000000..d22170c623 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_pcmu_quality_test.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> + +#include "absl/flags/flag.h" +#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/testsupport/file_utils.h" + +ABSL_FLAG(int, frame_size_ms, 20, "Codec frame size (milliseconds)."); + +using ::testing::InitGoogleTest; + +namespace webrtc { +namespace test { +namespace { +static const int kInputSampleRateKhz = 8; +static const int kOutputSampleRateKhz = 8; +} // namespace + +class NetEqPcmuQualityTest : public NetEqQualityTest { + protected: + NetEqPcmuQualityTest() + : NetEqQualityTest(absl::GetFlag(FLAGS_frame_size_ms), + kInputSampleRateKhz, + kOutputSampleRateKhz, + SdpAudioFormat("pcmu", 8000, 1)) { + // Flag validation + RTC_CHECK(absl::GetFlag(FLAGS_frame_size_ms) >= 10 && + absl::GetFlag(FLAGS_frame_size_ms) <= 60 && + (absl::GetFlag(FLAGS_frame_size_ms) % 10) == 0) + << "Invalid frame size, should be 10, 20, ..., 60 ms."; + } + + void SetUp() override { + ASSERT_EQ(1u, channels_) << "PCMu supports only mono audio."; + AudioEncoderPcmU::Config config; + config.frame_size_ms = absl::GetFlag(FLAGS_frame_size_ms); + encoder_.reset(new AudioEncoderPcmU(config)); + NetEqQualityTest::SetUp(); + } + + int EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) override { + const size_t kFrameSizeSamples = 80; // Samples per 10 ms. 
+ size_t encoded_samples = 0; + uint32_t dummy_timestamp = 0; + AudioEncoder::EncodedInfo info; + do { + info = encoder_->Encode(dummy_timestamp, + rtc::ArrayView<const int16_t>( + in_data + encoded_samples, kFrameSizeSamples), + payload); + encoded_samples += kFrameSizeSamples; + } while (info.encoded_bytes == 0); + return rtc::checked_cast<int>(info.encoded_bytes); + } + + private: + std::unique_ptr<AudioEncoderPcmU> encoder_; +}; + +TEST_F(NetEqPcmuQualityTest, Test) { + Simulate(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc new file mode 100644 index 0000000000..c06772af26 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_performance_unittest.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_performance_test.h" +#include "system_wrappers/include/field_trial.h" +#include "test/gtest.h" +#include "test/testsupport/perf_test.h" + +// Runs a test with 10% packet losses and 10% clock drift, to exercise +// both loss concealment and time-stretching code. +TEST(NetEqPerformanceTest, 10_Pl_10_Drift) { + const int kSimulationTimeMs = 10000000; + const int kQuickSimulationTimeMs = 100000; + const int kLossPeriod = 10; // Drop every 10th packet. + const double kDriftFactor = 0.1; + int64_t runtime = webrtc::test::NetEqPerformanceTest::Run( + webrtc::field_trial::IsEnabled("WebRTC-QuickPerfTest") + ? 
kQuickSimulationTimeMs + : kSimulationTimeMs, + kLossPeriod, kDriftFactor); + ASSERT_GT(runtime, 0); + webrtc::test::PrintResult("neteq_performance", "", "10_pl_10_drift", runtime, + "ms", true); +} + +// Runs a test with neither packet losses nor clock drift, to put +// emphasis on the "good-weather" code path, which is presumably much +// more lightweight. +TEST(NetEqPerformanceTest, 0_Pl_0_Drift) { + const int kSimulationTimeMs = 10000000; + const int kQuickSimulationTimeMs = 100000; + const int kLossPeriod = 0; // No losses. + const double kDriftFactor = 0.0; // No clock drift. + int64_t runtime = webrtc::test::NetEqPerformanceTest::Run( + webrtc::field_trial::IsEnabled("WebRTC-QuickPerfTest") + ? kQuickSimulationTimeMs + : kSimulationTimeMs, + kLossPeriod, kDriftFactor); + ASSERT_GT(runtime, 0); + webrtc::test::PrintResult("neteq_performance", "", "0_pl_0_drift", runtime, + "ms", true); +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc new file mode 100644 index 0000000000..a72b2009eb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#include <iostream> +#include <vector> + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "modules/audio_coding/neteq/tools/neteq_performance_test.h" +#include "rtc_base/checks.h" + +// Define command line flags. 
+ABSL_FLAG(int, runtime_ms, 10000, "Simulated runtime in ms."); +ABSL_FLAG(int, lossrate, 10, "Packet lossrate; drop every N packets."); +ABSL_FLAG(float, drift, 0.1f, "Clockdrift factor."); + +int main(int argc, char* argv[]) { + std::vector<char*> args = absl::ParseCommandLine(argc, argv); + std::string program_name = args[0]; + std::string usage = + "Tool for measuring the speed of NetEq.\n" + "Usage: " + + program_name + + " [options]\n\n" + " --runtime_ms=N runtime in ms; default is 10000 ms\n" + " --lossrate=N drop every N packets; default is 10\n" + " --drift=F clockdrift factor between 0.0 and 1.0; " + "default is 0.1\n"; + if (args.size() != 1) { + printf("%s", usage.c_str()); + return 1; + } + RTC_CHECK_GT(absl::GetFlag(FLAGS_runtime_ms), 0); + RTC_CHECK_GE(absl::GetFlag(FLAGS_lossrate), 0); + RTC_CHECK(absl::GetFlag(FLAGS_drift) >= 0.0 && + absl::GetFlag(FLAGS_drift) < 1.0); + + int64_t result = webrtc::test::NetEqPerformanceTest::Run( + absl::GetFlag(FLAGS_runtime_ms), absl::GetFlag(FLAGS_lossrate), + absl::GetFlag(FLAGS_drift)); + if (result <= 0) { + std::cout << "There was an error" << std::endl; + return -1; + } + + std::cout << "Simulation done" << std::endl; + std::cout << "Runtime = " << result << " ms" << std::endl; + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc new file mode 100644 index 0000000000..f5d50dc859 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
 */

#include "modules/audio_coding/neteq/test/result_sink.h"

#include <string>

#include "absl/strings/string_view.h"
#include "rtc_base/ignore_wundef.h"
#include "rtc_base/message_digest.h"
#include "rtc_base/string_encode.h"
#include "test/gtest.h"

#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
RTC_PUSH_IGNORING_WUNDEF()
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
#include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h"
#else
#include "modules/audio_coding/neteq/neteq_unittest.pb.h"
#endif
RTC_POP_IGNORING_WUNDEF()
#endif

namespace webrtc {

#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
// Copies the fields of `stats_raw` one by one into the protobuf message
// `stats`.
void Convert(const webrtc::NetEqNetworkStatistics& stats_raw,
             webrtc::neteq_unittest::NetEqNetworkStatistics* stats) {
  stats->set_current_buffer_size_ms(stats_raw.current_buffer_size_ms);
  stats->set_preferred_buffer_size_ms(stats_raw.preferred_buffer_size_ms);
  stats->set_jitter_peaks_found(stats_raw.jitter_peaks_found);
  stats->set_expand_rate(stats_raw.expand_rate);
  stats->set_speech_expand_rate(stats_raw.speech_expand_rate);
  stats->set_preemptive_rate(stats_raw.preemptive_rate);
  stats->set_accelerate_rate(stats_raw.accelerate_rate);
  stats->set_secondary_decoded_rate(stats_raw.secondary_decoded_rate);
  stats->set_secondary_discarded_rate(stats_raw.secondary_discarded_rate);
  stats->set_mean_waiting_time_ms(stats_raw.mean_waiting_time_ms);
  stats->set_median_waiting_time_ms(stats_raw.median_waiting_time_ms);
  stats->set_min_waiting_time_ms(stats_raw.min_waiting_time_ms);
  stats->set_max_waiting_time_ms(stats_raw.max_waiting_time_ms);
}

// Emits `message` as a length-prefixed record: first the length as a raw
// int32_t, then the message bytes. Both parts are always fed to `digest`;
// they are additionally written to `file` when `file` is non-null.
void AddMessage(FILE* file,
                rtc::MessageDigest* digest,
                absl::string_view message) {
  // The length prefix is written in the host's native int32_t representation.
  int32_t size = message.length();
  if (file)
    ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
  digest->Update(&size, sizeof(size));

  if (file)
    ASSERT_EQ(static_cast<size_t>(size),
              fwrite(message.data(), sizeof(char), size, file));
  digest->Update(message.data(), sizeof(char) * size);
}

+#endif // WEBRTC_NETEQ_UNITTEST_BITEXACT + +ResultSink::ResultSink(absl::string_view output_file) + : output_fp_(nullptr), + digest_(rtc::MessageDigestFactory::Create(rtc::DIGEST_SHA_1)) { + if (!output_file.empty()) { + output_fp_ = fopen(std::string(output_file).c_str(), "wb"); + EXPECT_TRUE(output_fp_ != NULL); + } +} + +ResultSink::~ResultSink() { + if (output_fp_) + fclose(output_fp_); +} + +void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) { +#ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT + neteq_unittest::NetEqNetworkStatistics stats; + Convert(stats_raw, &stats); + + std::string stats_string; + ASSERT_TRUE(stats.SerializeToString(&stats_string)); + AddMessage(output_fp_, digest_.get(), stats_string); +#else + FAIL() << "Writing to reference file requires Proto Buffer."; +#endif // WEBRTC_NETEQ_UNITTEST_BITEXACT +} + +void ResultSink::VerifyChecksum(absl::string_view checksum) { + std::string buffer; + buffer.resize(digest_->Size()); + digest_->Finish(buffer.data(), buffer.size()); + const std::string result = rtc::hex_encode(buffer); + if (checksum.size() == result.size()) { + EXPECT_EQ(checksum, result); + } else { + // Check result is one the '|'-separated checksums. + EXPECT_NE(checksum.find(result), absl::string_view::npos) + << result << " should be one of these:\n" + << checksum; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h new file mode 100644 index 0000000000..c6923d7a7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/test/result_sink.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_ +#define MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_ + +#include <cstdio> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/neteq/neteq.h" +#include "rtc_base/message_digest.h" + +namespace webrtc { + +class ResultSink { + public: + explicit ResultSink(absl::string_view output_file); + ~ResultSink(); + + template <typename T> + void AddResult(const T* test_results, size_t length); + + void AddResult(const NetEqNetworkStatistics& stats); + + void VerifyChecksum(absl::string_view ref_check_sum); + + private: + FILE* output_fp_; + std::unique_ptr<rtc::MessageDigest> digest_; +}; + +template <typename T> +void ResultSink::AddResult(const T* test_results, size_t length) { + if (output_fp_) { + ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_)); + } + digest_->Update(test_results, sizeof(T) * length); +} + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TEST_RESULT_SINK_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc new file mode 100644 index 0000000000..b89be0608d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
 */

#include "modules/audio_coding/neteq/time_stretch.h"

#include <algorithm>  // min, max
#include <memory>

#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/dsp_helper.h"
#include "rtc_base/numerics/safe_conversions.h"

namespace webrtc {

// Analyzes `input` (interleaved, `input_len` samples in total) to find the
// dominant pitch lag and its normalized correlation, then hands the result to
// the sub-class via CheckCriteriaAndStretch(), which writes to `output`.
// `*length_change_samples` is set to the detected lag on success and to 0
// when no stretch was made or an error occurred.
TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
                                              size_t input_len,
                                              bool fast_mode,
                                              AudioMultiVector* output,
                                              size_t* length_change_samples) {
  // Pre-calculate common multiplication with `fs_mult_`.
  size_t fs_mult_120 =
      static_cast<size_t>(fs_mult_ * 120);  // Corresponds to 15 ms.

  const int16_t* signal;
  std::unique_ptr<int16_t[]> signal_array;
  size_t signal_len;
  if (num_channels_ == 1) {
    signal = input;
    signal_len = input_len;
  } else {
    // We want `signal` to be only the first channel of `input`, which is
    // interleaved. Thus, we take the first sample, skip forward `num_channels`
    // samples, and continue like that.
    signal_len = input_len / num_channels_;
    signal_array.reset(new int16_t[signal_len]);
    signal = signal_array.get();
    size_t j = kRefChannel;
    for (size_t i = 0; i < signal_len; ++i) {
      signal_array[i] = input[j];
      j += num_channels_;
    }
  }

  // Find maximum absolute value of input signal.
  max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len);

  // Downsample to 4 kHz sample rate and calculate auto-correlation.
  DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,
                              sample_rate_hz_, true /* compensate delay*/,
                              downsampled_input_);
  AutoCorrelation();

  // Find the strongest correlation peak.
  static const size_t kNumPeaks = 1;
  size_t peak_index;
  int16_t peak_value;
  DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,
                           fs_mult_, &peak_index, &peak_value);
  // Assert that `peak_index` stays within boundaries.
  RTC_DCHECK_LE(peak_index, (2 * kCorrelationLen - 1) * fs_mult_);

  // Compensate peak_index for displaced starting position. The displacement
  // happens in AutoCorrelation(). Here, `kMinLag` is in the down-sampled 4 kHz
  // domain, while the `peak_index` is in the original sample rate; hence, the
  // multiplication by fs_mult_ * 2.
  peak_index += kMinLag * fs_mult_ * 2;
  // Assert that `peak_index` stays within boundaries.
  RTC_DCHECK_GE(peak_index, static_cast<size_t>(20 * fs_mult_));
  RTC_DCHECK_LE(peak_index,
                20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);

  // Calculate scaling to ensure that `peak_index` samples can be square-summed
  // without overflowing.
  int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -
                WebRtcSpl_NormW32(static_cast<int32_t>(peak_index));
  scaling = std::max(0, scaling);

  // `vec1` starts at 15 ms minus one pitch period.
  const int16_t* vec1 = &signal[fs_mult_120 - peak_index];
  // `vec2` starts at 15 ms.
  const int16_t* vec2 = &signal[fs_mult_120];
  // Calculate energies for `vec1` and `vec2`, assuming they both contain
  // `peak_index` samples.
  int32_t vec1_energy =
      WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);
  int32_t vec2_energy =
      WebRtcSpl_DotProductWithScale(vec2, vec2, peak_index, scaling);

  // Calculate cross-correlation between `vec1` and `vec2`.
  int32_t cross_corr =
      WebRtcSpl_DotProductWithScale(vec1, vec2, peak_index, scaling);

  // Check if the signal seems to be active speech or not (simple VAD).
  bool active_speech =
      SpeechDetection(vec1_energy, vec2_energy, peak_index, scaling);

  int16_t best_correlation;
  if (!active_speech) {
    // The sub-class picks `best_correlation` and may override `peak_index`.
    SetParametersForPassiveSpeech(signal_len, &best_correlation, &peak_index);
  } else {
    // Calculate correlation:
    // cross_corr / sqrt(vec1_energy * vec2_energy).

    // Start with calculating scale values.
    int energy1_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec1_energy));
    int energy2_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec2_energy));

    // Make sure total scaling is even (to simplify scale factor after sqrt).
    if ((energy1_scale + energy2_scale) & 1) {
      // The sum is odd.
      energy1_scale += 1;
    }

    // Scale energies to int16_t.
    int16_t vec1_energy_int16 =
        static_cast<int16_t>(vec1_energy >> energy1_scale);
    int16_t vec2_energy_int16 =
        static_cast<int16_t>(vec2_energy >> energy2_scale);

    // Calculate square-root of energy product.
    int16_t sqrt_energy_prod =
        WebRtcSpl_SqrtFloor(vec1_energy_int16 * vec2_energy_int16);

    // Calculate cross_corr / sqrt(en1*en2) in Q14.
    int temp_scale = 14 - (energy1_scale + energy2_scale) / 2;
    cross_corr = WEBRTC_SPL_SHIFT_W32(cross_corr, temp_scale);
    cross_corr = std::max(0, cross_corr);  // Don't use if negative.
    best_correlation = WebRtcSpl_DivW32W16(cross_corr, sqrt_energy_prod);
    // Make sure `best_correlation` is no larger than 1 in Q14.
    best_correlation = std::min(static_cast<int16_t>(16384), best_correlation);
  }

  // Check accelerate criteria and stretch the signal.
  ReturnCodes return_value =
      CheckCriteriaAndStretch(input, input_len, peak_index, best_correlation,
                              active_speech, fast_mode, output);
  // Report the length change: one pitch period on success, nothing otherwise.
  switch (return_value) {
    case kSuccess:
      *length_change_samples = peak_index;
      break;
    case kSuccessLowEnergy:
      *length_change_samples = peak_index;
      break;
    case kNoStretch:
    case kError:
      *length_change_samples = 0;
      break;
  }
  return return_value;
}

// Computes the auto-correlation of `downsampled_input_` for lags kMinLag
// through kMaxLag and stores it, scaled down to fit 16 bits, in
// `auto_correlation_`.
void TimeStretch::AutoCorrelation() {
  // Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain.
  int32_t auto_corr[kCorrelationLen];
  CrossCorrelationWithAutoShift(
      &downsampled_input_[kMaxLag], &downsampled_input_[kMaxLag - kMinLag],
      kCorrelationLen, kMaxLag - kMinLag, -1, auto_corr);

  // Normalize correlation to 14 bits and write to `auto_correlation_`.
  int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);
  int scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));
  WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,
                                   auto_corr, scaling);
}

// Returns true when the two energy values indicate active speech, i.e. when
// their scaled sum exceeds the background-noise based threshold described
// below.
bool TimeStretch::SpeechDetection(int32_t vec1_energy,
                                  int32_t vec2_energy,
                                  size_t peak_index,
                                  int scaling) const {
  // Check if the signal seems to be active speech or not (simple VAD).
  // If (vec1_energy + vec2_energy) / (2 * peak_index) <=
  // 8 * background_noise_energy, then we say that the signal contains no
  // active speech.
  // Rewrite the inequality as:
  // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.
  // The two sides of the inequality will be denoted `left_side` and
  // `right_side`.
  int32_t left_side = rtc::saturated_cast<int32_t>(
      (static_cast<int64_t>(vec1_energy) + vec2_energy) / 16);
  int32_t right_side;
  if (background_noise_.initialized()) {
    right_side = background_noise_.Energy(kRefChannel);
  } else {
    // If noise parameters have not been estimated, use a fixed threshold.
    right_side = 75000;
  }
  // Shift both sides down by the same amount before multiplying `right_side`
  // by `peak_index`.
  int right_scale = 16 - WebRtcSpl_NormW32(right_side);
  right_scale = std::max(0, right_scale);
  left_side = left_side >> right_scale;
  right_side =
      rtc::dchecked_cast<int32_t>(peak_index) * (right_side >> right_scale);

  // Scale `left_side` properly before comparing with `right_side`.
  // (`scaling` is the scale factor before energy calculation, thus the scale
  // factor for the energy is 2 * scaling.)
  if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {
    // Cannot scale only `left_side`, must scale `right_side` too.
    int temp_scale = WebRtcSpl_NormW32(left_side);
    left_side = left_side << temp_scale;
    right_side = right_side >> (2 * scaling - temp_scale);
  } else {
    left_side = left_side << 2 * scaling;
  }
  return left_side > right_side;
}

}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h new file mode 100644 index 0000000000..f0ddaebeca --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch.h @@ -0,0 +1,113 @@
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
#define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_

#include <string.h>  // memset, size_t

// NOTE(review): the class below uses RTC_DCHECK*, but "rtc_base/checks.h" is
// not included directly here; presumably it arrives through
// audio_multi_vector.h -- confirm, and include it explicitly if not.
#include "modules/audio_coding/neteq/audio_multi_vector.h"

namespace webrtc {

// Forward declarations.
class BackgroundNoise;

// This is the base class for Accelerate and PreemptiveExpand. This class
// cannot be instantiated, but must be used through either of the derived
// classes.
+class TimeStretch { + public: + enum ReturnCodes { + kSuccess = 0, + kSuccessLowEnergy = 1, + kNoStretch = 2, + kError = -1 + }; + + TimeStretch(int sample_rate_hz, + size_t num_channels, + const BackgroundNoise& background_noise) + : sample_rate_hz_(sample_rate_hz), + fs_mult_(sample_rate_hz / 8000), + num_channels_(num_channels), + background_noise_(background_noise), + max_input_value_(0) { + RTC_DCHECK(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 || + sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000); + RTC_DCHECK_GT(num_channels_, 0); + memset(auto_correlation_, 0, sizeof(auto_correlation_)); + } + + virtual ~TimeStretch() {} + + TimeStretch(const TimeStretch&) = delete; + TimeStretch& operator=(const TimeStretch&) = delete; + + // This method performs the processing common to both Accelerate and + // PreemptiveExpand. + ReturnCodes Process(const int16_t* input, + size_t input_len, + bool fast_mode, + AudioMultiVector* output, + size_t* length_change_samples); + + protected: + // Sets the parameters `best_correlation` and `peak_index` to suitable + // values when the signal contains no active speech. This method must be + // implemented by the sub-classes. + virtual void SetParametersForPassiveSpeech(size_t input_length, + int16_t* best_correlation, + size_t* peak_index) const = 0; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. This method must be implemented + // by the sub-classes. + virtual ReturnCodes CheckCriteriaAndStretch( + const int16_t* input, + size_t input_length, + size_t peak_index, + int16_t best_correlation, + bool active_speech, + bool fast_mode, + AudioMultiVector* output) const = 0; + + static const size_t kCorrelationLen = 50; + static const size_t kLogCorrelationLen = 6; // >= log2(kCorrelationLen). 
+ static const size_t kMinLag = 10; + static const size_t kMaxLag = 60; + static const size_t kDownsampledLen = kCorrelationLen + kMaxLag; + static const int kCorrelationThreshold = 14746; // 0.9 in Q14. + static constexpr size_t kRefChannel = 0; // First channel is reference. + + const int sample_rate_hz_; + const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000. + const size_t num_channels_; + const BackgroundNoise& background_noise_; + int16_t max_input_value_; + int16_t downsampled_input_[kDownsampledLen]; + // Adding 1 to the size of `auto_correlation_` because of how it is used + // by the peak-detection algorithm. + int16_t auto_correlation_[kCorrelationLen + 1]; + + private: + // Calculates the auto-correlation of `downsampled_input_` and writes the + // result to `auto_correlation_`. + void AutoCorrelation(); + + // Performs a simple voice-activity detection based on the input parameters. + bool SpeechDetection(int32_t vec1_energy, + int32_t vec2_energy, + size_t peak_index, + int scaling) const; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc new file mode 100644 index 0000000000..da3a98229a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/time_stretch_unittest.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Accelerate and PreemptiveExpand classes. 
+ +#include <map> +#include <memory> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/neteq/accelerate.h" +#include "modules/audio_coding/neteq/background_noise.h" +#include "modules/audio_coding/neteq/preemptive_expand.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { +const size_t kNumChannels = 1; +} + +TEST(TimeStretch, CreateAndDestroy) { + const int kSampleRate = 8000; + const int kOverlapSamples = 5 * kSampleRate / 8000; + BackgroundNoise bgn(kNumChannels); + Accelerate accelerate(kSampleRate, kNumChannels, bgn); + PreemptiveExpand preemptive_expand(kSampleRate, kNumChannels, bgn, + kOverlapSamples); +} + +TEST(TimeStretch, CreateUsingFactory) { + const int kSampleRate = 8000; + const int kOverlapSamples = 5 * kSampleRate / 8000; + BackgroundNoise bgn(kNumChannels); + + AccelerateFactory accelerate_factory; + Accelerate* accelerate = + accelerate_factory.Create(kSampleRate, kNumChannels, bgn); + EXPECT_TRUE(accelerate != NULL); + delete accelerate; + + PreemptiveExpandFactory preemptive_expand_factory; + PreemptiveExpand* preemptive_expand = preemptive_expand_factory.Create( + kSampleRate, kNumChannels, bgn, kOverlapSamples); + EXPECT_TRUE(preemptive_expand != NULL); + delete preemptive_expand; +} + +class TimeStretchTest : public ::testing::Test { + protected: + TimeStretchTest() + : input_file_(new test::InputAudioFile( + test::ResourcePath("audio_coding/testfile32kHz", "pcm"))), + sample_rate_hz_(32000), + block_size_(30 * sample_rate_hz_ / 1000), // 30 ms + audio_(new int16_t[block_size_]), + background_noise_(kNumChannels) {} + + const int16_t* Next30Ms() { + RTC_CHECK(input_file_->Read(block_size_, audio_.get())); + return audio_.get(); + } + + // Returns the total length change (in samples) that the accelerate operation + // resulted in 
during the run. + size_t TestAccelerate(size_t loops, bool fast_mode) { + Accelerate accelerate(sample_rate_hz_, kNumChannels, background_noise_); + size_t total_length_change = 0; + for (size_t i = 0; i < loops; ++i) { + AudioMultiVector output(kNumChannels); + size_t length_change; + UpdateReturnStats(accelerate.Process(Next30Ms(), block_size_, fast_mode, + &output, &length_change)); + total_length_change += length_change; + } + return total_length_change; + } + + void UpdateReturnStats(TimeStretch::ReturnCodes ret) { + switch (ret) { + case TimeStretch::kSuccess: + case TimeStretch::kSuccessLowEnergy: + case TimeStretch::kNoStretch: + ++return_stats_[ret]; + break; + case TimeStretch::kError: + FAIL() << "Process returned an error"; + } + } + + std::unique_ptr<test::InputAudioFile> input_file_; + const int sample_rate_hz_; + const size_t block_size_; + std::unique_ptr<int16_t[]> audio_; + std::map<TimeStretch::ReturnCodes, int> return_stats_; + BackgroundNoise background_noise_; +}; + +TEST_F(TimeStretchTest, Accelerate) { + // TestAccelerate returns the total length change in samples. + EXPECT_EQ(15268U, TestAccelerate(100, false)); + EXPECT_EQ(9, return_stats_[TimeStretch::kSuccess]); + EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]); + EXPECT_EQ(33, return_stats_[TimeStretch::kNoStretch]); +} + +TEST_F(TimeStretchTest, AccelerateFastMode) { + // TestAccelerate returns the total length change in samples. 
+ EXPECT_EQ(21400U, TestAccelerate(100, true)); + EXPECT_EQ(31, return_stats_[TimeStretch::kSuccess]); + EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]); + EXPECT_EQ(11, return_stats_[TimeStretch::kNoStretch]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc new file mode 100644 index 0000000000..59177d027f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/timestamp_scaler.h" + +#include "api/audio_codecs/audio_format.h" +#include "modules/audio_coding/neteq/decoder_database.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +void TimestampScaler::Reset() { + first_packet_received_ = false; +} + +void TimestampScaler::ToInternal(Packet* packet) { + if (!packet) { + return; + } + packet->timestamp = ToInternal(packet->timestamp, packet->payload_type); +} + +void TimestampScaler::ToInternal(PacketList* packet_list) { + PacketList::iterator it; + for (it = packet_list->begin(); it != packet_list->end(); ++it) { + ToInternal(&(*it)); + } +} + +uint32_t TimestampScaler::ToInternal(uint32_t external_timestamp, + uint8_t rtp_payload_type) { + const DecoderDatabase::DecoderInfo* info = + decoder_database_.GetDecoderInfo(rtp_payload_type); + if (!info) { + // Payload type is unknown. Do not scale. 
+ return external_timestamp; + } + if (!(info->IsComfortNoise() || info->IsDtmf())) { + // Do not change the timestamp scaling settings for DTMF or CNG. + numerator_ = info->SampleRateHz(); + if (info->GetFormat().clockrate_hz == 0) { + // If the clockrate is invalid (i.e. with an old-style external codec) + // we cannot do any timestamp scaling. + denominator_ = numerator_; + } else { + denominator_ = info->GetFormat().clockrate_hz; + } + } + if (numerator_ != denominator_) { + // We have a scale factor != 1. + if (!first_packet_received_) { + external_ref_ = external_timestamp; + internal_ref_ = external_timestamp; + first_packet_received_ = true; + } + const int64_t external_diff = int64_t{external_timestamp} - external_ref_; + RTC_DCHECK_GT(denominator_, 0); + external_ref_ = external_timestamp; + internal_ref_ += (external_diff * numerator_) / denominator_; + return internal_ref_; + } else { + // No scaling. + return external_timestamp; + } +} + +uint32_t TimestampScaler::ToExternal(uint32_t internal_timestamp) const { + if (!first_packet_received_ || (numerator_ == denominator_)) { + // Not initialized, or scale factor is 1. + return internal_timestamp; + } else { + const int64_t internal_diff = int64_t{internal_timestamp} - internal_ref_; + RTC_DCHECK_GT(numerator_, 0); + // Do not update references in this method. + // Switch `denominator_` and `numerator_` to convert the other way. + return external_ref_ + (internal_diff * denominator_) / numerator_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h new file mode 100644 index 0000000000..f42ce7207a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ +#define MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ + +#include "modules/audio_coding/neteq/packet.h" + +namespace webrtc { + +// Forward declaration. +class DecoderDatabase; + +// This class scales timestamps for codecs that need timestamp scaling. +// This is done for codecs where one RTP timestamp does not correspond to +// one sample. +class TimestampScaler { + public: + explicit TimestampScaler(const DecoderDatabase& decoder_database) + : first_packet_received_(false), + numerator_(1), + denominator_(1), + external_ref_(0), + internal_ref_(0), + decoder_database_(decoder_database) {} + + virtual ~TimestampScaler() {} + + TimestampScaler(const TimestampScaler&) = delete; + TimestampScaler& operator=(const TimestampScaler&) = delete; + + // Start over. + virtual void Reset(); + + // Scale the timestamp in `packet` from external to internal. + virtual void ToInternal(Packet* packet); + + // Scale the timestamp for all packets in `packet_list` from external to + // internal. + virtual void ToInternal(PacketList* packet_list); + + // Returns the internal equivalent of `external_timestamp`, given the + // RTP payload type `rtp_payload_type`. + virtual uint32_t ToInternal(uint32_t external_timestamp, + uint8_t rtp_payload_type); + + // Scales back to external timestamp. This is the inverse of ToInternal(). 
+ virtual uint32_t ToExternal(uint32_t internal_timestamp) const; + + private: + bool first_packet_received_; + int numerator_; + int denominator_; + uint32_t external_ref_; + uint32_t internal_ref_; + const DecoderDatabase& decoder_database_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TIMESTAMP_SCALER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc new file mode 100644 index 0000000000..c2bb4dd95f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/timestamp_scaler_unittest.cc @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/timestamp_scaler.h" + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "modules/audio_coding/neteq/mock/mock_decoder_database.h" +#include "modules/audio_coding/neteq/packet.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::Return; +using ::testing::ReturnNull; + +namespace webrtc { + +TEST(TimestampScaler, TestNoScaling) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use PCMu, because it doesn't use scaled timestamps. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. 
+ for (uint32_t timestamp = 0xFFFFFFFF - 5; timestamp != 5; ++timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestNoScalingLargeStep) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use PCMu, because it doesn't use scaled timestamps. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("pcmu", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 160; + uint32_t start_timestamp = 0; + // `external_timestamp` will be a large positive value. + start_timestamp = start_timestamp - 5 * kStep; + for (uint32_t timestamp = start_timestamp; timestamp != 5 * kStep; + timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. 
+ uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722LargeStep) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 320; + uint32_t external_timestamp = 0; + // `external_timestamp` will be a large positive value. + external_timestamp = external_timestamp - 5 * kStep; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5 * kStep; external_timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + // Internal timestamp should be incremented with twice the step. + internal_timestamp += 2 * kStep; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722WithCng) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. 
+ const DecoderDatabase::DecoderInfo info_g722(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + const DecoderDatabase::DecoderInfo info_cng(SdpAudioFormat("cn", 16000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadTypeG722 = 17; + static const uint8_t kRtpPayloadTypeCng = 13; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeG722)) + .WillRepeatedly(Return(&info_g722)); + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeCng)) + .WillRepeatedly(Return(&info_cng)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + bool next_is_cng = false; + for (; external_timestamp != 5; ++external_timestamp) { + // Alternate between G.722 and CNG every other packet. + if (next_is_cng) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeCng)); + next_is_cng = false; + } else { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeG722)); + next_is_cng = true; + } + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// Make sure that the method ToInternal(Packet* packet) is wired up correctly. +// Since it is simply calling the other ToInternal method, we are not doing +// as many tests here. +TEST(TimestampScaler, TestG722Packet) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. 
+ const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + Packet packet; + packet.payload_type = kRtpPayloadType; + for (; external_timestamp != 5; ++external_timestamp) { + packet.timestamp = external_timestamp; + // Scale to internal timestamp. + scaler.ToInternal(&packet); + EXPECT_EQ(internal_timestamp, packet.timestamp); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// Make sure that the method ToInternal(PacketList* packet_list) is wired up +// correctly. Since it is simply calling the ToInternal(Packet* packet) method, +// we are not doing as many tests here. +TEST(TimestampScaler, TestG722PacketList) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. 
+ uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + PacketList packet_list; + { + Packet packet1; + packet1.payload_type = kRtpPayloadType; + packet1.timestamp = external_timestamp; + Packet packet2; + packet2.payload_type = kRtpPayloadType; + packet2.timestamp = external_timestamp + 10; + packet_list.push_back(std::move(packet1)); + packet_list.push_back(std::move(packet2)); + } + + scaler.ToInternal(&packet_list); + EXPECT_EQ(internal_timestamp, packet_list.front().timestamp); + packet_list.pop_front(); + EXPECT_EQ(internal_timestamp + 20, packet_list.front().timestamp); + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722Reset) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + // Use G722, which has a factor 2 scaling. + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("g722", 8000, 1), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + // Reset the scaler. After this, we expect the internal and external to start + // over at the same value again. + scaler.Reset(); + internal_timestamp = external_timestamp; + for (; external_timestamp != 15; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. 
+ EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// TODO(minyue): This test becomes trivial since Opus does not need a timestamp +// scaler. Therefore, this test may be removed in future. There is no harm to +// keep it, since it can be taken as a test case for the situation of a trivial +// timestamp scaler. +TEST(TimestampScaler, TestOpusLargeStep) { + MockDecoderDatabase db; + auto factory = CreateBuiltinAudioDecoderFactory(); + const DecoderDatabase::DecoderInfo info(SdpAudioFormat("opus", 48000, 2), + absl::nullopt, factory.get()); + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 960; + uint32_t external_timestamp = 0; + // `external_timestamp` will be a large positive value. + external_timestamp = external_timestamp - 5 * kStep; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5 * kStep; external_timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += kStep; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, Failures) { + static const uint8_t kRtpPayloadType = 17; + MockDecoderDatabase db; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillOnce(ReturnNull()); // Return NULL to indicate unknown payload type. + + TimestampScaler scaler(db); + uint32_t timestamp = 4711; // Some number. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + + Packet* packet = NULL; + scaler.ToInternal(packet); // Should not crash. 
That's all we can test. + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS b/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS new file mode 100644 index 0000000000..4db1e1d6e5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+logging/rtc_event_log", +] diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md b/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md new file mode 100644 index 0000000000..e7bd95c285 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/README.md @@ -0,0 +1,17 @@ +# NetEQ RTP Play tool + +## Testing of the command line arguments +The command line tool `neteq_rtpplay` can be tested by running `neteq_rtpplay_test.sh`, which is not used on try bots, but it can be used before submitting any CLs that may break the behavior of the command line arguments of `neteq_rtpplay`. + +Run `neteq_rtpplay_test.sh` as follows from the `src/` folder: +``` +src$ ./modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh \ + out/Default/neteq_rtpplay \ + resources/audio_coding/neteq_opus.rtp \ + resources/short_mixed_mono_48.pcm +``` + +You can replace the RTP and PCM files with any other compatible files. +If you get an error using the files indicated above, try running `gclient sync`. + +Requirements: `awk` and `md5sum`. diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h new file mode 100644 index 0000000000..42e3a3a3a0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_checksum.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_ + +#include <memory> +#include <string> + +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "rtc_base/buffer.h" +#include "rtc_base/message_digest.h" +#include "rtc_base/string_encode.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace test { + +class AudioChecksum : public AudioSink { + public: + AudioChecksum() + : checksum_(rtc::MessageDigestFactory::Create(rtc::DIGEST_MD5)), + checksum_result_(checksum_->Size()), + finished_(false) {} + + AudioChecksum(const AudioChecksum&) = delete; + AudioChecksum& operator=(const AudioChecksum&) = delete; + + bool WriteArray(const int16_t* audio, size_t num_samples) override { + if (finished_) + return false; + +#ifndef WEBRTC_ARCH_LITTLE_ENDIAN +#error "Big-endian gives a different checksum" +#endif + checksum_->Update(audio, num_samples * sizeof(*audio)); + return true; + } + + // Finalizes the computations, and returns the checksum. 
+ std::string Finish() { + if (!finished_) { + finished_ = true; + checksum_->Finish(checksum_result_.data(), checksum_result_.size()); + } + return rtc::hex_encode(checksum_result_); + } + + private: + std::unique_ptr<rtc::MessageDigest> checksum_; + rtc::Buffer checksum_result_; + bool finished_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_CHECKSUM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc new file mode 100644 index 0000000000..514e6eb2ee --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/audio_loop.h" + +#include <stdio.h> +#include <string.h> + +#include "absl/strings/string_view.h" + +namespace webrtc { +namespace test { + +bool AudioLoop::Init(absl::string_view file_name, + size_t max_loop_length_samples, + size_t block_length_samples) { + FILE* fp = fopen(std::string(file_name).c_str(), "rb"); + if (!fp) + return false; + + audio_array_.reset( + new int16_t[max_loop_length_samples + block_length_samples]); + size_t samples_read = + fread(audio_array_.get(), sizeof(int16_t), max_loop_length_samples, fp); + fclose(fp); + + // Block length must be shorter than the loop length. + if (block_length_samples > samples_read) + return false; + + // Add an extra block length of samples to the end of the array, starting + // over again from the beginning of the array. 
This is done to simplify + // the reading process when reading over the end of the loop. + memcpy(&audio_array_[samples_read], audio_array_.get(), + block_length_samples * sizeof(int16_t)); + + loop_length_samples_ = samples_read; + block_length_samples_ = block_length_samples; + next_index_ = 0; + return true; +} + +rtc::ArrayView<const int16_t> AudioLoop::GetNextBlock() { + // Check that the AudioLoop is initialized. + if (block_length_samples_ == 0) + return rtc::ArrayView<const int16_t>(); + + const int16_t* output_ptr = &audio_array_[next_index_]; + next_index_ = (next_index_ + block_length_samples_) % loop_length_samples_; + return rtc::ArrayView<const int16_t>(output_ptr, block_length_samples_); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h new file mode 100644 index 0000000000..f5f0b59011 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_loop.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_ + +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" + +namespace webrtc { +namespace test { + +// Class serving as an infinite source of audio, realized by looping an audio +// clip. 
+class AudioLoop { + public: + AudioLoop() + : next_index_(0), loop_length_samples_(0), block_length_samples_(0) {} + + virtual ~AudioLoop() {} + + AudioLoop(const AudioLoop&) = delete; + AudioLoop& operator=(const AudioLoop&) = delete; + + // Initializes the AudioLoop by reading from `file_name`. The loop will be no + // longer than `max_loop_length_samples`, if the length of the file is + // greater. Otherwise, the loop length is the same as the file length. + // The audio will be delivered in blocks of `block_length_samples`. + // Returns false if the initialization failed, otherwise true. + bool Init(absl::string_view file_name, + size_t max_loop_length_samples, + size_t block_length_samples); + + // Returns a (pointer,size) pair for the next block of audio. The size is + // equal to the `block_length_samples` Init() argument. + rtc::ArrayView<const int16_t> GetNextBlock(); + + private: + size_t next_index_; + size_t loop_length_samples_; + size_t block_length_samples_; + std::unique_ptr<int16_t[]> audio_array_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_LOOP_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc new file mode 100644 index 0000000000..7d7af7ef9f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.cc @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/audio_sink.h" + +namespace webrtc { +namespace test { + +bool AudioSinkFork::WriteArray(const int16_t* audio, size_t num_samples) { + return left_sink_->WriteArray(audio, num_samples) && + right_sink_->WriteArray(audio, num_samples); +} + +bool VoidAudioSink::WriteArray(const int16_t* audio, size_t num_samples) { + return true; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h new file mode 100644 index 0000000000..53729fa920 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/audio_sink.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_ + +#include "api/audio/audio_frame.h" + +namespace webrtc { +namespace test { + +// Interface class for an object receiving raw output audio from test +// applications. +class AudioSink { + public: + AudioSink() {} + virtual ~AudioSink() {} + + AudioSink(const AudioSink&) = delete; + AudioSink& operator=(const AudioSink&) = delete; + + // Writes `num_samples` from `audio` to the AudioSink. Returns true if + // successful, otherwise false. + virtual bool WriteArray(const int16_t* audio, size_t num_samples) = 0; + + // Writes `audio_frame` to the AudioSink. Returns true if successful, + // otherwise false. 
+ bool WriteAudioFrame(const AudioFrame& audio_frame) { + return WriteArray(audio_frame.data(), audio_frame.samples_per_channel_ * + audio_frame.num_channels_); + } +}; + +// Forks the output audio to two AudioSink objects. +class AudioSinkFork : public AudioSink { + public: + AudioSinkFork(AudioSink* left, AudioSink* right) + : left_sink_(left), right_sink_(right) {} + + AudioSinkFork(const AudioSinkFork&) = delete; + AudioSinkFork& operator=(const AudioSinkFork&) = delete; + + bool WriteArray(const int16_t* audio, size_t num_samples) override; + + private: + AudioSink* left_sink_; + AudioSink* right_sink_; +}; + +// An AudioSink implementation that does nothing. +class VoidAudioSink : public AudioSink { + public: + VoidAudioSink() = default; + + VoidAudioSink(const VoidAudioSink&) = delete; + VoidAudioSink& operator=(const VoidAudioSink&) = delete; + + bool WriteArray(const int16_t* audio, size_t num_samples) override; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_AUDIO_SINK_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc new file mode 100644 index 0000000000..18a910365f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/constant_pcm_packet_source.h" + +#include <algorithm> + +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +ConstantPcmPacketSource::ConstantPcmPacketSource(size_t payload_len_samples, + int16_t sample_value, + int sample_rate_hz, + int payload_type) + : payload_len_samples_(payload_len_samples), + packet_len_bytes_(2 * payload_len_samples_ + kHeaderLenBytes), + samples_per_ms_(sample_rate_hz / 1000), + next_arrival_time_ms_(0.0), + payload_type_(payload_type), + seq_number_(0), + timestamp_(0), + payload_ssrc_(0xABCD1234) { + size_t encoded_len = WebRtcPcm16b_Encode(&sample_value, 1, encoded_sample_); + RTC_CHECK_EQ(2U, encoded_len); +} + +std::unique_ptr<Packet> ConstantPcmPacketSource::NextPacket() { + RTC_CHECK_GT(packet_len_bytes_, kHeaderLenBytes); + rtc::CopyOnWriteBuffer packet_buffer(packet_len_bytes_); + uint8_t* packet_memory = packet_buffer.MutableData(); + // Fill the payload part of the packet memory with the pre-encoded value. + for (unsigned i = 0; i < 2 * payload_len_samples_; ++i) + packet_memory[kHeaderLenBytes + i] = encoded_sample_[i % 2]; + WriteHeader(packet_memory); + // `packet` assumes ownership of `packet_memory`. 
+ auto packet = + std::make_unique<Packet>(std::move(packet_buffer), next_arrival_time_ms_); + next_arrival_time_ms_ += payload_len_samples_ / samples_per_ms_; + return packet; +} + +void ConstantPcmPacketSource::WriteHeader(uint8_t* packet_memory) { + packet_memory[0] = 0x80; + packet_memory[1] = static_cast<uint8_t>(payload_type_); + packet_memory[2] = seq_number_ >> 8; + packet_memory[3] = seq_number_ & 0xFF; + packet_memory[4] = timestamp_ >> 24; + packet_memory[5] = (timestamp_ >> 16) & 0xFF; + packet_memory[6] = (timestamp_ >> 8) & 0xFF; + packet_memory[7] = timestamp_ & 0xFF; + packet_memory[8] = payload_ssrc_ >> 24; + packet_memory[9] = (payload_ssrc_ >> 16) & 0xFF; + packet_memory[10] = (payload_ssrc_ >> 8) & 0xFF; + packet_memory[11] = payload_ssrc_ & 0xFF; + ++seq_number_; + timestamp_ += static_cast<uint32_t>(payload_len_samples_); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h new file mode 100644 index 0000000000..ab4f5c2281 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/constant_pcm_packet_source.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_ + +#include <stdio.h> + +#include <string> + +#include "modules/audio_coding/neteq/tools/packet_source.h" + +namespace webrtc { +namespace test { + +// This class implements a packet source that delivers PCM16b encoded packets +// with a constant sample value. The payload length, constant sample value, +// sample rate, and payload type are all set in the constructor. +class ConstantPcmPacketSource : public PacketSource { + public: + ConstantPcmPacketSource(size_t payload_len_samples, + int16_t sample_value, + int sample_rate_hz, + int payload_type); + + ConstantPcmPacketSource(const ConstantPcmPacketSource&) = delete; + ConstantPcmPacketSource& operator=(const ConstantPcmPacketSource&) = delete; + + std::unique_ptr<Packet> NextPacket() override; + + private: + void WriteHeader(uint8_t* packet_memory); + + const size_t kHeaderLenBytes = 12; + const size_t payload_len_samples_; + const size_t packet_len_bytes_; + uint8_t encoded_sample_[2]; + const int samples_per_ms_; + double next_arrival_time_ms_; + const int payload_type_; + uint16_t seq_number_; + uint32_t timestamp_; + const uint32_t payload_ssrc_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_CONSTANT_PCM_PACKET_SOURCE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc new file mode 100644 index 0000000000..87b987ddb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/encode_neteq_input.h" + +#include <utility> + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace test { + +EncodeNetEqInput::EncodeNetEqInput(std::unique_ptr<Generator> generator, + std::unique_ptr<AudioEncoder> encoder, + int64_t input_duration_ms) + : generator_(std::move(generator)), + encoder_(std::move(encoder)), + input_duration_ms_(input_duration_ms) { + CreatePacket(); +} + +EncodeNetEqInput::~EncodeNetEqInput() = default; + +absl::optional<int64_t> EncodeNetEqInput::NextPacketTime() const { + RTC_DCHECK(packet_data_); + return static_cast<int64_t>(packet_data_->time_ms); +} + +absl::optional<int64_t> EncodeNetEqInput::NextOutputEventTime() const { + return next_output_event_ms_; +} + +std::unique_ptr<NetEqInput::PacketData> EncodeNetEqInput::PopPacket() { + RTC_DCHECK(packet_data_); + // Grab the packet to return... + std::unique_ptr<PacketData> packet_to_return = std::move(packet_data_); + // ... and line up the next packet for future use. + CreatePacket(); + + return packet_to_return; +} + +void EncodeNetEqInput::AdvanceOutputEvent() { + next_output_event_ms_ += kOutputPeriodMs; +} + +bool EncodeNetEqInput::ended() const { + return next_output_event_ms_ > input_duration_ms_; +} + +absl::optional<RTPHeader> EncodeNetEqInput::NextHeader() const { + RTC_DCHECK(packet_data_); + return packet_data_->header; +} + +void EncodeNetEqInput::CreatePacket() { + // Create a new PacketData object. + RTC_DCHECK(!packet_data_); + packet_data_.reset(new NetEqInput::PacketData); + RTC_DCHECK_EQ(packet_data_->payload.size(), 0); + + // Loop until we get a packet. 
+ AudioEncoder::EncodedInfo info; + RTC_DCHECK(!info.send_even_if_empty); + int num_blocks = 0; + while (packet_data_->payload.size() == 0 && !info.send_even_if_empty) { + const size_t num_samples = rtc::CheckedDivExact( + static_cast<int>(encoder_->SampleRateHz() * kOutputPeriodMs), 1000); + + info = encoder_->Encode(rtp_timestamp_, generator_->Generate(num_samples), + &packet_data_->payload); + + rtp_timestamp_ += rtc::dchecked_cast<uint32_t>( + num_samples * encoder_->RtpTimestampRateHz() / + encoder_->SampleRateHz()); + ++num_blocks; + } + packet_data_->header.timestamp = info.encoded_timestamp; + packet_data_->header.payloadType = info.payload_type; + packet_data_->header.sequenceNumber = sequence_number_++; + packet_data_->time_ms = next_packet_time_ms_; + next_packet_time_ms_ += num_blocks * kOutputPeriodMs; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h new file mode 100644 index 0000000000..caa9ac76f4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/encode_neteq_input.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_ + +#include <memory> + +#include "api/audio_codecs/audio_encoder.h" +#include "modules/audio_coding/neteq/tools/neteq_input.h" + +namespace webrtc { +namespace test { + +// This class provides a NetEqInput that takes audio from a generator object and +// encodes it using a given audio encoder. +class EncodeNetEqInput : public NetEqInput { + public: + // Generator class, to be provided to the EncodeNetEqInput constructor. + class Generator { + public: + virtual ~Generator() = default; + // Returns the next num_samples values from the signal generator. + virtual rtc::ArrayView<const int16_t> Generate(size_t num_samples) = 0; + }; + + // The source will end after the given input duration. + EncodeNetEqInput(std::unique_ptr<Generator> generator, + std::unique_ptr<AudioEncoder> encoder, + int64_t input_duration_ms); + ~EncodeNetEqInput() override; + + absl::optional<int64_t> NextPacketTime() const override; + + absl::optional<int64_t> NextOutputEventTime() const override; + + std::unique_ptr<PacketData> PopPacket() override; + + void AdvanceOutputEvent() override; + + bool ended() const override; + + absl::optional<RTPHeader> NextHeader() const override; + + private: + static constexpr int64_t kOutputPeriodMs = 10; + + void CreatePacket(); + + std::unique_ptr<Generator> generator_; + std::unique_ptr<AudioEncoder> encoder_; + std::unique_ptr<PacketData> packet_data_; + uint32_t rtp_timestamp_ = 0; + int16_t sequence_number_ = 0; + int64_t next_packet_time_ms_ = 0; + int64_t next_output_event_ms_ = 0; + const int64_t input_duration_ms_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_ENCODE_NETEQ_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc new file mode 
100644 index 0000000000..6c5e5ac2e4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.cc @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h" + +#include "modules/rtp_rtcp/source/byte_io.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace test { + +namespace { + +class FakeEncodedFrame : public AudioDecoder::EncodedAudioFrame { + public: + FakeEncodedFrame(AudioDecoder* decoder, rtc::Buffer&& payload) + : decoder_(decoder), payload_(std::move(payload)) {} + + size_t Duration() const override { + const int ret = decoder_->PacketDuration(payload_.data(), payload_.size()); + return ret < 0 ? 0 : static_cast<size_t>(ret); + } + + absl::optional<DecodeResult> Decode( + rtc::ArrayView<int16_t> decoded) const override { + auto speech_type = AudioDecoder::kSpeech; + const int ret = decoder_->Decode( + payload_.data(), payload_.size(), decoder_->SampleRateHz(), + decoded.size() * sizeof(int16_t), decoded.data(), &speech_type); + return ret < 0 ? absl::nullopt + : absl::optional<DecodeResult>( + {static_cast<size_t>(ret), speech_type}); + } + + // This is to mimic OpusFrame. 
+ bool IsDtxPacket() const override { + uint32_t original_payload_size_bytes = + ByteReader<uint32_t>::ReadLittleEndian(&payload_.data()[8]); + return original_payload_size_bytes <= 2; + } + + private: + AudioDecoder* const decoder_; + const rtc::Buffer payload_; +}; + +} // namespace + +std::vector<AudioDecoder::ParseResult> FakeDecodeFromFile::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + std::vector<ParseResult> results; + std::unique_ptr<EncodedAudioFrame> frame( + new FakeEncodedFrame(this, std::move(payload))); + results.emplace_back(timestamp, 0, std::move(frame)); + return results; +} + +int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz, SampleRateHz()); + + const int samples_to_decode = PacketDuration(encoded, encoded_len); + const int total_samples_to_decode = samples_to_decode * (stereo_ ? 2 : 1); + + if (encoded_len == 0) { + // Decoder is asked to produce codec-internal comfort noise. + RTC_DCHECK(!encoded); // NetEq always sends nullptr in this case. + RTC_DCHECK(cng_mode_); + RTC_DCHECK_GT(total_samples_to_decode, 0); + std::fill_n(decoded, total_samples_to_decode, 0); + *speech_type = kComfortNoise; + return rtc::dchecked_cast<int>(total_samples_to_decode); + } + + RTC_CHECK_GE(encoded_len, 12); + uint32_t timestamp_to_decode = + ByteReader<uint32_t>::ReadLittleEndian(encoded); + + if (next_timestamp_from_input_ && + timestamp_to_decode != *next_timestamp_from_input_) { + // A gap in the timestamp sequence is detected. Skip the same number of + // samples from the file. 
+ uint32_t jump = timestamp_to_decode - *next_timestamp_from_input_; + RTC_CHECK(input_->Seek(jump)); + } + + next_timestamp_from_input_ = timestamp_to_decode + samples_to_decode; + + uint32_t original_payload_size_bytes = + ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]); + if (original_payload_size_bytes <= 2) { + // This is a comfort noise payload. + RTC_DCHECK_GT(total_samples_to_decode, 0); + std::fill_n(decoded, total_samples_to_decode, 0); + *speech_type = kComfortNoise; + cng_mode_ = true; + return rtc::dchecked_cast<int>(total_samples_to_decode); + } + + cng_mode_ = false; + RTC_CHECK(input_->Read(static_cast<size_t>(samples_to_decode), decoded)); + + if (stereo_) { + InputAudioFile::DuplicateInterleaved(decoded, samples_to_decode, 2, + decoded); + } + + *speech_type = kSpeech; + last_decoded_length_ = samples_to_decode; + return rtc::dchecked_cast<int>(total_samples_to_decode); +} + +int FakeDecodeFromFile::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + const uint32_t original_payload_size_bytes = + encoded_len < 8 + sizeof(uint32_t) + ? 0 + : ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]); + const uint32_t samples_to_decode = + encoded_len < 4 + sizeof(uint32_t) + ? 0 + : ByteReader<uint32_t>::ReadLittleEndian(&encoded[4]); + if (encoded_len == 0) { + // Decoder is asked to produce codec-internal comfort noise. + return rtc::CheckedDivExact(SampleRateHz(), 100); + } + bool is_dtx_payload = + original_payload_size_bytes <= 2 || samples_to_decode == 0; + bool has_error_duration = + samples_to_decode % rtc::CheckedDivExact(SampleRateHz(), 100) != 0; + if (is_dtx_payload || has_error_duration) { + if (last_decoded_length_ > 0) { + // Use length of last decoded packet. + return rtc::dchecked_cast<int>(last_decoded_length_); + } else { + // This is the first packet to decode, and we do not know the length of + // it. Set it to 10 ms. 
+ return rtc::CheckedDivExact(SampleRateHz(), 100); + } + } + return samples_to_decode; +} + +void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp, + size_t samples, + size_t original_payload_size_bytes, + rtc::ArrayView<uint8_t> encoded) { + RTC_CHECK_GE(encoded.size(), 12); + ByteWriter<uint32_t>::WriteLittleEndian(&encoded[0], timestamp); + ByteWriter<uint32_t>::WriteLittleEndian(&encoded[4], + rtc::checked_cast<uint32_t>(samples)); + ByteWriter<uint32_t>::WriteLittleEndian( + &encoded[8], rtc::checked_cast<uint32_t>(original_payload_size_bytes)); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h new file mode 100644 index 0000000000..7b53653998 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/fake_decode_from_file.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_ + +#include <memory> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio_codecs/audio_decoder.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" + +namespace webrtc { +namespace test { +// Provides an AudioDecoder implementation that delivers audio data from a file. +// The "encoded" input should contain information about what RTP timestamp the +// encoding represents, and how many samples the decoder should produce for that +// encoding. 
A helper method PrepareEncoded is provided to prepare such +// encodings. If packets are missing, as determined from the timestamps, the +// file reading will skip forward to match the loss. +class FakeDecodeFromFile : public AudioDecoder { + public: + FakeDecodeFromFile(std::unique_ptr<InputAudioFile> input, + int sample_rate_hz, + bool stereo) + : input_(std::move(input)), + sample_rate_hz_(sample_rate_hz), + stereo_(stereo) {} + + ~FakeDecodeFromFile() = default; + + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + + void Reset() override {} + + int SampleRateHz() const override { return sample_rate_hz_; } + + size_t Channels() const override { return stereo_ ? 2 : 1; } + + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + + // Helper method. Writes `timestamp`, `samples` and + // `original_payload_size_bytes` to `encoded` in a format that the + // FakeDecodeFromFile decoder will understand. `encoded` must be at least 12 + // bytes long. 
+ static void PrepareEncoded(uint32_t timestamp, + size_t samples, + size_t original_payload_size_bytes, + rtc::ArrayView<uint8_t> encoded); + + private: + std::unique_ptr<InputAudioFile> input_; + absl::optional<uint32_t> next_timestamp_from_input_; + const int sample_rate_hz_; + const bool stereo_; + size_t last_decoded_length_ = 0; + bool cng_mode_ = false; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_FAKE_DECODE_FROM_FILE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc new file mode 100644 index 0000000000..3c33aabf1c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h" + +#include <limits> +#include <memory> +#include <utility> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +InitialPacketInserterNetEqInput::InitialPacketInserterNetEqInput( + std::unique_ptr<NetEqInput> source, + int number_of_initial_packets, + int sample_rate_hz) + : source_(std::move(source)), + packets_to_insert_(number_of_initial_packets), + sample_rate_hz_(sample_rate_hz) {} + +absl::optional<int64_t> InitialPacketInserterNetEqInput::NextPacketTime() + const { + return source_->NextPacketTime(); +} + +absl::optional<int64_t> InitialPacketInserterNetEqInput::NextOutputEventTime() + const { + return source_->NextOutputEventTime(); +} + +std::unique_ptr<InitialPacketInserterNetEqInput::PacketData> +InitialPacketInserterNetEqInput::PopPacket() { + if (!first_packet_) { + first_packet_ = source_->PopPacket(); + if (!first_packet_) { + // The source has no packets, so we should not insert any dummy packets. + packets_to_insert_ = 0; + } + } + if (packets_to_insert_ > 0) { + RTC_CHECK(first_packet_); + auto dummy_packet = std::unique_ptr<PacketData>(new PacketData()); + dummy_packet->header = first_packet_->header; + dummy_packet->payload = rtc::Buffer(first_packet_->payload.data(), + first_packet_->payload.size()); + dummy_packet->time_ms = first_packet_->time_ms; + dummy_packet->header.sequenceNumber -= packets_to_insert_; + // This assumes 20ms per packet. 
+ dummy_packet->header.timestamp -= + 20 * sample_rate_hz_ * packets_to_insert_ / 1000; + packets_to_insert_--; + return dummy_packet; + } + return source_->PopPacket(); +} + +void InitialPacketInserterNetEqInput::AdvanceOutputEvent() { + source_->AdvanceOutputEvent(); +} + +bool InitialPacketInserterNetEqInput::ended() const { + return source_->ended(); +} + +absl::optional<RTPHeader> InitialPacketInserterNetEqInput::NextHeader() const { + return source_->NextHeader(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h new file mode 100644 index 0000000000..bd20a7aecf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_ + +#include <map> +#include <memory> +#include <string> + +#include "modules/audio_coding/neteq/tools/neteq_input.h" + +namespace webrtc { +namespace test { + +// Wrapper class that can insert a number of packets at the start of the +// simulation. 
+class InitialPacketInserterNetEqInput final : public NetEqInput { + public: + InitialPacketInserterNetEqInput(std::unique_ptr<NetEqInput> source, + int number_of_initial_packets, + int sample_rate_hz); + absl::optional<int64_t> NextPacketTime() const override; + absl::optional<int64_t> NextOutputEventTime() const override; + std::unique_ptr<PacketData> PopPacket() override; + void AdvanceOutputEvent() override; + bool ended() const override; + absl::optional<RTPHeader> NextHeader() const override; + + private: + const std::unique_ptr<NetEqInput> source_; + int packets_to_insert_; + const int sample_rate_hz_; + std::unique_ptr<PacketData> first_packet_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_INITIAL_PACKET_INSERTER_NETEQ_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc new file mode 100644 index 0000000000..b077dbff21 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.cc @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/input_audio_file.h" + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +InputAudioFile::InputAudioFile(absl::string_view file_name, bool loop_at_end) + : loop_at_end_(loop_at_end) { + fp_ = fopen(std::string(file_name).c_str(), "rb"); + RTC_DCHECK(fp_) << file_name << " could not be opened."; +} + +InputAudioFile::~InputAudioFile() { + RTC_DCHECK(fp_); + fclose(fp_); +} + +bool InputAudioFile::Read(size_t samples, int16_t* destination) { + if (!fp_) { + return false; + } + size_t samples_read = fread(destination, sizeof(int16_t), samples, fp_); + if (samples_read < samples) { + if (!loop_at_end_) { + return false; + } + // Rewind and read the missing samples. + rewind(fp_); + size_t missing_samples = samples - samples_read; + if (fread(destination + samples_read, sizeof(int16_t), missing_samples, + fp_) < missing_samples) { + // Could not read enough even after rewinding the file. + return false; + } + } + return true; +} + +bool InputAudioFile::Seek(int samples) { + if (!fp_) { + return false; + } + // Find file boundaries. + const long current_pos = ftell(fp_); + RTC_CHECK_NE(EOF, current_pos) + << "Error returned when getting file position."; + RTC_CHECK_EQ(0, fseek(fp_, 0, SEEK_END)); // Move to end of file. + const long file_size = ftell(fp_); + RTC_CHECK_NE(EOF, file_size) << "Error returned when getting file position."; + // Find new position. + long new_pos = current_pos + sizeof(int16_t) * samples; // Samples to bytes. + if (loop_at_end_) { + new_pos = new_pos % file_size; // Wrap around the end of the file. + if (new_pos < 0) { + // For negative values of new_pos, newpos % file_size will also be + // negative. To get the correct result it's needed to add file_size. + new_pos += file_size; + } + } else { + new_pos = new_pos > file_size ? file_size : new_pos; // Don't loop. 
+ } + RTC_CHECK_GE(new_pos, 0) + << "Trying to move to before the beginning of the file"; + // Move to new position relative to the beginning of the file. + RTC_CHECK_EQ(0, fseek(fp_, new_pos, SEEK_SET)); + return true; +} + +void InputAudioFile::DuplicateInterleaved(const int16_t* source, + size_t samples, + size_t channels, + int16_t* destination) { + // Start from the end of `source` and `destination`, and work towards the + // beginning. This is to allow in-place interleaving of the same array (i.e., + // `source` and `destination` are the same array). + for (int i = static_cast<int>(samples - 1); i >= 0; --i) { + for (int j = static_cast<int>(channels - 1); j >= 0; --j) { + destination[i * channels + j] = source[i]; + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h new file mode 100644 index 0000000000..f538b295a3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_ + +#include <stdio.h> + +#include <string> + +#include "absl/strings/string_view.h" + +namespace webrtc { +namespace test { + +// Class for handling a looping input audio file. 
+class InputAudioFile { + public: + explicit InputAudioFile(absl::string_view file_name, bool loop_at_end = true); + + virtual ~InputAudioFile(); + + InputAudioFile(const InputAudioFile&) = delete; + InputAudioFile& operator=(const InputAudioFile&) = delete; + + // Reads `samples` elements from source file to `destination`. Returns true + // if the read was successful, otherwise false. If the file end is reached, + // the file is rewound and reading continues from the beginning. + // The output `destination` must have the capacity to hold `samples` elements. + virtual bool Read(size_t samples, int16_t* destination); + + // Fast-forwards (`samples` > 0) or -backwards (`samples` < 0) the file by the + // indicated number of samples. Just like Read(), Seek() starts over at the + // beginning of the file if the end is reached. However, seeking backwards + // past the beginning of the file is not possible. + virtual bool Seek(int samples); + + // Creates a multi-channel signal from a mono signal. Each sample is repeated + // `channels` times to create an interleaved multi-channel signal where all + // channels are identical. The output `destination` must have the capacity to + // hold samples * channels elements. Note that `source` and `destination` can + // be the same array (i.e., point to the same address). 
+ static void DuplicateInterleaved(const int16_t* source, + size_t samples, + size_t channels, + int16_t* destination); + + private: + FILE* fp_; + const bool loop_at_end_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_INPUT_AUDIO_FILE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc new file mode 100644 index 0000000000..52f7ea82a0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/input_audio_file_unittest.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for test InputAudioFile class. 
+ +#include "modules/audio_coding/neteq/tools/input_audio_file.h" + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +TEST(TestInputAudioFile, DuplicateInterleaveSeparateSrcDst) { + static const size_t kSamples = 10; + static const size_t kChannels = 2; + int16_t input[kSamples]; + for (size_t i = 0; i < kSamples; ++i) { + input[i] = rtc::checked_cast<int16_t>(i); + } + int16_t output[kSamples * kChannels]; + InputAudioFile::DuplicateInterleaved(input, kSamples, kChannels, output); + + // Verify output + int16_t* output_ptr = output; + for (size_t i = 0; i < kSamples; ++i) { + for (size_t j = 0; j < kChannels; ++j) { + EXPECT_EQ(static_cast<int16_t>(i), *output_ptr++); + } + } +} + +TEST(TestInputAudioFile, DuplicateInterleaveSameSrcDst) { + static const size_t kSamples = 10; + static const size_t kChannels = 5; + int16_t input[kSamples * kChannels]; + for (size_t i = 0; i < kSamples; ++i) { + input[i] = rtc::checked_cast<int16_t>(i); + } + InputAudioFile::DuplicateInterleaved(input, kSamples, kChannels, input); + + // Verify output + int16_t* output_ptr = input; + for (size_t i = 0; i < kSamples; ++i) { + for (size_t j = 0; j < kChannels; ++j) { + EXPECT_EQ(static_cast<int16_t>(i), *output_ptr++); + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc new file mode 100644 index 0000000000..91c3a1d96b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.cc @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h" + +#include <algorithm> +#include <fstream> +#include <ios> +#include <iterator> +#include <limits> +#include <utility> + +#include "absl/strings/string_view.h" +#include "modules/include/module_common_types_public.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { +constexpr char kArrivalDelayX[] = "arrival_delay_x"; +constexpr char kArrivalDelayY[] = "arrival_delay_y"; +constexpr char kTargetDelayX[] = "target_delay_x"; +constexpr char kTargetDelayY[] = "target_delay_y"; +constexpr char kPlayoutDelayX[] = "playout_delay_x"; +constexpr char kPlayoutDelayY[] = "playout_delay_y"; + +// Helper function for NetEqDelayAnalyzer::CreateGraphs. Returns the +// interpolated value of a function at the point x. Vector x_vec contains the +// sample points, and y_vec contains the function values at these points. The +// return value is a linear interpolation between y_vec values. +double LinearInterpolate(double x, + const std::vector<int64_t>& x_vec, + const std::vector<int64_t>& y_vec) { + // Find first element which is larger than x. 
+ auto it = std::upper_bound(x_vec.begin(), x_vec.end(), x); + if (it == x_vec.end()) { + --it; + } + const size_t upper_ix = it - x_vec.begin(); + + size_t lower_ix; + if (upper_ix == 0 || x_vec[upper_ix] <= x) { + lower_ix = upper_ix; + } else { + lower_ix = upper_ix - 1; + } + double y; + if (lower_ix == upper_ix) { + y = y_vec[lower_ix]; + } else { + RTC_DCHECK_NE(x_vec[lower_ix], x_vec[upper_ix]); + y = (x - x_vec[lower_ix]) * (y_vec[upper_ix] - y_vec[lower_ix]) / + (x_vec[upper_ix] - x_vec[lower_ix]) + + y_vec[lower_ix]; + } + return y; +} + +void PrintDelays(const NetEqDelayAnalyzer::Delays& delays, + int64_t ref_time_ms, + absl::string_view var_name_x, + absl::string_view var_name_y, + std::ofstream& output, + absl::string_view terminator = "") { + output << var_name_x << " = [ "; + for (const std::pair<int64_t, float>& delay : delays) { + output << (delay.first - ref_time_ms) / 1000.f << ", "; + } + output << "]" << terminator << std::endl; + + output << var_name_y << " = [ "; + for (const std::pair<int64_t, float>& delay : delays) { + output << delay.second << ", "; + } + output << "]" << terminator << std::endl; +} + +} // namespace + +void NetEqDelayAnalyzer::AfterInsertPacket( + const test::NetEqInput::PacketData& packet, + NetEq* neteq) { + data_.insert( + std::make_pair(packet.header.timestamp, TimingData(packet.time_ms))); + ssrcs_.insert(packet.header.ssrc); + payload_types_.insert(packet.header.payloadType); +} + +void NetEqDelayAnalyzer::BeforeGetAudio(NetEq* neteq) { + last_sync_buffer_ms_ = neteq->SyncBufferSizeMs(); +} + +void NetEqDelayAnalyzer::AfterGetAudio(int64_t time_now_ms, + const AudioFrame& audio_frame, + bool /*muted*/, + NetEq* neteq) { + get_audio_time_ms_.push_back(time_now_ms); + for (const RtpPacketInfo& info : audio_frame.packet_infos_) { + auto it = data_.find(info.rtp_timestamp()); + if (it == data_.end()) { + // This is a packet that was split out from another packet. Skip it. 
+ continue; + } + auto& it_timing = it->second; + RTC_CHECK(!it_timing.decode_get_audio_count) + << "Decode time already written"; + it_timing.decode_get_audio_count = get_audio_count_; + RTC_CHECK(!it_timing.sync_delay_ms) << "Decode time already written"; + it_timing.sync_delay_ms = last_sync_buffer_ms_; + it_timing.target_delay_ms = neteq->TargetDelayMs(); + it_timing.current_delay_ms = neteq->FilteredCurrentDelayMs(); + } + last_sample_rate_hz_ = audio_frame.sample_rate_hz_; + ++get_audio_count_; +} + +void NetEqDelayAnalyzer::CreateGraphs(Delays* arrival_delay_ms, + Delays* corrected_arrival_delay_ms, + Delays* playout_delay_ms, + Delays* target_delay_ms) const { + if (get_audio_time_ms_.empty()) { + return; + } + // Create nominal_get_audio_time_ms, a vector starting at + // get_audio_time_ms_[0] and increasing by 10 for each element. + std::vector<int64_t> nominal_get_audio_time_ms(get_audio_time_ms_.size()); + nominal_get_audio_time_ms[0] = get_audio_time_ms_[0]; + std::transform( + nominal_get_audio_time_ms.begin(), nominal_get_audio_time_ms.end() - 1, + nominal_get_audio_time_ms.begin() + 1, [](int64_t& x) { return x + 10; }); + RTC_DCHECK( + std::is_sorted(get_audio_time_ms_.begin(), get_audio_time_ms_.end())); + + std::vector<double> rtp_timestamps_ms; + double offset = std::numeric_limits<double>::max(); + TimestampUnwrapper unwrapper; + // This loop traverses data_ and populates rtp_timestamps_ms as well as + // calculates the base offset. + for (auto& d : data_) { + rtp_timestamps_ms.push_back( + static_cast<double>(unwrapper.Unwrap(d.first)) / + rtc::CheckedDivExact(last_sample_rate_hz_, 1000)); + offset = + std::min(offset, d.second.arrival_time_ms - rtp_timestamps_ms.back()); + } + + // This loop traverses the data again and populates the graph vectors. The + // reason to have two loops and traverse twice is that the offset cannot be + // known until the first traversal is done. 
Meanwhile, the final offset must + // be known already at the start of this second loop. + size_t i = 0; + for (const auto& data : data_) { + const double offset_send_time_ms = rtp_timestamps_ms[i++] + offset; + const auto& timing = data.second; + corrected_arrival_delay_ms->push_back(std::make_pair( + timing.arrival_time_ms, + LinearInterpolate(timing.arrival_time_ms, get_audio_time_ms_, + nominal_get_audio_time_ms) - + offset_send_time_ms)); + arrival_delay_ms->push_back(std::make_pair( + timing.arrival_time_ms, timing.arrival_time_ms - offset_send_time_ms)); + + if (timing.decode_get_audio_count) { + // This packet was decoded. + RTC_DCHECK(timing.sync_delay_ms); + const int64_t get_audio_time = + *timing.decode_get_audio_count * 10 + get_audio_time_ms_[0]; + const float playout_ms = + get_audio_time + *timing.sync_delay_ms - offset_send_time_ms; + playout_delay_ms->push_back(std::make_pair(get_audio_time, playout_ms)); + RTC_DCHECK(timing.target_delay_ms); + RTC_DCHECK(timing.current_delay_ms); + const float target = + playout_ms - *timing.current_delay_ms + *timing.target_delay_ms; + target_delay_ms->push_back(std::make_pair(get_audio_time, target)); + } + } +} + +void NetEqDelayAnalyzer::CreateMatlabScript( + absl::string_view script_name) const { + Delays arrival_delay_ms; + Delays corrected_arrival_delay_ms; + Delays playout_delay_ms; + Delays target_delay_ms; + CreateGraphs(&arrival_delay_ms, &corrected_arrival_delay_ms, + &playout_delay_ms, &target_delay_ms); + + // Maybe better to find the actually smallest timestamp, to surely avoid + // x-axis starting from negative. + const int64_t ref_time_ms = arrival_delay_ms.front().first; + + // Create an output file stream to Matlab script file. 
+ std::ofstream output(std::string{script_name}); + + PrintDelays(corrected_arrival_delay_ms, ref_time_ms, kArrivalDelayX, + kArrivalDelayY, output, ";"); + + // PrintDelays(corrected_arrival_delay_x, kCorrectedArrivalDelayX, + // kCorrectedArrivalDelayY, output); + + PrintDelays(playout_delay_ms, ref_time_ms, kPlayoutDelayX, kPlayoutDelayY, + output, ";"); + + PrintDelays(target_delay_ms, ref_time_ms, kTargetDelayX, kTargetDelayY, + output, ";"); + + output << "h=plot(" << kArrivalDelayX << ", " << kArrivalDelayY << ", " + << kTargetDelayX << ", " << kTargetDelayY << ", 'g.', " + << kPlayoutDelayX << ", " << kPlayoutDelayY << ");" << std::endl; + output << "set(h(1),'color',0.75*[1 1 1]);" << std::endl; + output << "set(h(2),'markersize',6);" << std::endl; + output << "set(h(3),'linew',1.5);" << std::endl; + output << "ax1=axis;" << std::endl; + output << "axis tight" << std::endl; + output << "ax2=axis;" << std::endl; + output << "axis([ax2(1:3) ax1(4)])" << std::endl; + output << "xlabel('time [s]');" << std::endl; + output << "ylabel('relative delay [ms]');" << std::endl; + if (!ssrcs_.empty()) { + auto ssrc_it = ssrcs_.cbegin(); + output << "title('SSRC: 0x" << std::hex << static_cast<int64_t>(*ssrc_it++); + while (ssrc_it != ssrcs_.end()) { + output << ", 0x" << std::hex << static_cast<int64_t>(*ssrc_it++); + } + output << std::dec; + auto pt_it = payload_types_.cbegin(); + output << "; Payload Types: " << *pt_it++; + while (pt_it != payload_types_.end()) { + output << ", " << *pt_it++; + } + output << "');" << std::endl; + } +} + +void NetEqDelayAnalyzer::CreatePythonScript( + absl::string_view script_name) const { + Delays arrival_delay_ms; + Delays corrected_arrival_delay_ms; + Delays playout_delay_ms; + Delays target_delay_ms; + CreateGraphs(&arrival_delay_ms, &corrected_arrival_delay_ms, + &playout_delay_ms, &target_delay_ms); + + // Maybe better to find the actually smallest timestamp, to surely avoid + // x-axis starting from negative. 
+ const int64_t ref_time_ms = arrival_delay_ms.front().first; + + // Create an output file stream to the python script file. + std::ofstream output(std::string{script_name}); + + // Necessary includes + output << "import numpy as np" << std::endl; + output << "import matplotlib.pyplot as plt" << std::endl; + + PrintDelays(corrected_arrival_delay_ms, ref_time_ms, kArrivalDelayX, + kArrivalDelayY, output); + + // PrintDelays(corrected_arrival_delay_x, kCorrectedArrivalDelayX, + // kCorrectedArrivalDelayY, output); + + PrintDelays(playout_delay_ms, ref_time_ms, kPlayoutDelayX, kPlayoutDelayY, + output); + + PrintDelays(target_delay_ms, ref_time_ms, kTargetDelayX, kTargetDelayY, + output); + + output << "if __name__ == '__main__':" << std::endl; + output << " h=plt.plot(" << kArrivalDelayX << ", " << kArrivalDelayY << ", " + << kTargetDelayX << ", " << kTargetDelayY << ", 'g.', " + << kPlayoutDelayX << ", " << kPlayoutDelayY << ")" << std::endl; + output << " plt.setp(h[0],'color',[.75, .75, .75])" << std::endl; + output << " plt.setp(h[1],'markersize',6)" << std::endl; + output << " plt.setp(h[2],'linewidth',1.5)" << std::endl; + output << " plt.axis('tight')" << std::endl; + output << " plt.xlabel('time [s]')" << std::endl; + output << " plt.ylabel('relative delay [ms]')" << std::endl; + if (!ssrcs_.empty()) { + auto ssrc_it = ssrcs_.cbegin(); + output << " plt.title('SSRC: 0x" << std::hex + << static_cast<int64_t>(*ssrc_it++); + while (ssrc_it != ssrcs_.end()) { + output << ", 0x" << std::hex << static_cast<int64_t>(*ssrc_it++); + } + output << std::dec; + auto pt_it = payload_types_.cbegin(); + output << "; Payload Types: " << *pt_it++; + while (pt_it != payload_types_.end()) { + output << ", " << *pt_it++; + } + output << "')" << std::endl; + } + output << " plt.show()" << std::endl; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h 
b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h new file mode 100644 index 0000000000..ffcba5843f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_delay_analyzer.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_ + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/tools/neteq_input.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" + +namespace webrtc { +namespace test { + +class NetEqDelayAnalyzer : public test::NetEqPostInsertPacket, + public test::NetEqGetAudioCallback { + public: + void AfterInsertPacket(const test::NetEqInput::PacketData& packet, + NetEq* neteq) override; + + void BeforeGetAudio(NetEq* neteq) override; + + void AfterGetAudio(int64_t time_now_ms, + const AudioFrame& audio_frame, + bool muted, + NetEq* neteq) override; + + using Delays = std::vector<std::pair<int64_t, float>>; + void CreateGraphs(Delays* arrival_delay_ms, + Delays* corrected_arrival_delay_ms, + Delays* playout_delay_ms, + Delays* target_delay_ms) const; + + // Creates a matlab script with file name script_name. When executed in + // Matlab, the script will generate graphs with the same timing information + // as provided by CreateGraphs. + void CreateMatlabScript(absl::string_view script_name) const; + + // Creates a python script with file name `script_name`. 
When executed in + // Python, the script will generate graphs with the same timing information + // as provided by CreateGraphs. + void CreatePythonScript(absl::string_view script_name) const; + + private: + struct TimingData { + explicit TimingData(int64_t at) : arrival_time_ms(at) {} + int64_t arrival_time_ms; + absl::optional<int64_t> decode_get_audio_count; + absl::optional<int64_t> sync_delay_ms; + absl::optional<int> target_delay_ms; + absl::optional<int> current_delay_ms; + }; + std::map<uint32_t, TimingData> data_; + std::vector<int64_t> get_audio_time_ms_; + size_t get_audio_count_ = 0; + size_t last_sync_buffer_ms_ = 0; + int last_sample_rate_hz_ = 0; + std::set<uint32_t> ssrcs_; + std::set<int> payload_types_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_DELAY_ANALYZER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc new file mode 100644 index 0000000000..0c1f27799a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/neteq_event_log_input.h" + +#include <limits> +#include <memory> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/tools/rtc_event_log_source.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +NetEqEventLogInput* NetEqEventLogInput::CreateFromFile( + absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter) { + auto event_log_src = + RtcEventLogSource::CreateFromFile(file_name, ssrc_filter); + if (!event_log_src) { + return nullptr; + } + return new NetEqEventLogInput(std::move(event_log_src)); +} + +NetEqEventLogInput* NetEqEventLogInput::CreateFromString( + absl::string_view file_contents, + absl::optional<uint32_t> ssrc_filter) { + auto event_log_src = + RtcEventLogSource::CreateFromString(file_contents, ssrc_filter); + if (!event_log_src) { + return nullptr; + } + return new NetEqEventLogInput(std::move(event_log_src)); +} + +absl::optional<int64_t> NetEqEventLogInput::NextOutputEventTime() const { + return next_output_event_ms_; +} + +void NetEqEventLogInput::AdvanceOutputEvent() { + next_output_event_ms_ = source_->NextAudioOutputEventMs(); + if (*next_output_event_ms_ == std::numeric_limits<int64_t>::max()) { + next_output_event_ms_ = absl::nullopt; + } +} + +PacketSource* NetEqEventLogInput::source() { + return source_.get(); +} + +NetEqEventLogInput::NetEqEventLogInput( + std::unique_ptr<RtcEventLogSource> source) + : source_(std::move(source)) { + LoadNextPacket(); + AdvanceOutputEvent(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h new file mode 100644 index 0000000000..c947ee1fc0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_event_log_input.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_ + +#include <map> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" + +namespace webrtc { +namespace test { + +class RtcEventLogSource; + +// Implementation of NetEqPacketSourceInput to be used with an +// RtcEventLogSource. +class NetEqEventLogInput final : public NetEqPacketSourceInput { + public: + static NetEqEventLogInput* CreateFromFile( + absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter); + static NetEqEventLogInput* CreateFromString( + absl::string_view file_contents, + absl::optional<uint32_t> ssrc_filter); + + absl::optional<int64_t> NextOutputEventTime() const override; + void AdvanceOutputEvent() override; + + protected: + PacketSource* source() override; + + private: + NetEqEventLogInput(std::unique_ptr<RtcEventLogSource> source); + std::unique_ptr<RtcEventLogSource> source_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_EVENT_LOG_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc new file mode 100644 index 0000000000..de416348f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_input.h" + +#include "rtc_base/strings/string_builder.h" + +namespace webrtc { +namespace test { + +NetEqInput::PacketData::PacketData() = default; +NetEqInput::PacketData::~PacketData() = default; + +std::string NetEqInput::PacketData::ToString() const { + rtc::StringBuilder ss; + ss << "{" + "time_ms: " + << static_cast<int64_t>(time_ms) + << ", " + "header: {" + "pt: " + << static_cast<int>(header.payloadType) + << ", " + "sn: " + << header.sequenceNumber + << ", " + "ts: " + << header.timestamp + << ", " + "ssrc: " + << header.ssrc + << "}, " + "payload bytes: " + << payload.size() << "}"; + return ss.Release(); +} + +TimeLimitedNetEqInput::TimeLimitedNetEqInput(std::unique_ptr<NetEqInput> input, + int64_t duration_ms) + : input_(std::move(input)), + start_time_ms_(input_->NextEventTime()), + duration_ms_(duration_ms) {} + +TimeLimitedNetEqInput::~TimeLimitedNetEqInput() = default; + +absl::optional<int64_t> TimeLimitedNetEqInput::NextPacketTime() const { + return ended_ ? absl::nullopt : input_->NextPacketTime(); +} + +absl::optional<int64_t> TimeLimitedNetEqInput::NextOutputEventTime() const { + return ended_ ? 
absl::nullopt : input_->NextOutputEventTime(); +} + +std::unique_ptr<NetEqInput::PacketData> TimeLimitedNetEqInput::PopPacket() { + if (ended_) { + return std::unique_ptr<PacketData>(); + } + auto packet = input_->PopPacket(); + MaybeSetEnded(); + return packet; +} + +void TimeLimitedNetEqInput::AdvanceOutputEvent() { + if (!ended_) { + input_->AdvanceOutputEvent(); + MaybeSetEnded(); + } +} + +bool TimeLimitedNetEqInput::ended() const { + return ended_ || input_->ended(); +} + +absl::optional<RTPHeader> TimeLimitedNetEqInput::NextHeader() const { + return ended_ ? absl::nullopt : input_->NextHeader(); +} + +void TimeLimitedNetEqInput::MaybeSetEnded() { + if (NextEventTime() && start_time_ms_ && + *NextEventTime() - *start_time_ms_ > duration_ms_) { + ended_ = true; + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h new file mode 100644 index 0000000000..3a66264043 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_input.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_ + +#include <algorithm> +#include <memory> +#include <string> + +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "rtc_base/buffer.h" + +namespace webrtc { +namespace test { + +// Interface class for input to the NetEqTest class. +class NetEqInput { + public: + struct PacketData { + PacketData(); + ~PacketData(); + std::string ToString() const; + + RTPHeader header; + rtc::Buffer payload; + int64_t time_ms; + }; + + virtual ~NetEqInput() = default; + + // Returns at what time (in ms) NetEq::InsertPacket should be called next, or + // empty if the source is out of packets. + virtual absl::optional<int64_t> NextPacketTime() const = 0; + + // Returns at what time (in ms) NetEq::GetAudio should be called next, or + // empty if no more output events are available. + virtual absl::optional<int64_t> NextOutputEventTime() const = 0; + + // Returns the time (in ms) for the next event from either NextPacketTime() + // or NextOutputEventTime(), or empty if both are out of events. + absl::optional<int64_t> NextEventTime() const { + const auto a = NextPacketTime(); + const auto b = NextOutputEventTime(); + // Return the minimum of non-empty `a` and `b`, or empty if both are empty. + if (a) { + return b ? std::min(*a, *b) : a; + } + return b ? b : absl::nullopt; + } + + // Returns the next packet to be inserted into NetEq. The packet following the + // returned one is pre-fetched in the NetEqInput object, such that future + // calls to NextPacketTime() or NextHeader() will return information from that + // packet. + virtual std::unique_ptr<PacketData> PopPacket() = 0; + + // Move to the next output event. This will make NextOutputEventTime() return + // a new value (potentially the same if several output events share the same + // time). 
+ virtual void AdvanceOutputEvent() = 0; + + // Returns true if the source has come to an end. An implementation must + // eventually return true from this method, or the test will end up in an + // infinite loop. + virtual bool ended() const = 0; + + // Returns the RTP header for the next packet, i.e., the packet that will be + // delivered next by PopPacket(). + virtual absl::optional<RTPHeader> NextHeader() const = 0; +}; + +// Wrapper class to impose a time limit on a NetEqInput object, typically +// another time limit than what the object itself provides. For example, an +// input taken from a file can be cut shorter by wrapping it in this class. +class TimeLimitedNetEqInput : public NetEqInput { + public: + TimeLimitedNetEqInput(std::unique_ptr<NetEqInput> input, int64_t duration_ms); + ~TimeLimitedNetEqInput() override; + absl::optional<int64_t> NextPacketTime() const override; + absl::optional<int64_t> NextOutputEventTime() const override; + std::unique_ptr<PacketData> PopPacket() override; + void AdvanceOutputEvent() override; + bool ended() const override; + absl::optional<RTPHeader> NextHeader() const override; + + private: + void MaybeSetEnded(); + + std::unique_ptr<NetEqInput> input_; + const absl::optional<int64_t> start_time_ms_; + const int64_t duration_ms_; + bool ended_ = false; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc new file mode 100644 index 0000000000..55a5653238 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h" + +#include <algorithm> +#include <limits> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/tools/rtp_file_source.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +NetEqPacketSourceInput::NetEqPacketSourceInput() : next_output_event_ms_(0) {} + +absl::optional<int64_t> NetEqPacketSourceInput::NextPacketTime() const { + return packet_ + ? absl::optional<int64_t>(static_cast<int64_t>(packet_->time_ms())) + : absl::nullopt; +} + +absl::optional<RTPHeader> NetEqPacketSourceInput::NextHeader() const { + return packet_ ? absl::optional<RTPHeader>(packet_->header()) : absl::nullopt; +} + +void NetEqPacketSourceInput::LoadNextPacket() { + packet_ = source()->NextPacket(); +} + +std::unique_ptr<NetEqInput::PacketData> NetEqPacketSourceInput::PopPacket() { + if (!packet_) { + return std::unique_ptr<PacketData>(); + } + std::unique_ptr<PacketData> packet_data(new PacketData); + packet_data->header = packet_->header(); + if (packet_->payload_length_bytes() == 0 && + packet_->virtual_payload_length_bytes() > 0) { + // This is a header-only "dummy" packet. Set the payload to all zeros, with + // length according to the virtual length. 
+ packet_data->payload.SetSize(packet_->virtual_payload_length_bytes()); + std::fill_n(packet_data->payload.data(), packet_data->payload.size(), 0); + } else { + packet_data->payload.SetData(packet_->payload(), + packet_->payload_length_bytes()); + } + packet_data->time_ms = packet_->time_ms(); + + LoadNextPacket(); + + return packet_data; +} + +NetEqRtpDumpInput::NetEqRtpDumpInput(absl::string_view file_name, + const RtpHeaderExtensionMap& hdr_ext_map, + absl::optional<uint32_t> ssrc_filter) + : source_(RtpFileSource::Create(file_name, ssrc_filter)) { + for (const auto& ext_pair : hdr_ext_map) { + source_->RegisterRtpHeaderExtension(ext_pair.second, ext_pair.first); + } + LoadNextPacket(); +} + +absl::optional<int64_t> NetEqRtpDumpInput::NextOutputEventTime() const { + return next_output_event_ms_; +} + +void NetEqRtpDumpInput::AdvanceOutputEvent() { + if (next_output_event_ms_) { + *next_output_event_ms_ += kOutputPeriodMs; + } + if (!NextPacketTime()) { + next_output_event_ms_ = absl::nullopt; + } +} + +PacketSource* NetEqRtpDumpInput::source() { + return source_.get(); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h new file mode 100644 index 0000000000..407fa491b1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_packet_source_input.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_ + +#include <map> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/tools/neteq_input.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" + +namespace webrtc { +namespace test { + +class RtpFileSource; + +// An adapter class to dress up a PacketSource object as a NetEqInput. +class NetEqPacketSourceInput : public NetEqInput { + public: + using RtpHeaderExtensionMap = std::map<int, webrtc::RTPExtensionType>; + + NetEqPacketSourceInput(); + absl::optional<int64_t> NextPacketTime() const override; + std::unique_ptr<PacketData> PopPacket() override; + absl::optional<RTPHeader> NextHeader() const override; + bool ended() const override { return !next_output_event_ms_; } + + protected: + virtual PacketSource* source() = 0; + void LoadNextPacket(); + + absl::optional<int64_t> next_output_event_ms_; + + private: + std::unique_ptr<Packet> packet_; +}; + +// Implementation of NetEqPacketSourceInput to be used with an RtpFileSource. 
+class NetEqRtpDumpInput final : public NetEqPacketSourceInput { + public: + NetEqRtpDumpInput(absl::string_view file_name, + const RtpHeaderExtensionMap& hdr_ext_map, + absl::optional<uint32_t> ssrc_filter); + + absl::optional<int64_t> NextOutputEventTime() const override; + void AdvanceOutputEvent() override; + + protected: + PacketSource* source() override; + + private: + static constexpr int64_t kOutputPeriodMs = 10; + + std::unique_ptr<RtpFileSource> source_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PACKET_SOURCE_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc new file mode 100644 index 0000000000..ccaa87b5e8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/neteq_performance_test.h" + +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/clock.h" +#include "test/testsupport/file_utils.h" + +using webrtc::NetEq; +using webrtc::test::AudioLoop; +using webrtc::test::RtpGenerator; + +namespace webrtc { +namespace test { + +int64_t NetEqPerformanceTest::Run(int runtime_ms, + int lossrate, + double drift_factor) { + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + const int kSampRateHz = 32000; + const std::string kDecoderName = "pcm16-swb32"; + const int kPayloadType = 95; + + // Initialize NetEq instance. + NetEq::Config config; + config.sample_rate_hz = kSampRateHz; + webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock(); + auto audio_decoder_factory = CreateBuiltinAudioDecoderFactory(); + auto neteq = + DefaultNetEqFactory().CreateNetEq(config, audio_decoder_factory, clock); + // Register decoder in `neteq`. + if (!neteq->RegisterPayloadType(kPayloadType, + SdpAudioFormat("l16", kSampRateHz, 1))) + return -1; + + // Set up AudioLoop object. + AudioLoop audio_loop; + const size_t kMaxLoopLengthSamples = kSampRateHz * 10; // 10 second loop. + const size_t kInputBlockSizeSamples = 60 * kSampRateHz / 1000; // 60 ms. + if (!audio_loop.Init(kInputFileName, kMaxLoopLengthSamples, + kInputBlockSizeSamples)) + return -1; + + int32_t time_now_ms = 0; + + // Get first input packet. + RTPHeader rtp_header; + RtpGenerator rtp_gen(kSampRateHz / 1000); + // Start with positive drift first half of simulation. 
+ rtp_gen.set_drift_factor(drift_factor); + bool drift_flipped = false; + int32_t packet_input_time_ms = + rtp_gen.GetRtpHeader(kPayloadType, kInputBlockSizeSamples, &rtp_header); + auto input_samples = audio_loop.GetNextBlock(); + if (input_samples.empty()) + exit(1); + uint8_t input_payload[kInputBlockSizeSamples * sizeof(int16_t)]; + size_t payload_len = WebRtcPcm16b_Encode(input_samples.data(), + input_samples.size(), input_payload); + RTC_CHECK_EQ(sizeof(input_payload), payload_len); + + // Main loop. + int64_t start_time_ms = clock->TimeInMilliseconds(); + AudioFrame out_frame; + while (time_now_ms < runtime_ms) { + while (packet_input_time_ms <= time_now_ms) { + // Drop every N packets, where N = FLAG_lossrate. + bool lost = false; + if (lossrate > 0) { + lost = ((rtp_header.sequenceNumber - 1) % lossrate) == 0; + } + if (!lost) { + // Insert packet. + int error = neteq->InsertPacket(rtp_header, input_payload); + if (error != NetEq::kOK) + return -1; + } + + // Get next packet. + packet_input_time_ms = rtp_gen.GetRtpHeader( + kPayloadType, kInputBlockSizeSamples, &rtp_header); + input_samples = audio_loop.GetNextBlock(); + if (input_samples.empty()) + return -1; + payload_len = WebRtcPcm16b_Encode(input_samples.data(), + input_samples.size(), input_payload); + RTC_DCHECK_EQ(payload_len, kInputBlockSizeSamples * sizeof(int16_t)); + } + + // Get output audio, but don't do anything with it. + bool muted; + int error = neteq->GetAudio(&out_frame, &muted); + RTC_CHECK(!muted); + if (error != NetEq::kOK) + return -1; + + RTC_DCHECK_EQ(out_frame.samples_per_channel_, (kSampRateHz * 10) / 1000); + + static const int kOutputBlockSizeMs = 10; + time_now_ms += kOutputBlockSizeMs; + if (time_now_ms >= runtime_ms / 2 && !drift_flipped) { + // Apply negative drift second half of simulation. 
+ rtp_gen.set_drift_factor(-drift_factor); + drift_flipped = true; + } + } + int64_t end_time_ms = clock->TimeInMilliseconds(); + return end_time_ms - start_time_ms; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h new file mode 100644 index 0000000000..b5b4d91577 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_performance_test.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_ + +#include <stdint.h> + +namespace webrtc { +namespace test { + +class NetEqPerformanceTest { + public: + // Runs a performance test with parameters as follows: + // `runtime_ms`: the simulation time, i.e., the duration of the audio data. + // `lossrate`: drop one out of `lossrate` packets, e.g., one out of 10. + // `drift_factor`: clock drift in [0, 1]. + // Returns the runtime in ms. 
+ static int64_t Run(int runtime_ms, int lossrate, double drift_factor); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_PERFORMANCE_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc new file mode 100644 index 0000000000..1fd82dfcdd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" + +#include <stdio.h> + +#include <cmath> + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/audio_coding/neteq/tools/neteq_quality_test.h" +#include "modules/audio_coding/neteq/tools/output_audio_file.h" +#include "modules/audio_coding/neteq/tools/output_wav_file.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/clock.h" +#include "test/testsupport/file_utils.h" + +const std::string& DefaultInFilename() { + static const std::string path = + ::webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", "pcm"); + return path; +} + +const std::string& DefaultOutFilename() { + static const std::string path = + ::webrtc::test::OutputPath() + "neteq_quality_test_out.pcm"; + return path; +} + +ABSL_FLAG( + std::string, + in_filename, + DefaultInFilename(), + "Filename for input audio 
(specify sample rate with --input_sample_rate, " + "and channels with --channels)."); + +ABSL_FLAG(int, input_sample_rate, 16000, "Sample rate of input file in Hz."); + +ABSL_FLAG(int, channels, 1, "Number of channels in input audio."); + +ABSL_FLAG(std::string, + out_filename, + DefaultOutFilename(), + "Name of output audio file."); + +ABSL_FLAG( + int, + runtime_ms, + 10000, + "Simulated runtime (milliseconds). -1 will consume the complete file."); + +ABSL_FLAG(int, packet_loss_rate, 10, "Percentile of packet loss."); + +ABSL_FLAG(int, + random_loss_mode, + ::webrtc::test::kUniformLoss, + "Random loss mode: 0--no loss, 1--uniform loss, 2--Gilbert Elliot " + "loss, 3--fixed loss."); + +ABSL_FLAG(int, + burst_length, + 30, + "Burst length in milliseconds, only valid for Gilbert Elliot loss."); + +ABSL_FLAG(float, drift_factor, 0.0, "Time drift factor."); + +ABSL_FLAG(int, + preload_packets, + 1, + "Preload the buffer with this many packets."); + +ABSL_FLAG(std::string, + loss_events, + "", + "List of loss events time and duration separated by comma: " + "<first_event_time> <first_event_duration>, <second_event_time> " + "<second_event_duration>, ..."); + +namespace webrtc { +namespace test { + +namespace { + +std::unique_ptr<NetEq> CreateNetEq( + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) { + return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); +} + +} // namespace + +const uint8_t kPayloadType = 95; +const int kOutputSizeMs = 10; +const int kInitSeed = 0x12345678; +const int kPacketLossTimeUnitMs = 10; + +// Common validator for file names. +static bool ValidateFilename(absl::string_view value, bool is_output) { + if (!is_output) { + RTC_CHECK_NE(value.substr(value.find_last_of('.') + 1), "wav") + << "WAV file input is not supported"; + } + FILE* fid = is_output ? 
fopen(std::string(value).c_str(), "wb") + : fopen(std::string(value).c_str(), "rb"); + if (fid == nullptr) + return false; + fclose(fid); + return true; +} + +// ProbTrans00Solver() is to calculate the transition probability from no-loss +// state to itself in a modified Gilbert Elliot packet loss model. The result is +// to achieve the target packet loss rate `loss_rate`, when a packet is not +// lost only if all `units` drawings within the duration of the packet result in +// no-loss. +static double ProbTrans00Solver(int units, + double loss_rate, + double prob_trans_10) { + if (units == 1) + return prob_trans_10 / (1.0f - loss_rate) - prob_trans_10; + // 0 == prob_trans_00 ^ (units - 1) + (1 - loss_rate) / prob_trans_10 * + // prob_trans_00 - (1 - loss_rate) * (1 + 1 / prob_trans_10). + // There is a unique solution between 0.0 and 1.0, due to the monotonicity and + // an opposite sign at 0.0 and 1.0. + // For simplicity, we reformulate the equation as + // f(x) = x ^ (units - 1) + a x + b. + // Its derivative is + // f'(x) = (units - 1) x ^ (units - 2) + a. + // The derivative is strictly greater than 0 when x is between 0 and 1. 
+ // We use Newton's method to solve the equation, iteration is + // x(k+1) = x(k) - f(x) / f'(x); + const double kPrecision = 0.001f; + const int kIterations = 100; + const double a = (1.0f - loss_rate) / prob_trans_10; + const double b = (loss_rate - 1.0f) * (1.0f + 1.0f / prob_trans_10); + double x = 0.0; // Starting point; + double f = b; + double f_p; + int iter = 0; + while ((f >= kPrecision || f <= -kPrecision) && iter < kIterations) { + f_p = (units - 1.0f) * std::pow(x, units - 2) + a; + x -= f / f_p; + if (x > 1.0f) { + x = 1.0f; + } else if (x < 0.0f) { + x = 0.0f; + } + f = std::pow(x, units - 1) + a * x + b; + iter++; + } + return x; +} + +NetEqQualityTest::NetEqQualityTest( + int block_duration_ms, + int in_sampling_khz, + int out_sampling_khz, + const SdpAudioFormat& format, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) + : audio_format_(format), + channels_(absl::GetFlag(FLAGS_channels)), + decoded_time_ms_(0), + decodable_time_ms_(0), + drift_factor_(absl::GetFlag(FLAGS_drift_factor)), + packet_loss_rate_(absl::GetFlag(FLAGS_packet_loss_rate)), + block_duration_ms_(block_duration_ms), + in_sampling_khz_(in_sampling_khz), + out_sampling_khz_(out_sampling_khz), + in_size_samples_( + static_cast<size_t>(in_sampling_khz_ * block_duration_ms_)), + payload_size_bytes_(0), + max_payload_bytes_(0), + in_file_( + new ResampleInputAudioFile(absl::GetFlag(FLAGS_in_filename), + absl::GetFlag(FLAGS_input_sample_rate), + in_sampling_khz * 1000, + absl::GetFlag(FLAGS_runtime_ms) > 0)), + rtp_generator_( + new RtpGenerator(in_sampling_khz_, 0, 0, decodable_time_ms_)), + total_payload_size_bytes_(0) { + // Flag validation + RTC_CHECK(ValidateFilename(absl::GetFlag(FLAGS_in_filename), false)) + << "Invalid input filename."; + + RTC_CHECK(absl::GetFlag(FLAGS_input_sample_rate) == 8000 || + absl::GetFlag(FLAGS_input_sample_rate) == 16000 || + absl::GetFlag(FLAGS_input_sample_rate) == 32000 || + absl::GetFlag(FLAGS_input_sample_rate) == 48000) + << 
"Invalid sample rate should be 8000, 16000, 32000 or 48000 Hz."; + + RTC_CHECK_EQ(absl::GetFlag(FLAGS_channels), 1) + << "Invalid number of channels, current support only 1."; + + RTC_CHECK(ValidateFilename(absl::GetFlag(FLAGS_out_filename), true)) + << "Invalid output filename."; + + RTC_CHECK(absl::GetFlag(FLAGS_packet_loss_rate) >= 0 && + absl::GetFlag(FLAGS_packet_loss_rate) <= 100) + << "Invalid packet loss percentile, should be between 0 and 100."; + + RTC_CHECK(absl::GetFlag(FLAGS_random_loss_mode) >= 0 && + absl::GetFlag(FLAGS_random_loss_mode) < kLastLossMode) + << "Invalid random packet loss mode, should be between 0 and " + << kLastLossMode - 1 << "."; + + RTC_CHECK_GE(absl::GetFlag(FLAGS_burst_length), kPacketLossTimeUnitMs) + << "Invalid burst length, should be greater than or equal to " + << kPacketLossTimeUnitMs << " ms."; + + RTC_CHECK_GT(absl::GetFlag(FLAGS_drift_factor), -0.1) + << "Invalid drift factor, should be greater than -0.1."; + + RTC_CHECK_GE(absl::GetFlag(FLAGS_preload_packets), 0) + << "Invalid number of packets to preload; must be non-negative."; + + const std::string out_filename = absl::GetFlag(FLAGS_out_filename); + const std::string log_filename = out_filename + ".log"; + log_file_.open(log_filename.c_str(), std::ofstream::out); + RTC_CHECK(log_file_.is_open()); + + if (out_filename.size() >= 4 && + out_filename.substr(out_filename.size() - 4) == ".wav") { + // Open a wav file. + output_.reset( + new webrtc::test::OutputWavFile(out_filename, 1000 * out_sampling_khz)); + } else { + // Open a pcm file. 
+ output_.reset(new webrtc::test::OutputAudioFile(out_filename)); + } + + NetEq::Config config; + config.sample_rate_hz = out_sampling_khz_ * 1000; + neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory); + max_payload_bytes_ = in_size_samples_ * channels_ * sizeof(int16_t); + in_data_.reset(new int16_t[in_size_samples_ * channels_]); +} + +NetEqQualityTest::~NetEqQualityTest() { + log_file_.close(); +} + +bool NoLoss::Lost(int now_ms) { + return false; +} + +UniformLoss::UniformLoss(double loss_rate) : loss_rate_(loss_rate) {} + +bool UniformLoss::Lost(int now_ms) { + int drop_this = rand(); + return (drop_this < loss_rate_ * RAND_MAX); +} + +GilbertElliotLoss::GilbertElliotLoss(double prob_trans_11, double prob_trans_01) + : prob_trans_11_(prob_trans_11), + prob_trans_01_(prob_trans_01), + lost_last_(false), + uniform_loss_model_(new UniformLoss(0)) {} + +GilbertElliotLoss::~GilbertElliotLoss() {} + +bool GilbertElliotLoss::Lost(int now_ms) { + // Simulate bursty channel (Gilbert model). + // (1st order) Markov chain model with memory of the previous/last + // packet state (lost or received). + if (lost_last_) { + // Previous packet was not received. 
+ uniform_loss_model_->set_loss_rate(prob_trans_11_); + return lost_last_ = uniform_loss_model_->Lost(now_ms); + } else { + uniform_loss_model_->set_loss_rate(prob_trans_01_); + return lost_last_ = uniform_loss_model_->Lost(now_ms); + } +} + +FixedLossModel::FixedLossModel( + std::set<FixedLossEvent, FixedLossEventCmp> loss_events) + : loss_events_(loss_events) { + loss_events_it_ = loss_events_.begin(); +} + +FixedLossModel::~FixedLossModel() {} + +bool FixedLossModel::Lost(int now_ms) { + if (loss_events_it_ != loss_events_.end() && + now_ms > loss_events_it_->start_ms) { + if (now_ms <= loss_events_it_->start_ms + loss_events_it_->duration_ms) { + return true; + } else { + ++loss_events_it_; + return false; + } + } + return false; +} + +void NetEqQualityTest::SetUp() { + ASSERT_TRUE(neteq_->RegisterPayloadType(kPayloadType, audio_format_)); + rtp_generator_->set_drift_factor(drift_factor_); + + int units = block_duration_ms_ / kPacketLossTimeUnitMs; + switch (absl::GetFlag(FLAGS_random_loss_mode)) { + case kUniformLoss: { + // `unit_loss_rate` is the packet loss rate for each unit time interval + // (kPacketLossTimeUnitMs). Since a packet loss event is generated if any + // of |block_duration_ms_ / kPacketLossTimeUnitMs| unit time intervals of + // a full packet duration is drawn with a loss, `unit_loss_rate` fulfills + // (1 - unit_loss_rate) ^ (block_duration_ms_ / kPacketLossTimeUnitMs) == + // 1 - packet_loss_rate. + double unit_loss_rate = + (1.0 - std::pow(1.0 - 0.01 * packet_loss_rate_, 1.0 / units)); + loss_model_.reset(new UniformLoss(unit_loss_rate)); + break; + } + case kGilbertElliotLoss: { + // `FLAGS_burst_length` should be integer times of kPacketLossTimeUnitMs. + ASSERT_EQ(0, absl::GetFlag(FLAGS_burst_length) % kPacketLossTimeUnitMs); + + // We do not allow 100 percent packet loss in Gilbert Elliot model, which + // makes no sense. 
+ ASSERT_GT(100, packet_loss_rate_); + + // To guarantee the overall packet loss rate, transition probabilities + // need to satisfy: + // pi_0 * (1 - prob_trans_01_) ^ units + + // pi_1 * prob_trans_10_ ^ (units - 1) == 1 - loss_rate + // pi_0 = prob_trans_10 / (prob_trans_10 + prob_trans_01_) + // is the stationary state probability of no-loss + // pi_1 = prob_trans_01_ / (prob_trans_10 + prob_trans_01_) + // is the stationary state probability of loss + // After a derivation prob_trans_00 should satisfy: + // prob_trans_00 ^ (units - 1) = (loss_rate - 1) / prob_trans_10 * + // prob_trans_00 + (1 - loss_rate) * (1 + 1 / prob_trans_10). + double loss_rate = 0.01f * packet_loss_rate_; + double prob_trans_10 = + 1.0f * kPacketLossTimeUnitMs / absl::GetFlag(FLAGS_burst_length); + double prob_trans_00 = ProbTrans00Solver(units, loss_rate, prob_trans_10); + loss_model_.reset( + new GilbertElliotLoss(1.0f - prob_trans_10, 1.0f - prob_trans_00)); + break; + } + case kFixedLoss: { + std::istringstream loss_events_stream(absl::GetFlag(FLAGS_loss_events)); + std::string loss_event_string; + std::set<FixedLossEvent, FixedLossEventCmp> loss_events; + while (std::getline(loss_events_stream, loss_event_string, ',')) { + std::vector<int> loss_event_params; + std::istringstream loss_event_params_stream(loss_event_string); + std::copy(std::istream_iterator<int>(loss_event_params_stream), + std::istream_iterator<int>(), + std::back_inserter(loss_event_params)); + RTC_CHECK_EQ(loss_event_params.size(), 2); + auto result = loss_events.insert( + FixedLossEvent(loss_event_params[0], loss_event_params[1])); + RTC_CHECK(result.second); + } + RTC_CHECK_GT(loss_events.size(), 0); + loss_model_.reset(new FixedLossModel(loss_events)); + break; + } + default: { + loss_model_.reset(new NoLoss); + break; + } + } + + // Make sure that the packet loss profile is same for all derived tests. 
+ srand(kInitSeed); +} + +std::ofstream& NetEqQualityTest::Log() { + return log_file_; +} + +bool NetEqQualityTest::PacketLost() { + int cycles = block_duration_ms_ / kPacketLossTimeUnitMs; + + // The loop is to make sure that codecs with different block lengths share the + // same packet loss profile. + bool lost = false; + for (int idx = 0; idx < cycles; idx++) { + if (loss_model_->Lost(decoded_time_ms_)) { + // The packet will be lost if any of the drawings indicates a loss, but + // the loop has to go on to make sure that codecs with different block + // lengths keep the same pace. + lost = true; + } + } + return lost; +} + +int NetEqQualityTest::Transmit() { + int packet_input_time_ms = rtp_generator_->GetRtpHeader( + kPayloadType, in_size_samples_, &rtp_header_); + Log() << "Packet of size " << payload_size_bytes_ << " bytes, for frame at " + << packet_input_time_ms << " ms "; + if (payload_size_bytes_ > 0) { + if (!PacketLost()) { + int ret = neteq_->InsertPacket( + rtp_header_, + rtc::ArrayView<const uint8_t>(payload_.data(), payload_size_bytes_)); + if (ret != NetEq::kOK) + return -1; + Log() << "was sent."; + } else { + Log() << "was lost."; + } + } + Log() << std::endl; + return packet_input_time_ms; +} + +int NetEqQualityTest::DecodeBlock() { + bool muted; + int ret = neteq_->GetAudio(&out_frame_, &muted); + RTC_CHECK(!muted); + + if (ret != NetEq::kOK) { + return -1; + } else { + RTC_DCHECK_EQ(out_frame_.num_channels_, channels_); + RTC_DCHECK_EQ(out_frame_.samples_per_channel_, + static_cast<size_t>(kOutputSizeMs * out_sampling_khz_)); + RTC_CHECK(output_->WriteArray( + out_frame_.data(), + out_frame_.samples_per_channel_ * out_frame_.num_channels_)); + return static_cast<int>(out_frame_.samples_per_channel_); + } +} + +void NetEqQualityTest::Simulate() { + int audio_size_samples; + bool end_of_input = false; + int runtime_ms = absl::GetFlag(FLAGS_runtime_ms) >= 0 + ? 
absl::GetFlag(FLAGS_runtime_ms) + : INT_MAX; + + while (!end_of_input && decoded_time_ms_ < runtime_ms) { + // Preload the buffer if needed. + while (decodable_time_ms_ - + absl::GetFlag(FLAGS_preload_packets) * block_duration_ms_ < + decoded_time_ms_) { + if (!in_file_->Read(in_size_samples_ * channels_, &in_data_[0])) { + end_of_input = true; + ASSERT_TRUE(end_of_input && absl::GetFlag(FLAGS_runtime_ms) < 0); + break; + } + payload_.Clear(); + payload_size_bytes_ = EncodeBlock(&in_data_[0], in_size_samples_, + &payload_, max_payload_bytes_); + total_payload_size_bytes_ += payload_size_bytes_; + decodable_time_ms_ = Transmit() + block_duration_ms_; + } + audio_size_samples = DecodeBlock(); + if (audio_size_samples > 0) { + decoded_time_ms_ += audio_size_samples / out_sampling_khz_; + } + } + Log() << "Average bit rate was " + << 8.0f * total_payload_size_bytes_ / absl::GetFlag(FLAGS_runtime_ms) + << " kbps" << std::endl; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h new file mode 100644 index 0000000000..edcb117748 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_ + +#include <fstream> +#include <memory> + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/rtp_generator.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +enum LossModes { + kNoLoss, + kUniformLoss, + kGilbertElliotLoss, + kFixedLoss, + kLastLossMode +}; + +class LossModel { + public: + virtual ~LossModel() {} + virtual bool Lost(int now_ms) = 0; +}; + +class NoLoss : public LossModel { + public: + bool Lost(int now_ms) override; +}; + +class UniformLoss : public LossModel { + public: + UniformLoss(double loss_rate); + bool Lost(int now_ms) override; + void set_loss_rate(double loss_rate) { loss_rate_ = loss_rate; } + + private: + double loss_rate_; +}; + +class GilbertElliotLoss : public LossModel { + public: + GilbertElliotLoss(double prob_trans_11, double prob_trans_01); + ~GilbertElliotLoss() override; + bool Lost(int now_ms) override; + + private: + // Prob. of losing current packet, when previous packet is lost. + double prob_trans_11_; + // Prob. of losing current packet, when previous packet is not lost. 
+ double prob_trans_01_; + bool lost_last_; + std::unique_ptr<UniformLoss> uniform_loss_model_; +}; + +struct FixedLossEvent { + int start_ms; + int duration_ms; + FixedLossEvent(int start_ms, int duration_ms) + : start_ms(start_ms), duration_ms(duration_ms) {} +}; + +struct FixedLossEventCmp { + bool operator()(const FixedLossEvent& l_event, + const FixedLossEvent& r_event) const { + return l_event.start_ms < r_event.start_ms; + } +}; + +class FixedLossModel : public LossModel { + public: + FixedLossModel(std::set<FixedLossEvent, FixedLossEventCmp> loss_events); + ~FixedLossModel() override; + bool Lost(int now_ms) override; + + private: + std::set<FixedLossEvent, FixedLossEventCmp> loss_events_; + std::set<FixedLossEvent, FixedLossEventCmp>::iterator loss_events_it_; +}; + +class NetEqQualityTest : public ::testing::Test { + protected: + NetEqQualityTest( + int block_duration_ms, + int in_sampling_khz, + int out_sampling_khz, + const SdpAudioFormat& format, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory = + webrtc::CreateBuiltinAudioDecoderFactory()); + ~NetEqQualityTest() override; + + void SetUp() override; + + // EncodeBlock(...) does the following: + // 1. encodes a block of audio, saved in `in_data` and has a length of + // `block_size_samples` (samples per channel), + // 2. save the bit stream to `payload` of `max_bytes` bytes in size, + // 3. returns the length of the payload (in bytes), + virtual int EncodeBlock(int16_t* in_data, + size_t block_size_samples, + rtc::Buffer* payload, + size_t max_bytes) = 0; + + // PacketLost(...) determines weather a packet sent at an indicated time gets + // lost or not. + bool PacketLost(); + + // DecodeBlock() decodes a block of audio using the payload stored in + // `payload_` with the length of `payload_size_bytes_` (bytes). The decoded + // audio is to be stored in `out_data_`. + int DecodeBlock(); + + // Transmit() uses `rtp_generator_` to generate a packet and passes it to + // `neteq_`. 
+ int Transmit(); + + // Runs encoding / transmitting / decoding. + void Simulate(); + + // Write to log file. Usage Log() << ... + std::ofstream& Log(); + + SdpAudioFormat audio_format_; + const size_t channels_; + + private: + int decoded_time_ms_; + int decodable_time_ms_; + double drift_factor_; + int packet_loss_rate_; + const int block_duration_ms_; + const int in_sampling_khz_; + const int out_sampling_khz_; + + // Number of samples per channel in a frame. + const size_t in_size_samples_; + + size_t payload_size_bytes_; + size_t max_payload_bytes_; + + std::unique_ptr<InputAudioFile> in_file_; + std::unique_ptr<AudioSink> output_; + std::ofstream log_file_; + + std::unique_ptr<RtpGenerator> rtp_generator_; + std::unique_ptr<NetEq> neteq_; + std::unique_ptr<LossModel> loss_model_; + + std::unique_ptr<int16_t[]> in_data_; + rtc::Buffer payload_; + AudioFrame out_frame_; + RTPHeader rtp_header_; + + size_t total_payload_size_bytes_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_QUALITY_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc new file mode 100644 index 0000000000..ffd114ae5b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/neteq_replacement_input.h" + +#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +NetEqReplacementInput::NetEqReplacementInput( + std::unique_ptr<NetEqInput> source, + uint8_t replacement_payload_type, + const std::set<uint8_t>& comfort_noise_types, + const std::set<uint8_t>& forbidden_types) + : source_(std::move(source)), + replacement_payload_type_(replacement_payload_type), + comfort_noise_types_(comfort_noise_types), + forbidden_types_(forbidden_types) { + RTC_CHECK(source_); + packet_ = source_->PopPacket(); + ReplacePacket(); +} + +absl::optional<int64_t> NetEqReplacementInput::NextPacketTime() const { + return packet_ + ? absl::optional<int64_t>(static_cast<int64_t>(packet_->time_ms)) + : absl::nullopt; +} + +absl::optional<int64_t> NetEqReplacementInput::NextOutputEventTime() const { + return source_->NextOutputEventTime(); +} + +std::unique_ptr<NetEqInput::PacketData> NetEqReplacementInput::PopPacket() { + std::unique_ptr<PacketData> to_return = std::move(packet_); + while (true) { + packet_ = source_->PopPacket(); + if (!packet_) + break; + if (packet_->payload.size() > packet_->header.paddingLength) { + // Not padding only. Good to go. Skip this packet otherwise. + break; + } + } + ReplacePacket(); + return to_return; +} + +void NetEqReplacementInput::AdvanceOutputEvent() { + source_->AdvanceOutputEvent(); +} + +bool NetEqReplacementInput::ended() const { + return source_->ended(); +} + +absl::optional<RTPHeader> NetEqReplacementInput::NextHeader() const { + return source_->NextHeader(); +} + +void NetEqReplacementInput::ReplacePacket() { + if (!source_->NextPacketTime()) { + // End of input. Cannot do proper replacement on the very last packet, so we + // delete it instead. 
+ packet_.reset(); + return; + } + + RTC_DCHECK(packet_); + + RTC_CHECK_EQ(forbidden_types_.count(packet_->header.payloadType), 0) + << "Payload type " << static_cast<int>(packet_->header.payloadType) + << " is forbidden."; + + // Check if this packet is comfort noise. + if (comfort_noise_types_.count(packet_->header.payloadType) != 0) { + // If CNG, simply insert a zero-energy one-byte payload. + uint8_t cng_payload[1] = {127}; // Max attenuation of CNG. + packet_->payload.SetData(cng_payload); + return; + } + + absl::optional<RTPHeader> next_hdr = source_->NextHeader(); + RTC_DCHECK(next_hdr); + uint8_t payload[12]; + RTC_DCHECK_LE(last_frame_size_timestamps_, 120 * 48); + uint32_t input_frame_size_timestamps = last_frame_size_timestamps_; + const uint32_t timestamp_diff = + next_hdr->timestamp - packet_->header.timestamp; + if (next_hdr->sequenceNumber == packet_->header.sequenceNumber + 1 && + timestamp_diff <= 120 * 48) { + // Packets are in order and the timestamp diff is less than 5760 samples. + // Accept the timestamp diff as a valid frame size. + input_frame_size_timestamps = timestamp_diff; + last_frame_size_timestamps_ = input_frame_size_timestamps; + } + RTC_DCHECK_LE(input_frame_size_timestamps, 120 * 48); + FakeDecodeFromFile::PrepareEncoded(packet_->header.timestamp, + input_frame_size_timestamps, + packet_->payload.size(), payload); + packet_->payload.SetData(payload); + packet_->header.payloadType = replacement_payload_type_; + return; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h new file mode 100644 index 0000000000..9ce9b9dc63 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_ + +#include <memory> +#include <set> + +#include "modules/audio_coding/neteq/tools/neteq_input.h" + +namespace webrtc { +namespace test { + +// This class converts the packets from a NetEqInput to fake encodings to be +// decoded by a FakeDecodeFromFile decoder. +class NetEqReplacementInput : public NetEqInput { + public: + NetEqReplacementInput(std::unique_ptr<NetEqInput> source, + uint8_t replacement_payload_type, + const std::set<uint8_t>& comfort_noise_types, + const std::set<uint8_t>& forbidden_types); + + absl::optional<int64_t> NextPacketTime() const override; + absl::optional<int64_t> NextOutputEventTime() const override; + std::unique_ptr<PacketData> PopPacket() override; + void AdvanceOutputEvent() override; + bool ended() const override; + absl::optional<RTPHeader> NextHeader() const override; + + private: + void ReplacePacket(); + + std::unique_ptr<NetEqInput> source_; + const uint8_t replacement_payload_type_; + const std::set<uint8_t> comfort_noise_types_; + const std::set<uint8_t> forbidden_types_; + std::unique_ptr<PacketData> packet_; // The next packet to deliver. + uint32_t last_frame_size_timestamps_ = 960; // Initial guess: 20 ms @ 48 kHz. 
+}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_REPLACEMENT_INPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc new file mode 100644 index 0000000000..b274069bd4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <iostream> +#include <string> + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" +#include "modules/audio_coding/neteq/tools/neteq_test_factory.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/field_trial.h" +#include "test/field_trial.h" + +using TestConfig = webrtc::test::NetEqTestFactory::Config; + +ABSL_FLAG(bool, + codec_map, + false, + "Prints the mapping between RTP payload type and " + "codec"); +ABSL_FLAG(std::string, + force_fieldtrials, + "", + "Field trials control experimental feature code which can be forced. " + "E.g. 
running with --force_fieldtrials=WebRTC-FooFeature/Enable/" + " will assign the group Enable to field trial WebRTC-FooFeature."); +ABSL_FLAG(int, pcmu, TestConfig::default_pcmu(), "RTP payload type for PCM-u"); +ABSL_FLAG(int, pcma, TestConfig::default_pcma(), "RTP payload type for PCM-a"); +ABSL_FLAG(int, ilbc, TestConfig::default_ilbc(), "RTP payload type for iLBC"); +ABSL_FLAG(int, isac, TestConfig::default_isac(), "RTP payload type for iSAC"); +ABSL_FLAG(int, + isac_swb, + TestConfig::default_isac_swb(), + "RTP payload type for iSAC-swb (32 kHz)"); +ABSL_FLAG(int, opus, TestConfig::default_opus(), "RTP payload type for Opus"); +ABSL_FLAG(int, + pcm16b, + TestConfig::default_pcm16b(), + "RTP payload type for PCM16b-nb (8 kHz)"); +ABSL_FLAG(int, + pcm16b_wb, + TestConfig::default_pcm16b_wb(), + "RTP payload type for PCM16b-wb (16 kHz)"); +ABSL_FLAG(int, + pcm16b_swb32, + TestConfig::default_pcm16b_swb32(), + "RTP payload type for PCM16b-swb32 (32 kHz)"); +ABSL_FLAG(int, + pcm16b_swb48, + TestConfig::default_pcm16b_swb48(), + "RTP payload type for PCM16b-swb48 (48 kHz)"); +ABSL_FLAG(int, g722, TestConfig::default_g722(), "RTP payload type for G.722"); +ABSL_FLAG(int, + avt, + TestConfig::default_avt(), + "RTP payload type for AVT/DTMF (8 kHz)"); +ABSL_FLAG(int, + avt_16, + TestConfig::default_avt_16(), + "RTP payload type for AVT/DTMF (16 kHz)"); +ABSL_FLAG(int, + avt_32, + TestConfig::default_avt_32(), + "RTP payload type for AVT/DTMF (32 kHz)"); +ABSL_FLAG(int, + avt_48, + TestConfig::default_avt_48(), + "RTP payload type for AVT/DTMF (48 kHz)"); +ABSL_FLAG(int, + red, + TestConfig::default_red(), + "RTP payload type for redundant audio (RED)"); +ABSL_FLAG(int, + cn_nb, + TestConfig::default_cn_nb(), + "RTP payload type for comfort noise (8 kHz)"); +ABSL_FLAG(int, + cn_wb, + TestConfig::default_cn_wb(), + "RTP payload type for comfort noise (16 kHz)"); +ABSL_FLAG(int, + cn_swb32, + TestConfig::default_cn_swb32(), + "RTP payload type for comfort noise (32 kHz)"); 
+ABSL_FLAG(int, + cn_swb48, + TestConfig::default_cn_swb48(), + "RTP payload type for comfort noise (48 kHz)"); +ABSL_FLAG(std::string, + replacement_audio_file, + "", + "A PCM file that will be used to populate dummy" + " RTP packets"); +ABSL_FLAG(std::string, + ssrc, + "", + "Only use packets with this SSRC (decimal or hex, the latter " + "starting with 0x)"); +ABSL_FLAG(int, + audio_level, + TestConfig::default_audio_level(), + "Extension ID for audio level (RFC 6464)"); +ABSL_FLAG(int, + abs_send_time, + TestConfig::default_abs_send_time(), + "Extension ID for absolute sender time"); +ABSL_FLAG(int, + transport_seq_no, + TestConfig::default_transport_seq_no(), + "Extension ID for transport sequence number"); +ABSL_FLAG(int, + video_content_type, + TestConfig::default_video_content_type(), + "Extension ID for video content type"); +ABSL_FLAG(int, + video_timing, + TestConfig::default_video_timing(), + "Extension ID for video timing"); +ABSL_FLAG(std::string, + output_files_base_name, + "", + "Custom path used as prefix for the output files - i.e., " + "matlab plot, python plot, text log."); +ABSL_FLAG(bool, + matlabplot, + false, + "Generates a matlab script for plotting the delay profile"); +ABSL_FLAG(bool, + pythonplot, + false, + "Generates a python script for plotting the delay profile"); +ABSL_FLAG(bool, + textlog, + false, + "Generates a text log describing the simulation on a " + "step-by-step basis."); +ABSL_FLAG(bool, concealment_events, false, "Prints concealment events"); +ABSL_FLAG(int, + max_nr_packets_in_buffer, + TestConfig::default_max_nr_packets_in_buffer(), + "Maximum allowed number of packets in the buffer"); +ABSL_FLAG(bool, + enable_fast_accelerate, + false, + "Enables jitter buffer fast accelerate"); + +namespace { + +// Parses the input string for a valid SSRC (at the start of the string). If a +// valid SSRC is found, it is written to the output variable `ssrc`, and true is +// returned. Otherwise, false is returned. 
+bool ParseSsrc(absl::string_view str, uint32_t* ssrc) { + if (str.empty()) + return true; + int base = 10; + // Look for "0x" or "0X" at the start and change base to 16 if found. + if ((str.compare(0, 2, "0x") == 0) || (str.compare(0, 2, "0X") == 0)) + base = 16; + errno = 0; + char* end_ptr; + std::string str_str = std::string(str); + unsigned long value = strtoul(str_str.c_str(), &end_ptr, base); // NOLINT + if (value == ULONG_MAX && errno == ERANGE) + return false; // Value out of range for unsigned long. + if (sizeof(unsigned long) > sizeof(uint32_t) && value > 0xFFFFFFFF) // NOLINT + return false; // Value out of range for uint32_t. + if (end_ptr - str_str.c_str() < static_cast<ptrdiff_t>(str.length())) + return false; // Part of the string was not parsed. + *ssrc = static_cast<uint32_t>(value); + return true; +} + +static bool ValidateExtensionId(int value) { + if (value > 0 && value <= 255) // Value is ok. + return true; + printf("Extension ID must be between 1 and 255, not %d\n", + static_cast<int>(value)); + return false; +} + +// Flag validators. +bool ValidatePayloadType(int value) { + if (value >= 0 && value <= 127) // Value is ok. + return true; + printf("Payload type must be between 0 and 127, not %d\n", + static_cast<int>(value)); + return false; +} + +bool ValidateSsrcValue(absl::string_view str) { + uint32_t dummy_ssrc; + if (ParseSsrc(str, &dummy_ssrc)) // Value is ok. 
+ return true; + printf("Invalid SSRC: %.*s\n", static_cast<int>(str.size()), str.data()); + return false; +} + +void PrintCodecMappingEntry(absl::string_view codec, int flag) { + std::cout << codec << ": " << flag << std::endl; +} + +void PrintCodecMapping() { + PrintCodecMappingEntry("PCM-u", absl::GetFlag(FLAGS_pcmu)); + PrintCodecMappingEntry("PCM-a", absl::GetFlag(FLAGS_pcma)); + PrintCodecMappingEntry("iLBC", absl::GetFlag(FLAGS_ilbc)); + PrintCodecMappingEntry("iSAC", absl::GetFlag(FLAGS_isac)); + PrintCodecMappingEntry("iSAC-swb (32 kHz)", absl::GetFlag(FLAGS_isac_swb)); + PrintCodecMappingEntry("Opus", absl::GetFlag(FLAGS_opus)); + PrintCodecMappingEntry("PCM16b-nb (8 kHz)", absl::GetFlag(FLAGS_pcm16b)); + PrintCodecMappingEntry("PCM16b-wb (16 kHz)", absl::GetFlag(FLAGS_pcm16b_wb)); + PrintCodecMappingEntry("PCM16b-swb32 (32 kHz)", + absl::GetFlag(FLAGS_pcm16b_swb32)); + PrintCodecMappingEntry("PCM16b-swb48 (48 kHz)", + absl::GetFlag(FLAGS_pcm16b_swb48)); + PrintCodecMappingEntry("G.722", absl::GetFlag(FLAGS_g722)); + PrintCodecMappingEntry("AVT/DTMF (8 kHz)", absl::GetFlag(FLAGS_avt)); + PrintCodecMappingEntry("AVT/DTMF (16 kHz)", absl::GetFlag(FLAGS_avt_16)); + PrintCodecMappingEntry("AVT/DTMF (32 kHz)", absl::GetFlag(FLAGS_avt_32)); + PrintCodecMappingEntry("AVT/DTMF (48 kHz)", absl::GetFlag(FLAGS_avt_48)); + PrintCodecMappingEntry("redundant audio (RED)", absl::GetFlag(FLAGS_red)); + PrintCodecMappingEntry("comfort noise (8 kHz)", absl::GetFlag(FLAGS_cn_nb)); + PrintCodecMappingEntry("comfort noise (16 kHz)", absl::GetFlag(FLAGS_cn_wb)); + PrintCodecMappingEntry("comfort noise (32 kHz)", + absl::GetFlag(FLAGS_cn_swb32)); + PrintCodecMappingEntry("comfort noise (48 kHz)", + absl::GetFlag(FLAGS_cn_swb48)); +} + +bool ValidateOutputFilesOptions(bool textlog, + bool plotting, + absl::string_view output_files_base_name, + absl::string_view output_audio_filename) { + bool output_files_base_name_specified = !output_files_base_name.empty(); + if (!textlog && 
!plotting && output_files_base_name_specified) { + std::cout << "Error: --output_files_base_name cannot be used without at " + "least one of the following flags: --textlog, --matlabplot, " + "--pythonplot." + << std::endl; + return false; + } + // Without `output_audio_filename`, `output_files_base_name` is required when + // plotting output files must be generated (in order to form a valid output + // file name). + if (output_audio_filename.empty() && plotting && + !output_files_base_name_specified) { + std::cout << "Error: when no output audio file is specified and " + "--matlabplot and/or --pythonplot are used, " + "--output_files_base_name must be also used." + << std::endl; + return false; + } + return true; +} + +absl::optional<std::string> CreateOptionalOutputFileName( + bool output_requested, + absl::string_view basename, + absl::string_view output_audio_filename, + absl::string_view suffix) { + if (!output_requested) { + return absl::nullopt; + } + if (!basename.empty()) { + // Override the automatic assignment. + rtc::StringBuilder sb(basename); + sb << suffix; + return sb.str(); + } + if (!output_audio_filename.empty()) { + // Automatically assign name. + rtc::StringBuilder sb(output_audio_filename); + sb << suffix; + return sb.str(); + } + std::cout << "Error: invalid text log file parameters."; + return absl::nullopt; +} + +} // namespace + +int main(int argc, char* argv[]) { + std::vector<char*> args = absl::ParseCommandLine(argc, argv); + webrtc::test::NetEqTestFactory factory; + std::string usage = + "Tool for decoding an RTP dump file using NetEq.\n" + "Example usage:\n" + "./neteq_rtpplay input.rtp [output.{pcm, wav}]\n"; + if (absl::GetFlag(FLAGS_codec_map)) { + PrintCodecMapping(); + exit(0); + } + if (args.size() != 2 && + args.size() != 3) { // The output audio file is optional. + // Print usage information. + std::cout << usage; + exit(0); + } + const std::string output_audio_filename((args.size() == 3) ? 
args[2] : ""); + const std::string output_files_base_name( + absl::GetFlag(FLAGS_output_files_base_name)); + RTC_CHECK(ValidateOutputFilesOptions( + absl::GetFlag(FLAGS_textlog), + absl::GetFlag(FLAGS_matlabplot) || absl::GetFlag(FLAGS_pythonplot), + output_files_base_name, output_audio_filename)); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcmu))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcma))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_ilbc))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_isac))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_isac_swb))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_opus))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_wb))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_swb32))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_pcm16b_swb48))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_g722))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_16))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_32))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_avt_48))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_red))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_nb))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_wb))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_swb32))); + RTC_CHECK(ValidatePayloadType(absl::GetFlag(FLAGS_cn_swb48))); + RTC_CHECK(ValidateSsrcValue(absl::GetFlag(FLAGS_ssrc))); + RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_audio_level))); + RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_abs_send_time))); + RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_transport_seq_no))); + RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_video_content_type))); + RTC_CHECK(ValidateExtensionId(absl::GetFlag(FLAGS_video_timing))); + + // Make 
force_fieldtrials persistent string during entire program live as + // absl::GetFlag creates temporary string and c_str() will point to + // deallocated string. + const std::string force_fieldtrials = absl::GetFlag(FLAGS_force_fieldtrials); + webrtc::field_trial::InitFieldTrialsFromString(force_fieldtrials.c_str()); + + webrtc::test::NetEqTestFactory::Config config; + config.pcmu = absl::GetFlag(FLAGS_pcmu); + config.pcma = absl::GetFlag(FLAGS_pcma); + config.ilbc = absl::GetFlag(FLAGS_ilbc); + config.isac = absl::GetFlag(FLAGS_isac); + config.isac_swb = absl::GetFlag(FLAGS_isac_swb); + config.opus = absl::GetFlag(FLAGS_opus); + config.pcm16b = absl::GetFlag(FLAGS_pcm16b); + config.pcm16b_wb = absl::GetFlag(FLAGS_pcm16b_wb); + config.pcm16b_swb32 = absl::GetFlag(FLAGS_pcm16b_swb32); + config.pcm16b_swb48 = absl::GetFlag(FLAGS_pcm16b_swb48); + config.g722 = absl::GetFlag(FLAGS_g722); + config.avt = absl::GetFlag(FLAGS_avt); + config.avt_16 = absl::GetFlag(FLAGS_avt_16); + config.avt_32 = absl::GetFlag(FLAGS_avt_32); + config.avt_48 = absl::GetFlag(FLAGS_avt_48); + config.red = absl::GetFlag(FLAGS_red); + config.cn_nb = absl::GetFlag(FLAGS_cn_nb); + config.cn_wb = absl::GetFlag(FLAGS_cn_wb); + config.cn_swb32 = absl::GetFlag(FLAGS_cn_swb32); + config.cn_swb48 = absl::GetFlag(FLAGS_cn_swb48); + config.replacement_audio_file = absl::GetFlag(FLAGS_replacement_audio_file); + config.audio_level = absl::GetFlag(FLAGS_audio_level); + config.abs_send_time = absl::GetFlag(FLAGS_abs_send_time); + config.transport_seq_no = absl::GetFlag(FLAGS_transport_seq_no); + config.video_content_type = absl::GetFlag(FLAGS_video_content_type); + config.video_timing = absl::GetFlag(FLAGS_video_timing); + config.matlabplot = absl::GetFlag(FLAGS_matlabplot); + config.pythonplot = absl::GetFlag(FLAGS_pythonplot); + config.concealment_events = absl::GetFlag(FLAGS_concealment_events); + config.max_nr_packets_in_buffer = + absl::GetFlag(FLAGS_max_nr_packets_in_buffer); + 
config.enable_fast_accelerate = absl::GetFlag(FLAGS_enable_fast_accelerate); + if (!output_audio_filename.empty()) { + config.output_audio_filename = output_audio_filename; + } + config.textlog = absl::GetFlag(FLAGS_textlog); + config.textlog_filename = CreateOptionalOutputFileName( + absl::GetFlag(FLAGS_textlog), output_files_base_name, + output_audio_filename, ".text_log.txt"); + config.plot_scripts_basename = CreateOptionalOutputFileName( + absl::GetFlag(FLAGS_matlabplot) || absl::GetFlag(FLAGS_pythonplot), + output_files_base_name, output_audio_filename, ""); + + // Check if an SSRC value was provided. + if (absl::GetFlag(FLAGS_ssrc).size() > 0) { + uint32_t ssrc; + RTC_CHECK(ParseSsrc(absl::GetFlag(FLAGS_ssrc), &ssrc)) + << "Flag verification has failed."; + config.ssrc_filter = absl::make_optional(ssrc); + } + + std::unique_ptr<webrtc::test::NetEqTest> test = + factory.InitializeTestFromFile(/*input_filename=*/args[1], + /*factory=*/nullptr, config); + RTC_CHECK(test) << "ERROR: Unable to run test"; + test->Run(); + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh new file mode 100755 index 0000000000..0a6bf16016 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_rtpplay_test.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# +# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +# + +# Aliases. +BIN=$1 +TEST_RTC_EVENT_LOG=$2 +INPUT_PCM_FILE=$3 + +# Check setup. +if [ ! -f $BIN ]; then + echo "Cannot find neteq_rtpplay binary." + exit 99 +fi +if [ ! 
-f $TEST_RTC_EVENT_LOG ]; then + echo "Cannot find RTC event log file." + exit 99 +fi +if [ ! -f $INPUT_PCM_FILE ]; then + echo "Cannot find PCM file." + exit 99 +fi + +# Defines. + +TMP_DIR=$(mktemp -d /tmp/tmp_XXXXXXXXXX) +PASS=0 +FAIL=1 +TEST_SUITE_RESULT=$PASS + +file_hash () { + md5sum $1 | awk '{ print $1 }' +} + +test_passed () { + echo PASS +} + +test_failed () { + echo "FAIL: $1" + TEST_SUITE_RESULT=$FAIL +} + +test_file_checksums_match () { + if [ ! -f $1 ] || [ ! -f $2 ]; then + test_failed "Cannot compare hash values: file(s) not found." + return + fi + HASH1=$(file_hash $1) + HASH2=$(file_hash $2) + if [ "$HASH1" = "$HASH2" ]; then + test_passed + else + test_failed "$1 differs from $2" + fi +} + +test_file_exists () { + if [ -f $1 ]; then + test_passed + else + test_failed "$1 does not exist" + fi +} + +test_exit_code_0 () { + if [ $1 -eq 0 ]; then + test_passed + else + test_failed "$1 did not return 0" + fi +} + +test_exit_code_not_0 () { + if [ $1 -eq 0 ]; then + test_failed "$1 returned 0" + else + test_passed + fi +} + +# Generate test data. + +# Case 1. Pre-existing way. +CASE1_WAV=$TMP_DIR/case1.wav +$BIN $TEST_RTC_EVENT_LOG $CASE1_WAV \ + --replacement_audio_file $INPUT_PCM_FILE \ + --textlog --pythonplot --matlabplot \ + > $TMP_DIR/case1.stdout 2> /dev/null +CASE1_RETURN_CODE=$? +CASE1_TEXTLOG=$TMP_DIR/case1.wav.text_log.txt +CASE1_PYPLOT=$TMP_DIR/case1_wav.py +CASE1_MATPLOT=$TMP_DIR/case1_wav.m + +# Case 2. No output files. +$BIN $TEST_RTC_EVENT_LOG --replacement_audio_file $INPUT_PCM_FILE \ + > $TMP_DIR/case2.stdout 2> /dev/null +CASE2_RETURN_CODE=$? + +# Case 3. No output audio file. + +# Case 3.1 Without --output_files_base_name (won't run). +$BIN $TEST_RTC_EVENT_LOG \ + --replacement_audio_file $INPUT_PCM_FILE \ + --textlog --pythonplot --matlabplot \ + &> /dev/null +CASE3_1_RETURN_CODE=$? + +# Case 3.2 With --output_files_base_name (runs). 
+$BIN $TEST_RTC_EVENT_LOG \ + --replacement_audio_file $INPUT_PCM_FILE \ + --output_files_base_name $TMP_DIR/case3_2 \ + --textlog --pythonplot --matlabplot \ + > $TMP_DIR/case3_2.stdout 2> /dev/null +CASE3_2_RETURN_CODE=$? +CASE3_2_TEXTLOG=$TMP_DIR/case3_2.text_log.txt +CASE3_2_PYPLOT=$TMP_DIR/case3_2.py +CASE3_2_MATPLOT=$TMP_DIR/case3_2.m + +# Case 4. With output audio file and --output_files_base_name. +CASE4_WAV=$TMP_DIR/case4.wav +$BIN $TEST_RTC_EVENT_LOG $TMP_DIR/case4.wav \ + --replacement_audio_file $INPUT_PCM_FILE \ + --output_files_base_name $TMP_DIR/case4 \ + --textlog --pythonplot --matlabplot \ + > $TMP_DIR/case4.stdout 2> /dev/null +CASE4_RETURN_CODE=$? +CASE4_TEXTLOG=$TMP_DIR/case4.text_log.txt +CASE4_PYPLOT=$TMP_DIR/case4.py +CASE4_MATPLOT=$TMP_DIR/case4.m + +# Tests. + +echo Check exit codes +test_exit_code_0 $CASE1_RETURN_CODE +test_exit_code_0 $CASE2_RETURN_CODE +test_exit_code_not_0 $CASE3_1_RETURN_CODE +test_exit_code_0 $CASE3_2_RETURN_CODE +test_exit_code_0 $CASE4_RETURN_CODE + +echo Check that the expected output files exist +test_file_exists $CASE1_TEXTLOG +test_file_exists $CASE3_2_TEXTLOG +test_file_exists $CASE4_TEXTLOG +test_file_exists $CASE1_PYPLOT +test_file_exists $CASE3_2_PYPLOT +test_file_exists $CASE4_PYPLOT +test_file_exists $CASE1_MATPLOT +test_file_exists $CASE3_2_MATPLOT +test_file_exists $CASE4_MATPLOT + +echo Check that the same WAV file is produced +test_file_checksums_match $CASE1_WAV $CASE4_WAV + +echo Check that the same text log is produced +test_file_checksums_match $CASE1_TEXTLOG $CASE3_2_TEXTLOG +test_file_checksums_match $CASE1_TEXTLOG $CASE4_TEXTLOG + +echo Check that the same python plot scripts is produced +test_file_checksums_match $CASE1_PYPLOT $CASE3_2_PYPLOT +test_file_checksums_match $CASE1_PYPLOT $CASE4_PYPLOT + +echo Check that the same matlab plot scripts is produced +test_file_checksums_match $CASE1_MATPLOT $CASE3_2_MATPLOT +test_file_checksums_match $CASE1_MATPLOT $CASE4_MATPLOT + +# Clean up +rm -fr 
$TMP_DIR + +if [ $TEST_SUITE_RESULT -eq $PASS ]; then + echo All tests passed. + exit 0 +else + echo One or more tests failed. + exit 1 +fi diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc new file mode 100644 index 0000000000..6738e494f6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h" + +#include <algorithm> +#include <numeric> +#include <utility> + +#include "rtc_base/checks.h" +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { +namespace test { + +std::string NetEqStatsGetter::ConcealmentEvent::ToString() const { + char ss_buf[256]; + rtc::SimpleStringBuilder ss(ss_buf); + ss << "ConcealmentEvent duration_ms:" << duration_ms + << " event_number:" << concealment_event_number + << " time_from_previous_event_end_ms:" << time_from_previous_event_end_ms; + return ss.str(); +} + +NetEqStatsGetter::NetEqStatsGetter( + std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer) + : delay_analyzer_(std::move(delay_analyzer)) {} + +void NetEqStatsGetter::BeforeGetAudio(NetEq* neteq) { + if (delay_analyzer_) { + delay_analyzer_->BeforeGetAudio(neteq); + } +} + +void NetEqStatsGetter::AfterGetAudio(int64_t time_now_ms, + const AudioFrame& audio_frame, + bool muted, + NetEq* neteq) { + // TODO(minyue): Get stats should better not be called as a call back after + // get audio. 
It is called independently from get audio in practice. + const auto lifetime_stat = neteq->GetLifetimeStatistics(); + if (last_stats_query_time_ms_ == 0 || + rtc::TimeDiff(time_now_ms, last_stats_query_time_ms_) >= + stats_query_interval_ms_) { + NetEqNetworkStatistics stats; + RTC_CHECK_EQ(neteq->NetworkStatistics(&stats), 0); + stats_.push_back(std::make_pair(time_now_ms, stats)); + lifetime_stats_.push_back(std::make_pair(time_now_ms, lifetime_stat)); + last_stats_query_time_ms_ = time_now_ms; + } + + const auto voice_concealed_samples = + lifetime_stat.concealed_samples - lifetime_stat.silent_concealed_samples; + if (current_concealment_event_ != lifetime_stat.concealment_events && + voice_concealed_samples_until_last_event_ < voice_concealed_samples) { + if (last_event_end_time_ms_ > 0) { + // Do not account for the first event to avoid start of the call + // skewing. + ConcealmentEvent concealment_event; + uint64_t last_event_voice_concealed_samples = + voice_concealed_samples - voice_concealed_samples_until_last_event_; + RTC_CHECK_GT(last_event_voice_concealed_samples, 0); + concealment_event.duration_ms = last_event_voice_concealed_samples / + (audio_frame.sample_rate_hz_ / 1000); + concealment_event.concealment_event_number = current_concealment_event_; + concealment_event.time_from_previous_event_end_ms = + time_now_ms - last_event_end_time_ms_; + concealment_events_.emplace_back(concealment_event); + voice_concealed_samples_until_last_event_ = voice_concealed_samples; + } + last_event_end_time_ms_ = time_now_ms; + voice_concealed_samples_until_last_event_ = voice_concealed_samples; + current_concealment_event_ = lifetime_stat.concealment_events; + } + + if (delay_analyzer_) { + delay_analyzer_->AfterGetAudio(time_now_ms, audio_frame, muted, neteq); + } +} + +double NetEqStatsGetter::AverageSpeechExpandRate() const { + double sum_speech_expand = std::accumulate( + stats_.begin(), stats_.end(), double{0.0}, + [](double a, std::pair<int64_t, 
NetEqNetworkStatistics> b) { + return a + static_cast<double>(b.second.speech_expand_rate); + }); + return sum_speech_expand / 16384.0 / stats_.size(); +} + +NetEqStatsGetter::Stats NetEqStatsGetter::AverageStats() const { + Stats sum_stats = std::accumulate( + stats_.begin(), stats_.end(), Stats(), + [](Stats a, std::pair<int64_t, NetEqNetworkStatistics> bb) { + const auto& b = bb.second; + a.current_buffer_size_ms += b.current_buffer_size_ms; + a.preferred_buffer_size_ms += b.preferred_buffer_size_ms; + a.jitter_peaks_found += b.jitter_peaks_found; + a.expand_rate += b.expand_rate / 16384.0; + a.speech_expand_rate += b.speech_expand_rate / 16384.0; + a.preemptive_rate += b.preemptive_rate / 16384.0; + a.accelerate_rate += b.accelerate_rate / 16384.0; + a.secondary_decoded_rate += b.secondary_decoded_rate / 16384.0; + a.secondary_discarded_rate += b.secondary_discarded_rate / 16384.0; + a.mean_waiting_time_ms += b.mean_waiting_time_ms; + a.median_waiting_time_ms += b.median_waiting_time_ms; + a.min_waiting_time_ms = std::min( + a.min_waiting_time_ms, static_cast<double>(b.min_waiting_time_ms)); + a.max_waiting_time_ms = std::max( + a.max_waiting_time_ms, static_cast<double>(b.max_waiting_time_ms)); + return a; + }); + + sum_stats.current_buffer_size_ms /= stats_.size(); + sum_stats.preferred_buffer_size_ms /= stats_.size(); + sum_stats.jitter_peaks_found /= stats_.size(); + sum_stats.packet_loss_rate /= stats_.size(); + sum_stats.expand_rate /= stats_.size(); + sum_stats.speech_expand_rate /= stats_.size(); + sum_stats.preemptive_rate /= stats_.size(); + sum_stats.accelerate_rate /= stats_.size(); + sum_stats.secondary_decoded_rate /= stats_.size(); + sum_stats.secondary_discarded_rate /= stats_.size(); + sum_stats.added_zero_samples /= stats_.size(); + sum_stats.mean_waiting_time_ms /= stats_.size(); + sum_stats.median_waiting_time_ms /= stats_.size(); + + return sum_stats; +} + +} // namespace test +} // namespace webrtc diff --git 
a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h new file mode 100644 index 0000000000..b1b12bb1f8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_getter.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" + +namespace webrtc { +namespace test { + +class NetEqStatsGetter : public NetEqGetAudioCallback { + public: + // This struct is a replica of webrtc::NetEqNetworkStatistics, but with all + // values stored in double precision. 
+ struct Stats { + double current_buffer_size_ms = 0.0; + double preferred_buffer_size_ms = 0.0; + double jitter_peaks_found = 0.0; + double packet_loss_rate = 0.0; + double expand_rate = 0.0; + double speech_expand_rate = 0.0; + double preemptive_rate = 0.0; + double accelerate_rate = 0.0; + double secondary_decoded_rate = 0.0; + double secondary_discarded_rate = 0.0; + double clockdrift_ppm = 0.0; + double added_zero_samples = 0.0; + double mean_waiting_time_ms = 0.0; + double median_waiting_time_ms = 0.0; + double min_waiting_time_ms = 0.0; + double max_waiting_time_ms = 0.0; + }; + + struct ConcealmentEvent { + uint64_t duration_ms; + size_t concealment_event_number; + int64_t time_from_previous_event_end_ms; + std::string ToString() const; + }; + + // Takes a pointer to another callback object, which will be invoked after + // this object finishes. This does not transfer ownership, and null is a + // valid value. + explicit NetEqStatsGetter(std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer); + + void set_stats_query_interval_ms(int64_t stats_query_interval_ms) { + stats_query_interval_ms_ = stats_query_interval_ms; + } + + void BeforeGetAudio(NetEq* neteq) override; + + void AfterGetAudio(int64_t time_now_ms, + const AudioFrame& audio_frame, + bool muted, + NetEq* neteq) override; + + double AverageSpeechExpandRate() const; + + NetEqDelayAnalyzer* delay_analyzer() const { return delay_analyzer_.get(); } + + const std::vector<ConcealmentEvent>& concealment_events() const { + // Do not account for the last concealment event to avoid potential end + // call skewing. 
+ return concealment_events_; + } + + const std::vector<std::pair<int64_t, NetEqNetworkStatistics>>* stats() const { + return &stats_; + } + + const std::vector<std::pair<int64_t, NetEqLifetimeStatistics>>* + lifetime_stats() const { + return &lifetime_stats_; + } + + Stats AverageStats() const; + + private: + std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer_; + int64_t stats_query_interval_ms_ = 1000; + int64_t last_stats_query_time_ms_ = 0; + std::vector<std::pair<int64_t, NetEqNetworkStatistics>> stats_; + std::vector<std::pair<int64_t, NetEqLifetimeStatistics>> lifetime_stats_; + size_t current_concealment_event_ = 1; + uint64_t voice_concealed_samples_until_last_event_ = 0; + std::vector<ConcealmentEvent> concealment_events_; + int64_t last_event_end_time_ms_ = 0; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_GETTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc new file mode 100644 index 0000000000..162a4c9300 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/neteq_stats_plotter.h" + +#include <inttypes.h> +#include <stdio.h> + +#include <utility> + +#include "absl/strings/string_view.h" + +namespace webrtc { +namespace test { + +NetEqStatsPlotter::NetEqStatsPlotter(bool make_matlab_plot, + bool make_python_plot, + bool show_concealment_events, + absl::string_view base_file_name) + : make_matlab_plot_(make_matlab_plot), + make_python_plot_(make_python_plot), + show_concealment_events_(show_concealment_events), + base_file_name_(base_file_name) { + std::unique_ptr<NetEqDelayAnalyzer> delay_analyzer; + if (make_matlab_plot || make_python_plot) { + delay_analyzer.reset(new NetEqDelayAnalyzer); + } + stats_getter_.reset(new NetEqStatsGetter(std::move(delay_analyzer))); +} + +void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) { + if (make_matlab_plot_) { + auto matlab_script_name = base_file_name_; + std::replace(matlab_script_name.begin(), matlab_script_name.end(), '.', + '_'); + printf("Creating Matlab plot script %s.m\n", matlab_script_name.c_str()); + stats_getter_->delay_analyzer()->CreateMatlabScript(matlab_script_name + + ".m"); + } + if (make_python_plot_) { + auto python_script_name = base_file_name_; + std::replace(python_script_name.begin(), python_script_name.end(), '.', + '_'); + printf("Creating Python plot script %s.py\n", python_script_name.c_str()); + stats_getter_->delay_analyzer()->CreatePythonScript(python_script_name + + ".py"); + } + + printf("Simulation statistics:\n"); + printf(" output duration: %" PRId64 " ms\n", simulation_time_ms); + auto stats = stats_getter_->AverageStats(); + printf(" packet_loss_rate: %f %%\n", 100.0 * stats.packet_loss_rate); + printf(" expand_rate: %f %%\n", 100.0 * stats.expand_rate); + printf(" speech_expand_rate: %f %%\n", 100.0 * stats.speech_expand_rate); + printf(" preemptive_rate: %f %%\n", 100.0 * stats.preemptive_rate); + printf(" accelerate_rate: %f %%\n", 100.0 * stats.accelerate_rate); + printf(" 
secondary_decoded_rate: %f %%\n", + 100.0 * stats.secondary_decoded_rate); + printf(" secondary_discarded_rate: %f %%\n", + 100.0 * stats.secondary_discarded_rate); + printf(" clockdrift_ppm: %f ppm\n", stats.clockdrift_ppm); + printf(" mean_waiting_time_ms: %f ms\n", stats.mean_waiting_time_ms); + printf(" median_waiting_time_ms: %f ms\n", stats.median_waiting_time_ms); + printf(" min_waiting_time_ms: %f ms\n", stats.min_waiting_time_ms); + printf(" max_waiting_time_ms: %f ms\n", stats.max_waiting_time_ms); + printf(" current_buffer_size_ms: %f ms\n", stats.current_buffer_size_ms); + printf(" preferred_buffer_size_ms: %f ms\n", stats.preferred_buffer_size_ms); + if (show_concealment_events_) { + printf(" concealment_events_ms:\n"); + for (auto concealment_event : stats_getter_->concealment_events()) + printf("%s\n", concealment_event.ToString().c_str()); + printf(" end of concealment_events_ms\n"); + } + + const auto lifetime_stats_vector = stats_getter_->lifetime_stats(); + if (!lifetime_stats_vector->empty()) { + auto lifetime_stats = lifetime_stats_vector->back().second; + printf(" total_samples_received: %" PRIu64 "\n", + lifetime_stats.total_samples_received); + printf(" concealed_samples: %" PRIu64 "\n", + lifetime_stats.concealed_samples); + printf(" concealment_events: %" PRIu64 "\n", + lifetime_stats.concealment_events); + printf(" delayed_packet_outage_samples: %" PRIu64 "\n", + lifetime_stats.delayed_packet_outage_samples); + printf(" num_interruptions: %d\n", lifetime_stats.interruption_count); + printf(" sum_interruption_length_ms: %d ms\n", + lifetime_stats.total_interruption_duration_ms); + printf(" interruption_ratio: %f\n", + static_cast<double>(lifetime_stats.total_interruption_duration_ms) / + simulation_time_ms); + printf(" removed_samples_for_acceleration: %" PRIu64 "\n", + lifetime_stats.removed_samples_for_acceleration); + printf(" inserted_samples_for_deceleration: %" PRIu64 "\n", + lifetime_stats.inserted_samples_for_deceleration); + 
printf(" generated_noise_samples: %" PRIu64 "\n", + lifetime_stats.generated_noise_samples); + printf(" packets_discarded: %" PRIu64 "\n", + lifetime_stats.packets_discarded); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h new file mode 100644 index 0000000000..11c16da9d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_stats_plotter.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_ + +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h" +#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" + +namespace webrtc { +namespace test { + +class NetEqStatsPlotter : public NetEqSimulationEndedCallback { + public: + NetEqStatsPlotter(bool make_matlab_plot, + bool make_python_plot, + bool show_concealment_events, + absl::string_view base_file_name); + + void SimulationEnded(int64_t simulation_time_ms) override; + + NetEqStatsGetter* stats_getter() { return stats_getter_.get(); } + + private: + std::unique_ptr<NetEqStatsGetter> stats_getter_; + const bool make_matlab_plot_; + const bool make_python_plot_; + const bool show_concealment_events_; + const std::string base_file_name_; +}; + +} // namespace test 
+} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_STATS_PLOTTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc new file mode 100644 index 0000000000..19b1df11a1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.cc @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_test.h" + +#include <iomanip> +#include <iostream> + +#include "modules/audio_coding/neteq/default_neteq_factory.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +namespace test { +namespace { + +absl::optional<NetEq::Operation> ActionToOperations( + absl::optional<NetEqSimulator::Action> a) { + if (!a) { + return absl::nullopt; + } + switch (*a) { + case NetEqSimulator::Action::kAccelerate: + return absl::make_optional(NetEq::Operation::kAccelerate); + case NetEqSimulator::Action::kExpand: + return absl::make_optional(NetEq::Operation::kExpand); + case NetEqSimulator::Action::kNormal: + return absl::make_optional(NetEq::Operation::kNormal); + case NetEqSimulator::Action::kPreemptiveExpand: + return absl::make_optional(NetEq::Operation::kPreemptiveExpand); + } +} + +std::unique_ptr<NetEq> CreateNetEq( + const NetEq::Config& config, + Clock* clock, + const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) { + return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock); +} + +} // namespace + +void 
DefaultNetEqTestErrorCallback::OnInsertPacketError( + const NetEqInput::PacketData& packet) { + std::cerr << "InsertPacket returned an error." << std::endl; + std::cerr << "Packet data: " << packet.ToString() << std::endl; + RTC_FATAL(); +} + +void DefaultNetEqTestErrorCallback::OnGetAudioError() { + std::cerr << "GetAudio returned an error." << std::endl; + RTC_FATAL(); +} + +NetEqTest::NetEqTest(const NetEq::Config& config, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + const DecoderMap& codecs, + std::unique_ptr<std::ofstream> text_log, + NetEqFactory* neteq_factory, + std::unique_ptr<NetEqInput> input, + std::unique_ptr<AudioSink> output, + Callbacks callbacks) + : clock_(0), + neteq_(neteq_factory + ? neteq_factory->CreateNetEq(config, decoder_factory, &clock_) + : CreateNetEq(config, &clock_, decoder_factory)), + input_(std::move(input)), + output_(std::move(output)), + callbacks_(callbacks), + sample_rate_hz_(config.sample_rate_hz), + text_log_(std::move(text_log)) { + RTC_CHECK(!config.enable_muted_state) + << "The code does not handle enable_muted_state"; + RegisterDecoders(codecs); +} + +NetEqTest::~NetEqTest() = default; + +int64_t NetEqTest::Run() { + int64_t simulation_time = 0; + SimulationStepResult step_result; + do { + step_result = RunToNextGetAudio(); + simulation_time += step_result.simulation_step_ms; + } while (!step_result.is_simulation_finished); + if (callbacks_.simulation_ended_callback) { + callbacks_.simulation_ended_callback->SimulationEnded(simulation_time); + } + return simulation_time; +} + +NetEqTest::SimulationStepResult NetEqTest::RunToNextGetAudio() { + SimulationStepResult result; + const int64_t start_time_ms = *input_->NextEventTime(); + int64_t time_now_ms = start_time_ms; + current_state_.packet_iat_ms.clear(); + + while (!input_->ended()) { + // Advance time to next event. 
+ RTC_DCHECK(input_->NextEventTime()); + clock_.AdvanceTimeMilliseconds(*input_->NextEventTime() - time_now_ms); + time_now_ms = *input_->NextEventTime(); + // Check if it is time to insert packet. + if (input_->NextPacketTime() && time_now_ms >= *input_->NextPacketTime()) { + std::unique_ptr<NetEqInput::PacketData> packet_data = input_->PopPacket(); + RTC_CHECK(packet_data); + const size_t payload_data_length = + packet_data->payload.size() - packet_data->header.paddingLength; + if (payload_data_length != 0) { + int error = neteq_->InsertPacket( + packet_data->header, + rtc::ArrayView<const uint8_t>(packet_data->payload)); + if (error != NetEq::kOK && callbacks_.error_callback) { + callbacks_.error_callback->OnInsertPacketError(*packet_data); + } + if (callbacks_.post_insert_packet) { + callbacks_.post_insert_packet->AfterInsertPacket(*packet_data, + neteq_.get()); + } + } else { + neteq_->InsertEmptyPacket(packet_data->header); + } + if (last_packet_time_ms_) { + current_state_.packet_iat_ms.push_back(time_now_ms - + *last_packet_time_ms_); + } + if (text_log_) { + const auto ops_state = neteq_->GetOperationsAndState(); + const auto delta_wallclock = + last_packet_time_ms_ ? (time_now_ms - *last_packet_time_ms_) : -1; + const auto delta_timestamp = + last_packet_timestamp_ + ? (static_cast<int64_t>(packet_data->header.timestamp) - + *last_packet_timestamp_) * + 1000 / sample_rate_hz_ + : -1; + const auto packet_size_bytes = + packet_data->payload.size() == 12 + ? 
ByteReader<uint32_t>::ReadLittleEndian( + &packet_data->payload[8]) + : -1; + *text_log_ << "Packet - wallclock: " << std::setw(5) << time_now_ms + << ", delta wc: " << std::setw(4) << delta_wallclock + << ", seq_no: " << packet_data->header.sequenceNumber + << ", timestamp: " << std::setw(10) + << packet_data->header.timestamp + << ", delta ts: " << std::setw(4) << delta_timestamp + << ", size: " << std::setw(5) << packet_size_bytes + << ", frame size: " << std::setw(3) + << ops_state.current_frame_size_ms + << ", buffer size: " << std::setw(4) + << ops_state.current_buffer_size_ms << std::endl; + } + last_packet_time_ms_ = absl::make_optional<int>(time_now_ms); + last_packet_timestamp_ = + absl::make_optional<uint32_t>(packet_data->header.timestamp); + } + + // Check if it is time to get output audio. + if (input_->NextOutputEventTime() && + time_now_ms >= *input_->NextOutputEventTime()) { + if (callbacks_.get_audio_callback) { + callbacks_.get_audio_callback->BeforeGetAudio(neteq_.get()); + } + AudioFrame out_frame; + bool muted; + int error = neteq_->GetAudio(&out_frame, &muted, nullptr, + ActionToOperations(next_action_)); + next_action_ = absl::nullopt; + RTC_CHECK(!muted) << "The code does not handle enable_muted_state"; + if (error != NetEq::kOK) { + if (callbacks_.error_callback) { + callbacks_.error_callback->OnGetAudioError(); + } + } else { + sample_rate_hz_ = out_frame.sample_rate_hz_; + } + if (callbacks_.get_audio_callback) { + callbacks_.get_audio_callback->AfterGetAudio(time_now_ms, out_frame, + muted, neteq_.get()); + } + + if (output_) { + RTC_CHECK(output_->WriteArray( + out_frame.data(), + out_frame.samples_per_channel_ * out_frame.num_channels_)); + } + + input_->AdvanceOutputEvent(); + result.simulation_step_ms = + input_->NextEventTime().value_or(time_now_ms) - start_time_ms; + const auto operations_state = neteq_->GetOperationsAndState(); + current_state_.current_delay_ms = operations_state.current_buffer_size_ms; + 
current_state_.packet_size_ms = operations_state.current_frame_size_ms; + current_state_.next_packet_available = + operations_state.next_packet_available; + current_state_.packet_buffer_flushed = + operations_state.packet_buffer_flushes > + prev_ops_state_.packet_buffer_flushes; + // TODO(ivoc): Add more accurate reporting by tracking the origin of + // samples in the sync buffer. + result.action_times_ms[Action::kExpand] = 0; + result.action_times_ms[Action::kAccelerate] = 0; + result.action_times_ms[Action::kPreemptiveExpand] = 0; + result.action_times_ms[Action::kNormal] = 0; + + if (out_frame.speech_type_ == AudioFrame::SpeechType::kPLC || + out_frame.speech_type_ == AudioFrame::SpeechType::kPLCCNG) { + // Consider the whole frame to be the result of expansion. + result.action_times_ms[Action::kExpand] = 10; + } else if (operations_state.accelerate_samples - + prev_ops_state_.accelerate_samples > + 0) { + // Consider the whole frame to be the result of acceleration. + result.action_times_ms[Action::kAccelerate] = 10; + } else if (operations_state.preemptive_samples - + prev_ops_state_.preemptive_samples > + 0) { + // Consider the whole frame to be the result of preemptive expansion. + result.action_times_ms[Action::kPreemptiveExpand] = 10; + } else { + // Consider the whole frame to be the result of normal playout. 
+ result.action_times_ms[Action::kNormal] = 10; + } + auto lifetime_stats = LifetimeStats(); + if (text_log_) { + const bool plc = + (out_frame.speech_type_ == AudioFrame::SpeechType::kPLC) || + (out_frame.speech_type_ == AudioFrame::SpeechType::kPLCCNG); + const bool cng = out_frame.speech_type_ == AudioFrame::SpeechType::kCNG; + const bool voice_concealed = + (lifetime_stats.concealed_samples - + lifetime_stats.silent_concealed_samples) > + (prev_lifetime_stats_.concealed_samples - + prev_lifetime_stats_.silent_concealed_samples); + *text_log_ << "GetAudio - wallclock: " << std::setw(5) << time_now_ms + << ", delta wc: " << std::setw(4) + << (input_->NextEventTime().value_or(time_now_ms) - + start_time_ms) + << ", CNG: " << cng << ", PLC: " << plc + << ", voice concealed: " << voice_concealed + << ", buffer size: " << std::setw(4) + << current_state_.current_delay_ms << std::endl; + if (lifetime_stats.packets_discarded > + prev_lifetime_stats_.packets_discarded) { + *text_log_ << "Discarded " + << (lifetime_stats.packets_discarded - + prev_lifetime_stats_.packets_discarded) + << " primary packets." << std::endl; + } + if (operations_state.packet_buffer_flushes > + prev_ops_state_.packet_buffer_flushes) { + *text_log_ << "Flushed packet buffer " + << (operations_state.packet_buffer_flushes - + prev_ops_state_.packet_buffer_flushes) + << " times." << std::endl; + } + } + prev_lifetime_stats_ = lifetime_stats; + const bool no_more_packets_to_decode = + !input_->NextPacketTime() && !operations_state.next_packet_available; + // End the simulation if the gap is too large. This indicates an issue + // with the event log file. + const bool simulation_step_too_large = result.simulation_step_ms > 1000; + if (simulation_step_too_large) { + // If we don't reset the step time, the large gap will be included in + // the simulation time, which can be a large distortion. 
+ result.simulation_step_ms = 10; + } + result.is_simulation_finished = simulation_step_too_large || + no_more_packets_to_decode || + input_->ended(); + prev_ops_state_ = operations_state; + return result; + } + } + result.simulation_step_ms = + input_->NextEventTime().value_or(time_now_ms) - start_time_ms; + result.is_simulation_finished = true; + return result; +} + +void NetEqTest::SetNextAction(NetEqTest::Action next_operation) { + next_action_ = absl::optional<Action>(next_operation); +} + +NetEqTest::NetEqState NetEqTest::GetNetEqState() { + return current_state_; +} + +NetEqNetworkStatistics NetEqTest::SimulationStats() { + NetEqNetworkStatistics stats; + RTC_CHECK_EQ(neteq_->NetworkStatistics(&stats), 0); + return stats; +} + +NetEqLifetimeStatistics NetEqTest::LifetimeStats() const { + return neteq_->GetLifetimeStatistics(); +} + +NetEqTest::DecoderMap NetEqTest::StandardDecoderMap() { + DecoderMap codecs = { + {0, SdpAudioFormat("pcmu", 8000, 1)}, + {8, SdpAudioFormat("pcma", 8000, 1)}, +#ifdef WEBRTC_CODEC_ILBC + {102, SdpAudioFormat("ilbc", 8000, 1)}, +#endif + {103, SdpAudioFormat("isac", 16000, 1)}, +#if !defined(WEBRTC_ANDROID) + {104, SdpAudioFormat("isac", 32000, 1)}, +#endif +#ifdef WEBRTC_CODEC_OPUS + {111, SdpAudioFormat("opus", 48000, 2)}, +#endif + {93, SdpAudioFormat("l16", 8000, 1)}, + {94, SdpAudioFormat("l16", 16000, 1)}, + {95, SdpAudioFormat("l16", 32000, 1)}, + {96, SdpAudioFormat("l16", 48000, 1)}, + {9, SdpAudioFormat("g722", 8000, 1)}, + {106, SdpAudioFormat("telephone-event", 8000, 1)}, + {114, SdpAudioFormat("telephone-event", 16000, 1)}, + {115, SdpAudioFormat("telephone-event", 32000, 1)}, + {116, SdpAudioFormat("telephone-event", 48000, 1)}, + {117, SdpAudioFormat("red", 8000, 1)}, + {13, SdpAudioFormat("cn", 8000, 1)}, + {98, SdpAudioFormat("cn", 16000, 1)}, + {99, SdpAudioFormat("cn", 32000, 1)}, + {100, SdpAudioFormat("cn", 48000, 1)} + }; + return codecs; +} + +void NetEqTest::RegisterDecoders(const DecoderMap& codecs) { + 
for (const auto& c : codecs) { + RTC_CHECK(neteq_->RegisterPayloadType(c.first, c.second)) + << "Cannot register " << c.second.name << " to payload type " + << c.first; + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h new file mode 100644 index 0000000000..0a6c24f3d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_ + +#include <fstream> +#include <map> +#include <memory> +#include <string> +#include <utility> + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_factory.h" +#include "api/test/neteq_simulator.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/neteq_input.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +namespace test { + +class NetEqTestErrorCallback { + public: + virtual ~NetEqTestErrorCallback() = default; + virtual void OnInsertPacketError(const NetEqInput::PacketData& packet) {} + virtual void OnGetAudioError() {} +}; + +class DefaultNetEqTestErrorCallback : public NetEqTestErrorCallback { + void OnInsertPacketError(const NetEqInput::PacketData& packet) override; + void OnGetAudioError() override; +}; + +class NetEqPostInsertPacket { + public: + virtual 
~NetEqPostInsertPacket() = default; + virtual void AfterInsertPacket(const NetEqInput::PacketData& packet, + NetEq* neteq) = 0; +}; + +class NetEqGetAudioCallback { + public: + virtual ~NetEqGetAudioCallback() = default; + virtual void BeforeGetAudio(NetEq* neteq) = 0; + virtual void AfterGetAudio(int64_t time_now_ms, + const AudioFrame& audio_frame, + bool muted, + NetEq* neteq) = 0; +}; + +class NetEqSimulationEndedCallback { + public: + virtual ~NetEqSimulationEndedCallback() = default; + virtual void SimulationEnded(int64_t simulation_time_ms) = 0; +}; + +// Class that provides an input--output test for NetEq. The input (both packets +// and output events) is provided by a NetEqInput object, while the output is +// directed to an AudioSink object. +class NetEqTest : public NetEqSimulator { + public: + using DecoderMap = std::map<int, SdpAudioFormat>; + + struct Callbacks { + NetEqTestErrorCallback* error_callback = nullptr; + NetEqPostInsertPacket* post_insert_packet = nullptr; + NetEqGetAudioCallback* get_audio_callback = nullptr; + NetEqSimulationEndedCallback* simulation_ended_callback = nullptr; + }; + + // Sets up the test with given configuration, codec mappings, input, output, + // and callback objects for error reporting. + NetEqTest(const NetEq::Config& config, + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory, + const DecoderMap& codecs, + std::unique_ptr<std::ofstream> text_log, + NetEqFactory* neteq_factory, + std::unique_ptr<NetEqInput> input, + std::unique_ptr<AudioSink> output, + Callbacks callbacks); + + ~NetEqTest() override; + + // Runs the test. Returns the duration of the produced audio in ms. + int64_t Run() override; + // Runs the simulation until we hit the next GetAudio event. If the simulation + // is finished, is_simulation_finished will be set to true in the returned + // SimulationStepResult. 
+ SimulationStepResult RunToNextGetAudio() override; + + void SetNextAction(Action next_operation) override; + NetEqState GetNetEqState() override; + + // Returns the statistics from NetEq. + NetEqNetworkStatistics SimulationStats(); + NetEqLifetimeStatistics LifetimeStats() const; + + static DecoderMap StandardDecoderMap(); + + private: + void RegisterDecoders(const DecoderMap& codecs); + SimulatedClock clock_; + absl::optional<Action> next_action_; + absl::optional<int> last_packet_time_ms_; + std::unique_ptr<NetEq> neteq_; + std::unique_ptr<NetEqInput> input_; + std::unique_ptr<AudioSink> output_; + Callbacks callbacks_; + int sample_rate_hz_; + NetEqState current_state_; + NetEqOperationsAndState prev_ops_state_; + NetEqLifetimeStatistics prev_lifetime_stats_; + absl::optional<uint32_t> last_packet_timestamp_; + std::unique_ptr<std::ofstream> text_log_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc new file mode 100644 index 0000000000..6cd371406c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.cc @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/neteq_test_factory.h" + +#include <errno.h> +#include <limits.h> // For ULONG_MAX returned by strtoul. +#include <stdio.h> +#include <stdlib.h> // For strtoul. 
+ +#include <fstream> +#include <iostream> +#include <memory> +#include <set> +#include <string> +#include <utility> + +#include "absl/strings/string_view.h" +#include "api/audio_codecs/builtin_audio_decoder_factory.h" +#include "api/neteq/neteq.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h" +#include "modules/audio_coding/neteq/tools/initial_packet_inserter_neteq_input.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/neteq_delay_analyzer.h" +#include "modules/audio_coding/neteq/tools/neteq_event_log_input.h" +#include "modules/audio_coding/neteq/tools/neteq_packet_source_input.h" +#include "modules/audio_coding/neteq/tools/neteq_replacement_input.h" +#include "modules/audio_coding/neteq/tools/neteq_stats_getter.h" +#include "modules/audio_coding/neteq/tools/neteq_stats_plotter.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" +#include "modules/audio_coding/neteq/tools/output_audio_file.h" +#include "modules/audio_coding/neteq/tools/output_wav_file.h" +#include "modules/audio_coding/neteq/tools/rtp_file_source.h" +#include "rtc_base/checks.h" +#include "test/function_audio_decoder_factory.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { +namespace { + +absl::optional<int> CodecSampleRate( + uint8_t payload_type, + webrtc::test::NetEqTestFactory::Config config) { + if (payload_type == config.pcmu || payload_type == config.pcma || + payload_type == config.ilbc || payload_type == config.pcm16b || + payload_type == config.cn_nb || payload_type == config.avt) + return 8000; + if (payload_type == config.isac || payload_type == config.pcm16b_wb || + payload_type == config.g722 || payload_type == config.cn_wb || + payload_type == config.avt_16) + return 16000; + if (payload_type == config.isac_swb || payload_type == config.pcm16b_swb32 || + payload_type == config.cn_swb32 || 
payload_type == config.avt_32) + return 32000; + if (payload_type == config.opus || payload_type == config.pcm16b_swb48 || + payload_type == config.cn_swb48 || payload_type == config.avt_48) + return 48000; + if (payload_type == config.red) + return 0; + return absl::nullopt; +} + +} // namespace + +// A callback class which prints whenever the inserted packet stream changes +// the SSRC. +class SsrcSwitchDetector : public NetEqPostInsertPacket { + public: + // Takes a pointer to another callback object, which will be invoked after + // this object finishes. This does not transfer ownership, and null is a + // valid value. + explicit SsrcSwitchDetector(NetEqPostInsertPacket* other_callback) + : other_callback_(other_callback) {} + + void AfterInsertPacket(const NetEqInput::PacketData& packet, + NetEq* neteq) override { + if (last_ssrc_ && packet.header.ssrc != *last_ssrc_) { + std::cout << "Changing streams from 0x" << std::hex << *last_ssrc_ + << " to 0x" << std::hex << packet.header.ssrc << std::dec + << " (payload type " + << static_cast<int>(packet.header.payloadType) << ")" + << std::endl; + } + last_ssrc_ = packet.header.ssrc; + if (other_callback_) { + other_callback_->AfterInsertPacket(packet, neteq); + } + } + + private: + NetEqPostInsertPacket* other_callback_; + absl::optional<uint32_t> last_ssrc_; +}; + +NetEqTestFactory::NetEqTestFactory() = default; +NetEqTestFactory::~NetEqTestFactory() = default; + +NetEqTestFactory::Config::Config() = default; +NetEqTestFactory::Config::Config(const Config& other) = default; +NetEqTestFactory::Config::~Config() = default; + +std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTestFromString( + absl::string_view input_string, + NetEqFactory* factory, + const Config& config) { + std::unique_ptr<NetEqInput> input( + NetEqEventLogInput::CreateFromString(input_string, config.ssrc_filter)); + if (!input) { + std::cerr << "Error: Cannot parse input string" << std::endl; + return nullptr; + } + return 
InitializeTest(std::move(input), factory, config); +} + +std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTestFromFile( + absl::string_view input_file_name, + NetEqFactory* factory, + const Config& config) { + // Gather RTP header extensions in a map. + NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = { + {config.audio_level, kRtpExtensionAudioLevel}, + {config.abs_send_time, kRtpExtensionAbsoluteSendTime}, + {config.transport_seq_no, kRtpExtensionTransportSequenceNumber}, + {config.video_content_type, kRtpExtensionVideoContentType}, + {config.video_timing, kRtpExtensionVideoTiming}}; + + std::unique_ptr<NetEqInput> input; + if (RtpFileSource::ValidRtpDump(input_file_name) || + RtpFileSource::ValidPcap(input_file_name)) { + input.reset(new NetEqRtpDumpInput(input_file_name, rtp_ext_map, + config.ssrc_filter)); + } else { + input.reset(NetEqEventLogInput::CreateFromFile(input_file_name, + config.ssrc_filter)); + } + + std::cout << "Input file: " << input_file_name << std::endl; + if (!input) { + std::cerr << "Error: Cannot open input file" << std::endl; + return nullptr; + } + return InitializeTest(std::move(input), factory, config); +} + +std::unique_ptr<NetEqTest> NetEqTestFactory::InitializeTest( + std::unique_ptr<NetEqInput> input, + NetEqFactory* factory, + const Config& config) { + if (input->ended()) { + std::cerr << "Error: Input is empty" << std::endl; + return nullptr; + } + + if (!config.field_trial_string.empty()) { + field_trials_ = + std::make_unique<ScopedFieldTrials>(config.field_trial_string); + } + + // Skip some initial events/packets if requested. 
+ if (config.skip_get_audio_events > 0) { + std::cout << "Skipping " << config.skip_get_audio_events + << " get_audio events" << std::endl; + if (!input->NextPacketTime() || !input->NextOutputEventTime()) { + std::cerr << "No events found" << std::endl; + return nullptr; + } + for (int i = 0; i < config.skip_get_audio_events; i++) { + input->AdvanceOutputEvent(); + if (!input->NextOutputEventTime()) { + std::cerr << "Not enough get_audio events found" << std::endl; + return nullptr; + } + } + while (*input->NextPacketTime() < *input->NextOutputEventTime()) { + input->PopPacket(); + if (!input->NextPacketTime()) { + std::cerr << "Not enough incoming packets found" << std::endl; + return nullptr; + } + } + } + + // Check the sample rate. + absl::optional<int> sample_rate_hz; + std::set<std::pair<int, uint32_t>> discarded_pt_and_ssrc; + while (absl::optional<RTPHeader> first_rtp_header = input->NextHeader()) { + RTC_DCHECK(first_rtp_header); + sample_rate_hz = CodecSampleRate(first_rtp_header->payloadType, config); + if (sample_rate_hz) { + std::cout << "Found valid packet with payload type " + << static_cast<int>(first_rtp_header->payloadType) + << " and SSRC 0x" << std::hex << first_rtp_header->ssrc + << std::dec << std::endl; + if (config.initial_dummy_packets > 0) { + std::cout << "Nr of initial dummy packets: " + << config.initial_dummy_packets << std::endl; + input = std::make_unique<InitialPacketInserterNetEqInput>( + std::move(input), config.initial_dummy_packets, *sample_rate_hz); + } + break; + } + // Discard this packet and move to the next. Keep track of discarded payload + // types and SSRCs. 
+ discarded_pt_and_ssrc.emplace(first_rtp_header->payloadType, + first_rtp_header->ssrc); + input->PopPacket(); + } + if (!discarded_pt_and_ssrc.empty()) { + std::cout << "Discarded initial packets with the following payload types " + "and SSRCs:" + << std::endl; + for (const auto& d : discarded_pt_and_ssrc) { + std::cout << "PT " << d.first << "; SSRC 0x" << std::hex + << static_cast<int>(d.second) << std::dec << std::endl; + } + } + if (!sample_rate_hz) { + std::cerr << "Cannot find any packets with known payload types" + << std::endl; + return nullptr; + } + + // If an output file is requested, open it. + std::unique_ptr<AudioSink> output; + if (!config.output_audio_filename.has_value()) { + output = std::make_unique<VoidAudioSink>(); + std::cout << "No output audio file" << std::endl; + } else if (config.output_audio_filename->size() >= 4 && + config.output_audio_filename->substr( + config.output_audio_filename->size() - 4) == ".wav") { + // Open a wav file with the known sample rate. + output = std::make_unique<OutputWavFile>(*config.output_audio_filename, + *sample_rate_hz); + std::cout << "Output WAV file: " << *config.output_audio_filename + << std::endl; + } else { + // Open a pcm file. + output = std::make_unique<OutputAudioFile>(*config.output_audio_filename); + std::cout << "Output PCM file: " << *config.output_audio_filename + << std::endl; + } + + NetEqTest::DecoderMap codecs = NetEqTest::StandardDecoderMap(); + + rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = + CreateBuiltinAudioDecoderFactory(); + + // Check if a replacement audio file was provided. + if (config.replacement_audio_file.size() > 0) { + // Find largest unused payload type. 
+ int replacement_pt = 127; + while (codecs.find(replacement_pt) != codecs.end()) { + --replacement_pt; + if (replacement_pt <= 0) { + std::cerr << "Error: Unable to find available replacement payload type" + << std::endl; + return nullptr; + } + } + + auto std_set_int32_to_uint8 = [](const std::set<int32_t>& a) { + std::set<uint8_t> b; + for (auto& x : a) { + b.insert(static_cast<uint8_t>(x)); + } + return b; + }; + + std::set<uint8_t> cn_types = std_set_int32_to_uint8( + {config.cn_nb, config.cn_wb, config.cn_swb32, config.cn_swb48}); + std::set<uint8_t> forbidden_types = + std_set_int32_to_uint8({config.g722, config.red, config.avt, + config.avt_16, config.avt_32, config.avt_48}); + input.reset(new NetEqReplacementInput(std::move(input), replacement_pt, + cn_types, forbidden_types)); + + // Note that capture-by-copy implies that the lambda captures the value of + // decoder_factory before it's reassigned on the left-hand side. + decoder_factory = rtc::make_ref_counted<FunctionAudioDecoderFactory>( + [decoder_factory, config]( + const SdpAudioFormat& format, + absl::optional<AudioCodecPairId> codec_pair_id) { + std::unique_ptr<AudioDecoder> decoder = + decoder_factory->MakeAudioDecoder(format, codec_pair_id); + if (!decoder && format.name == "replacement") { + decoder = std::make_unique<FakeDecodeFromFile>( + std::make_unique<InputAudioFile>(config.replacement_audio_file), + format.clockrate_hz, format.num_channels > 1); + } + return decoder; + }); + + if (!codecs + .insert({replacement_pt, SdpAudioFormat("replacement", 48000, 1)}) + .second) { + std::cerr << "Error: Unable to insert replacement audio codec" + << std::endl; + return nullptr; + } + } + + // Create a text log output stream if needed. + std::unique_ptr<std::ofstream> text_log; + if (config.textlog && config.textlog_filename.has_value()) { + // Write to file. + text_log = std::make_unique<std::ofstream>(*config.textlog_filename); + } else if (config.textlog) { + // Print to stdout. 
+ text_log = std::make_unique<std::ofstream>(); + text_log->basic_ios<char>::rdbuf(std::cout.rdbuf()); + } + + NetEqTest::Callbacks callbacks; + stats_plotter_ = std::make_unique<NetEqStatsPlotter>( + config.matlabplot, config.pythonplot, config.concealment_events, + config.plot_scripts_basename.value_or("")); + + ssrc_switch_detector_.reset( + new SsrcSwitchDetector(stats_plotter_->stats_getter()->delay_analyzer())); + callbacks.post_insert_packet = ssrc_switch_detector_.get(); + callbacks.get_audio_callback = stats_plotter_->stats_getter(); + callbacks.simulation_ended_callback = stats_plotter_.get(); + NetEq::Config neteq_config; + neteq_config.sample_rate_hz = *sample_rate_hz; + neteq_config.max_packets_in_buffer = config.max_nr_packets_in_buffer; + neteq_config.enable_fast_accelerate = config.enable_fast_accelerate; + return std::make_unique<NetEqTest>( + neteq_config, decoder_factory, codecs, std::move(text_log), factory, + std::move(input), std::move(output), callbacks); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h new file mode 100644 index 0000000000..96ce0b4334 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_test_factory.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
+
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "modules/audio_coding/neteq/tools/neteq_test.h"
+#include "test/field_trial.h"
+
+namespace webrtc {
+namespace test {
+
+class SsrcSwitchDetector;
+class NetEqStatsGetter;
+class NetEqStatsPlotter;
+
+// Builds NetEqTest objects from a Config and owns the helper objects that the
+// resulting test refers to.
+//
+// Note that the NetEqTestFactory needs to be alive when the NetEqTest object is
+// used for a simulation.
+class NetEqTestFactory {
+ public:
+  NetEqTestFactory();
+  ~NetEqTestFactory();
+  // Settings for one simulation. Every integer option takes its in-class
+  // default from the matching `default_*()` helper; booleans default to false
+  // and the optional values start out empty.
+  struct Config {
+    Config();
+    Config(const Config& other);
+    ~Config();
+    // RTP payload type for PCM-u.
+    static constexpr int default_pcmu() { return 0; }
+    int pcmu = default_pcmu();
+    // RTP payload type for PCM-a.
+    static constexpr int default_pcma() { return 8; }
+    int pcma = default_pcma();
+    // RTP payload type for iLBC.
+    static constexpr int default_ilbc() { return 102; }
+    int ilbc = default_ilbc();
+    // RTP payload type for iSAC.
+    static constexpr int default_isac() { return 103; }
+    int isac = default_isac();
+    // RTP payload type for iSAC-swb (32 kHz).
+    static constexpr int default_isac_swb() { return 104; }
+    int isac_swb = default_isac_swb();
+    // RTP payload type for Opus.
+    static constexpr int default_opus() { return 111; }
+    int opus = default_opus();
+    // RTP payload type for PCM16b-nb (8 kHz).
+    static constexpr int default_pcm16b() { return 93; }
+    int pcm16b = default_pcm16b();
+    // RTP payload type for PCM16b-wb (16 kHz).
+    static constexpr int default_pcm16b_wb() { return 94; }
+    int pcm16b_wb = default_pcm16b_wb();
+    // RTP payload type for PCM16b-swb32 (32 kHz).
+    static constexpr int default_pcm16b_swb32() { return 95; }
+    int pcm16b_swb32 = default_pcm16b_swb32();
+    // RTP payload type for PCM16b-swb48 (48 kHz).
+    static constexpr int default_pcm16b_swb48() { return 96; }
+    int pcm16b_swb48 = default_pcm16b_swb48();
+    // RTP payload type for G.722.
+    static constexpr int default_g722() { return 9; }
+    int g722 = default_g722();
+    // RTP payload type for AVT/DTMF (8 kHz).
+    static constexpr int default_avt() { return 106; }
+    int avt = default_avt();
+    // RTP payload type for AVT/DTMF (16 kHz).
+    static constexpr int default_avt_16() { return 114; }
+    int avt_16 = default_avt_16();
+    // RTP payload type for AVT/DTMF (32 kHz).
+    static constexpr int default_avt_32() { return 115; }
+    int avt_32 = default_avt_32();
+    // RTP payload type for AVT/DTMF (48 kHz).
+    static constexpr int default_avt_48() { return 116; }
+    int avt_48 = default_avt_48();
+    // RTP payload type for redundant audio (RED).
+    static constexpr int default_red() { return 117; }
+    int red = default_red();
+    // RTP payload type for comfort noise (8 kHz).
+    static constexpr int default_cn_nb() { return 13; }
+    int cn_nb = default_cn_nb();
+    // RTP payload type for comfort noise (16 kHz).
+    static constexpr int default_cn_wb() { return 98; }
+    int cn_wb = default_cn_wb();
+    // RTP payload type for comfort noise (32 kHz).
+    static constexpr int default_cn_swb32() { return 99; }
+    int cn_swb32 = default_cn_swb32();
+    // RTP payload type for comfort noise (48 kHz).
+    static constexpr int default_cn_swb48() { return 100; }
+    int cn_swb48 = default_cn_swb48();
+    // A PCM file that will be used to populate dummy RTP packets.
+    std::string replacement_audio_file;
+    // Only use packets with this SSRC.
+    absl::optional<uint32_t> ssrc_filter;
+    // Extension ID for audio level (RFC 6464).
+    static constexpr int default_audio_level() { return 1; }
+    int audio_level = default_audio_level();
+    // Extension ID for absolute sender time.
+    static constexpr int default_abs_send_time() { return 3; }
+    int abs_send_time = default_abs_send_time();
+    // Extension ID for transport sequence number.
+    static constexpr int default_transport_seq_no() { return 5; }
+    int transport_seq_no = default_transport_seq_no();
+    // Extension ID for video content type.
+    static constexpr int default_video_content_type() { return 7; }
+    int video_content_type = default_video_content_type();
+    // Extension ID for video timing.
+    static constexpr int default_video_timing() { return 8; }
+    int video_timing = default_video_timing();
+    // Generates a matlab script for plotting the delay profile.
+    bool matlabplot = false;
+    // Generates a python script for plotting the delay profile.
+    bool pythonplot = false;
+    // Prints concealment events.
+    bool concealment_events = false;
+    // Maximum allowed number of packets in the buffer.
+    static constexpr int default_max_nr_packets_in_buffer() { return 200; }
+    int max_nr_packets_in_buffer = default_max_nr_packets_in_buffer();
+    // Number of dummy packets to put in the packet buffer at the start of the
+    // simulation.
+    static constexpr int default_initial_dummy_packets() { return 0; }
+    int initial_dummy_packets = default_initial_dummy_packets();
+    // Number of getAudio events to skip at the start of the simulation.
+    static constexpr int default_skip_get_audio_events() { return 0; }
+    int skip_get_audio_events = default_skip_get_audio_events();
+    // Enables jitter buffer fast accelerate.
+    bool enable_fast_accelerate = false;
+    // Dumps events that describe the simulation on a step-by-step basis.
+    bool textlog = false;
+    // If specified and `textlog` is true, the output of `textlog` is written to
+    // the specified file name.
+    absl::optional<std::string> textlog_filename;
+    // Base name for the output script files for plotting the delay profile.
+    absl::optional<std::string> plot_scripts_basename;
+    // Path to the output audio file.
+    absl::optional<std::string> output_audio_filename;
+    // Field trials to use during the simulation.
+    std::string field_trial_string;
+  };
+
+  // Public entry points; both are only declared in this header.
+  // NOTE(review): judging by the parameter names, the first takes the name of
+  // an input file and the second takes the input data itself as a string -
+  // confirm against the definitions in the .cc file.
+  std::unique_ptr<NetEqTest> InitializeTestFromFile(
+      absl::string_view input_filename,
+      NetEqFactory* neteq_factory,
+      const Config& config);
+  std::unique_ptr<NetEqTest> InitializeTestFromString(
+      absl::string_view input_string,
+      NetEqFactory* neteq_factory,
+      const Config& config);
+
+ private:
+  // Builds and returns the NetEqTest for an already constructed `input`.
+  std::unique_ptr<NetEqTest> InitializeTest(std::unique_ptr<NetEqInput> input,
+                                            NetEqFactory* neteq_factory,
+                                            const Config& config);
+  // Created by InitializeTest(), which registers raw pointers to these objects
+  // as callbacks of the returned NetEqTest. This is why the factory has to
+  // outlive the test.
+  std::unique_ptr<SsrcSwitchDetector> ssrc_switch_detector_;
+  std::unique_ptr<NetEqStatsPlotter> stats_plotter_;
+  // The field trials are stored in the test factory, because neteq_test is not
+  // in a testonly target, and therefore cannot use ScopedFieldTrials.
+  std::unique_ptr<ScopedFieldTrials> field_trials_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_NETEQ_TEST_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h
new file mode 100644
index 0000000000..25577fc882
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_audio_file.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
+
+#include <stdio.h>
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+// AudioSink that appends raw samples to a file on disk.
+class OutputAudioFile : public AudioSink {
+ public:
+  // Creates an OutputAudioFile, opening a file named `file_name` for writing.
+  // The file format is 16-bit signed host-endian PCM. If the file cannot be
+  // opened, the object is still constructed and every WriteArray() call fails.
+  explicit OutputAudioFile(absl::string_view file_name)
+      // string_view is not guaranteed to be null-terminated, so go through a
+      // temporary std::string to obtain a C string for fopen().
+      : out_file_(fopen(std::string(file_name).c_str(), "wb")) {}
+
+  // Closes the file if it was opened successfully.
+  ~OutputAudioFile() override {
+    if (out_file_)
+      fclose(out_file_);
+  }
+
+  OutputAudioFile(const OutputAudioFile&) = delete;
+  OutputAudioFile& operator=(const OutputAudioFile&) = delete;
+
+  // Writes `num_samples` samples from `audio` to the file. Returns true only
+  // if all samples were written.
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    RTC_DCHECK(out_file_);
+    // RTC_DCHECK is compiled out of release builds; never hand a null stream
+    // to fwrite(). Report the failure to the caller instead.
+    if (!out_file_)
+      return false;
+    return fwrite(audio, sizeof(*audio), num_samples, out_file_) == num_samples;
+  }
+
+ private:
+  FILE* out_file_;  // Null if fopen() failed.
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_AUDIO_FILE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h
new file mode 100644
index 0000000000..20eedfb554
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/output_wav_file.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_coding/neteq/tools/audio_sink.h"
+
+namespace webrtc {
+namespace test {
+
+// AudioSink that forwards every sample it receives to a WavWriter.
+class OutputWavFile : public AudioSink {
+ public:
+  // Not copyable: the object owns the writer for its output file.
+  OutputWavFile(const OutputWavFile&) = delete;
+  OutputWavFile& operator=(const OutputWavFile&) = delete;
+
+  // Opens `file_name` for writing as a PCM encoded wav file with the given
+  // sample rate and channel count (mono unless stated otherwise).
+  OutputWavFile(absl::string_view file_name,
+                int sample_rate_hz,
+                int num_channels = 1)
+      : wav_writer_(file_name, sample_rate_hz, num_channels) {}
+
+  // Hands `num_samples` samples from `audio` to the wav writer. Always
+  // reports success; the writer call does not provide a status to forward.
+  bool WriteArray(const int16_t* audio, size_t num_samples) override {
+    wav_writer_.WriteSamples(audio, num_samples);
+    return true;
+  }
+
+ private:
+  WavWriter wav_writer_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_OUTPUT_WAV_FILE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc
new file mode 100644
index 0000000000..e540173f43
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+#include <list>
+#include <utility>
+
+#include "api/array_view.h"
+#include "modules/rtp_rtcp/source/rtp_packet_received.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/copy_on_write_buffer.h"
+
+namespace webrtc {
+namespace test {
+
+// Creates a packet from raw RTP data. `valid_header_` is the last member to be
+// initialized, so ParseHeader() runs after all other members are set up.
+Packet::Packet(rtc::CopyOnWriteBuffer packet,
+               size_t virtual_packet_length_bytes,
+               double time_ms,
+               const RtpHeaderExtensionMap* extension_map)
+    : packet_(std::move(packet)),
+      virtual_packet_length_bytes_(virtual_packet_length_bytes),
+      time_ms_(time_ms),
+      valid_header_(ParseHeader(extension_map)) {}
+
+// Creates a packet from an already parsed header. No raw data is stored, and
+// the header is taken to be valid.
+Packet::Packet(const RTPHeader& header,
+               size_t virtual_packet_length_bytes,
+               size_t virtual_payload_length_bytes,
+               double time_ms)
+    : header_(header),
+      virtual_packet_length_bytes_(virtual_packet_length_bytes),
+      virtual_payload_length_bytes_(virtual_payload_length_bytes),
+      time_ms_(time_ms),
+      valid_header_(true) {}
+
+Packet::~Packet() = default;
+
+// Interprets the start of the payload as a sequence of RED block headers and
+// pushes one heap-allocated RTPHeader per block onto the front of `headers`.
+// Returns false if the payload ends before the final block header was found.
+// Headers extracted before the failure stay in `headers`, so the caller must
+// release them (see DeleteRedHeaders()) regardless of the return value.
+bool Packet::ExtractRedHeaders(std::list<RTPHeader*>* headers) const {
+  //
+  //  0                   1                   2                   3
+  //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |1|   block PT  |  timestamp offset         |   block length    |
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |1|    ...                                                      |
+  // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  // |0|   block PT  |
+  // +-+-+-+-+-+-+-+-+
+  //
+
+  // Size of every block header except the last one.
+  constexpr int kRedBlockHeaderLengthBytes = 4;
+
+  const uint8_t* payload_ptr = payload();
+  const uint8_t* payload_end_ptr = payload_ptr + payload_length_bytes();
+
+  // Find all RED headers with the extension bit set to 1. That is, all headers
+  // but the last one.
+  while ((payload_ptr < payload_end_ptr) && (*payload_ptr & 0x80)) {
+    // The timestamp offset is read from bytes 1 and 2 of the block header.
+    // Bail out on a truncated payload instead of reading past its end.
+    if (payload_end_ptr - payload_ptr < kRedBlockHeaderLengthBytes) {
+      return false;  // Payload too short.
+    }
+    RTPHeader* header = new RTPHeader;
+    CopyToHeader(header);
+    header->payloadType = payload_ptr[0] & 0x7F;
+    // 14-bit offset: all of byte 1 followed by the upper six bits of byte 2.
+    uint32_t offset = (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
+    header->timestamp -= offset;
+    headers->push_front(header);
+    payload_ptr += kRedBlockHeaderLengthBytes;
+  }
+  // Last header.
+  RTC_DCHECK_LT(payload_ptr, payload_end_ptr);
+  if (payload_ptr >= payload_end_ptr) {
+    return false;  // Payload too short.
+  }
+  RTPHeader* header = new RTPHeader;
+  CopyToHeader(header);
+  header->payloadType = payload_ptr[0] & 0x7F;
+  headers->push_front(header);
+  return true;
+}
+
+// Deletes every RTPHeader in `headers` and leaves the list empty.
+void Packet::DeleteRedHeaders(std::list<RTPHeader*>* headers) {
+  while (!headers->empty()) {
+    delete headers->front();
+    headers->pop_front();
+  }
+}
+
+// Parses `packet_` and fills in `header_`, `rtp_payload_` and
+// `virtual_payload_length_bytes_`. Returns false if the data cannot be parsed
+// as an RTP packet.
+bool Packet::ParseHeader(const RtpHeaderExtensionMap* extension_map) {
+  // The padding bit is read from the first byte below; an empty buffer has no
+  // first byte and cannot hold an RTP header.
+  if (packet_.size() == 0) {
+    return false;
+  }
+
+  // Use RtpPacketReceived instead of RtpPacket because former already has a
+  // converter into legacy RTPHeader.
+  webrtc::RtpPacketReceived rtp_packet(extension_map);
+
+  // Because of the special case of dummy packets that have padding marked in
+  // the RTP header, but do not have rtp payload with the padding size, handle
+  // padding manually. Regular RTP packet parser reports failure, but it is fine
+  // in this context.
+  bool padding = (packet_[0] & 0b0010'0000);
+  size_t padding_size = 0;
+  if (padding) {
+    // Clear the padding bit to prevent failure when rtp payload is omitted.
+    // The bit is cleared in a copy; `packet_` itself is left untouched.
+    rtc::CopyOnWriteBuffer packet(packet_);
+    packet.MutableData()[0] &= ~0b0010'0000;
+    if (!rtp_packet.Parse(std::move(packet))) {
+      return false;
+    }
+    if (rtp_packet.payload_size() > 0) {
+      // With a payload present, the last byte holds the padding length.
+      padding_size = rtp_packet.data()[rtp_packet.size() - 1];
+    }
+    if (padding_size > rtp_packet.payload_size()) {
+      return false;
+    }
+  } else {
+    if (!rtp_packet.Parse(packet_)) {
+      return false;
+    }
+  }
+  rtp_payload_ = rtc::MakeArrayView(packet_.data() + rtp_packet.headers_size(),
+                                    rtp_packet.payload_size() - padding_size);
+  rtp_packet.GetHeader(&header_);
+
+  RTC_CHECK_GE(virtual_packet_length_bytes_, rtp_packet.size());
+  RTC_DCHECK_GE(virtual_packet_length_bytes_, rtp_packet.headers_size());
+  virtual_payload_length_bytes_ =
+      virtual_packet_length_bytes_ - rtp_packet.headers_size();
+  return true;
+}
+
+// Copies this packet's parsed header into `destination`.
+void Packet::CopyToHeader(RTPHeader* destination) const {
+  *destination = header_;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h
new file mode 100644
index 0000000000..96710907df
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
+
+#include <list>
+
+#include "api/array_view.h"
+#include "api/rtp_headers.h"
+#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
+#include "rtc_base/copy_on_write_buffer.h"
+
+namespace webrtc {
+namespace test {
+
+// Class for handling RTP packets in test applications.
+class Packet {
+ public:
+  // Creates a packet, with the packet payload (including header bytes) in
+  // `packet`. The `time_ms` is an extra time associated with this packet,
+  // typically used to denote arrival time.
+  // `virtual_packet_length_bytes` is typically used when reading RTP dump files
+  // that only contain the RTP headers, and no payload (a.k.a RTP dummy files or
+  // RTP light). The `virtual_packet_length_bytes` tells what size the packet
+  // had on wire, including the now discarded payload.
+  Packet(rtc::CopyOnWriteBuffer packet,
+         size_t virtual_packet_length_bytes,
+         double time_ms,
+         const RtpHeaderExtensionMap* extension_map = nullptr);
+
+  // Convenience overload that uses the actual size of `packet` as the virtual
+  // packet length. `packet` is passed on by copy rather than moved:
+  // `packet.size()` is read in the same argument list, so moving here would
+  // make the result depend on the order of argument evaluation.
+  Packet(rtc::CopyOnWriteBuffer packet,
+         double time_ms,
+         const RtpHeaderExtensionMap* extension_map = nullptr)
+      : Packet(packet, packet.size(), time_ms, extension_map) {}
+
+  // Same as above, but creates the packet from an already parsed RTPHeader.
+  // This is typically used when reading RTP dump files that only contain the
+  // RTP headers, and no payload. The `virtual_packet_length_bytes` tells what
+  // size the packet had on wire, including the now discarded payload.
+  // The `virtual_payload_length_bytes` tells the size of the payload.
+  Packet(const RTPHeader& header,
+         size_t virtual_packet_length_bytes,
+         size_t virtual_payload_length_bytes,
+         double time_ms);
+
+  virtual ~Packet();
+
+  Packet(const Packet&) = delete;
+  Packet& operator=(const Packet&) = delete;
+
+  // Parses the first bytes of the RTP payload, interpreting them as RED headers
+  // according to RFC 2198. The headers will be inserted into `headers`. The
+  // caller of the method assumes ownership of the objects in the list, and
+  // must delete them properly.
+  bool ExtractRedHeaders(std::list<RTPHeader*>* headers) const;
+
+  // Deletes all RTPHeader objects in `headers`, but does not delete `headers`
+  // itself.
+  static void DeleteRedHeaders(std::list<RTPHeader*>* headers);
+
+  // Start of the RTP payload inside the stored packet data.
+  const uint8_t* payload() const { return rtp_payload_.data(); }
+
+  // Number of bytes actually stored for this packet.
+  size_t packet_length_bytes() const { return packet_.size(); }
+
+  // Number of payload bytes actually stored for this packet.
+  size_t payload_length_bytes() const { return rtp_payload_.size(); }
+
+  size_t virtual_packet_length_bytes() const {
+    return virtual_packet_length_bytes_;
+  }
+
+  size_t virtual_payload_length_bytes() const {
+    return virtual_payload_length_bytes_;
+  }
+
+  const RTPHeader& header() const { return header_; }
+
+  double time_ms() const { return time_ms_; }
+  // False if the packet was created from raw data that could not be parsed.
+  bool valid_header() const { return valid_header_; }
+
+ private:
+  bool ParseHeader(const RtpHeaderExtensionMap* extension_map);
+  void CopyToHeader(RTPHeader* destination) const;
+
+  RTPHeader header_;
+  const rtc::CopyOnWriteBuffer packet_;
+  rtc::ArrayView<const uint8_t> rtp_payload_;  // Empty for dummy RTP packets.
+  // Virtual lengths are used when parsing RTP header files (dummy RTP files).
+  const size_t virtual_packet_length_bytes_;
+  size_t virtual_payload_length_bytes_ = 0;
+  const double time_ms_;  // Used to denote a packet's arrival time.
+  // Keep this the last data member. The raw-data constructor initializes it
+  // with the result of ParseHeader(), which reads and writes the members
+  // declared above; they must already be initialized at that point.
+  const bool valid_header_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc
new file mode 100644
index 0000000000..598ae6edd4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/packet_source.h"
+
+namespace webrtc {
+namespace test {
+
+PacketSource::PacketSource() = default;
+
+PacketSource::~PacketSource() = default;
+
+// Marks `payload_type` as filtered out. Values that do not fit in the 7-bit
+// payload type field are ignored.
+void PacketSource::FilterOutPayloadType(uint8_t payload_type) {
+  // `payload_type` is 8 bits wide but the filter only has 128 entries, and
+  // std::bitset::set() throws std::out_of_range for a position outside the
+  // set. No RTP packet can carry such a payload type, so there is nothing to
+  // filter.
+  if (payload_type >= filter_.size()) {
+    return;
+  }
+  filter_.set(payload_type, true);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h
new file mode 100644
index 0000000000..be1705cae1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_source.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
+#define MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
+
+#include <bitset>
+#include <memory>
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+namespace webrtc {
+namespace test {
+
+// Interface class for an object delivering RTP packets to test applications.
+class PacketSource {
+ public:
+  PacketSource();
+  virtual ~PacketSource();
+
+  PacketSource(const PacketSource&) = delete;
+  PacketSource& operator=(const PacketSource&) = delete;
+
+  // Returns next packet. Returns nullptr if the source is depleted, or if an
+  // error occurred.
+  virtual std::unique_ptr<Packet> NextPacket() = 0;
+
+  // Records `payload_type` in `filter_`. The base implementation ignores
+  // values of 128 and above, which do not fit in the 7-bit field.
+  virtual void FilterOutPayloadType(uint8_t payload_type);
+
+ protected:
+  std::bitset<128> filter_;  // Payload type is 7 bits in the RFC.
+};
+
+}  // namespace test
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_NETEQ_TOOLS_PACKET_SOURCE_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc
new file mode 100644
index 0000000000..69cf56b529
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/packet_unittest.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Unit tests for test Packet class.
+
+#include "modules/audio_coding/neteq/tools/packet.h"
+
+#include <list>
+#include <utility>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+const int kHeaderLengthBytes = 12;
+
+// Writes a 12-byte RTP header to `rtp_data`: first byte 0x80, then the payload
+// type, followed by sequence number, timestamp and SSRC in network byte order.
+// `rtp_data` must have room for at least kHeaderLengthBytes bytes.
+void MakeRtpHeader(int payload_type,
+                   int seq_number,
+                   uint32_t timestamp,
+                   uint32_t ssrc,
+                   uint8_t* rtp_data) {
+  rtp_data[0] = 0x80;
+  rtp_data[1] = static_cast<uint8_t>(payload_type);
+  rtp_data[2] = (seq_number >> 8) & 0xFF;
+  rtp_data[3] = seq_number & 0xFF;
+  rtp_data[4] = timestamp >> 24;
+  rtp_data[5] = (timestamp >> 16) & 0xFF;
+  rtp_data[6] = (timestamp >> 8) & 0xFF;
+  rtp_data[7] = timestamp & 0xFF;
+  rtp_data[8] = ssrc >> 24;
+  rtp_data[9] = (ssrc >> 16) & 0xFF;
+  rtp_data[10] = (ssrc >> 8) & 0xFF;
+  rtp_data[11] = ssrc & 0xFF;
+}
+}  // namespace
+
+// A packet whose buffer holds the header and the full payload: real and
+// virtual lengths are the same.
+TEST(TestPacket, RegularPacket) {
+  const size_t kPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+// A header-only packet: the stored payload is empty while the virtual lengths
+// describe the packet as it was on the wire.
+TEST(TestPacket, DummyPacket) {
+  const size_t kPacketLengthBytes = kHeaderLengthBytes;  // Only RTP header.
+  const size_t kVirtualPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kVirtualPacketLengthBytes,
+                kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+// Same as DummyPacket, but with the padding bit set even though no padding
+// bytes are stored. The header must still be accepted.
+TEST(TestPacket, DummyPaddingPacket) {
+  const size_t kPacketLengthBytes = kHeaderLengthBytes;  // Only RTP header.
+  const size_t kVirtualPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  packet_memory.MutableData()[0] |= 0b0010'0000;  // Set the padding bit.
+  const double kPacketTime = 1.0;
+  Packet packet(std::move(packet_memory), kVirtualPacketLengthBytes,
+                kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kVirtualPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+}
+
+namespace {
+// Writes one RED block header starting at `rtp_data`, according to RFC 2198.
+// Returns the number of bytes written (1 or 4).
+//
+// Format if `last_payload` is false:
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |1|   block PT  |  timestamp offset         |   block length    |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//
+// Format if `last_payload` is true:
+//  0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |0|   Block PT  |
+// +-+-+-+-+-+-+-+-+
+
+int MakeRedHeader(int payload_type,
+                  uint32_t timestamp_offset,
+                  int block_length,
+                  bool last_payload,
+                  uint8_t* rtp_data) {
+  rtp_data[0] = 0x80 | (payload_type & 0x7F);  // Set the first bit to 1.
+  if (last_payload) {
+    rtp_data[0] &= 0x7F;  // Reset the first bit to 0 to indicate last block.
+    return 1;
+  }
+  // 14-bit timestamp offset followed by a 10-bit block length.
+  rtp_data[1] = timestamp_offset >> 6;
+  rtp_data[2] = (timestamp_offset & 0x3F) << 2;
+  rtp_data[2] |= block_length >> 8;
+  rtp_data[3] = block_length & 0xFF;
+  return 4;
+}
+}  // namespace
+
+// Builds a payload with four RED block headers and checks that
+// ExtractRedHeaders() returns one RTPHeader per block with the right payload
+// type and timestamp.
+TEST(TestPacket, RED) {
+  const size_t kPacketLengthBytes = 100;
+  rtc::CopyOnWriteBuffer packet_memory(kPacketLengthBytes);
+  const uint8_t kRedPayloadType = 17;
+  const uint16_t kSequenceNumber = 4711;
+  const uint32_t kTimestamp = 47114711;
+  const uint32_t kSsrc = 0x12345678;
+  MakeRtpHeader(kRedPayloadType, kSequenceNumber, kTimestamp, kSsrc,
+                packet_memory.MutableData());
+  // Create four RED headers.
+  // Payload types are just the same as the block index; the offset is 100
+  // times the block index.
+  const int kRedBlocks = 4;
+  uint8_t* payload_ptr = packet_memory.MutableData() +
+                         kHeaderLengthBytes;  // First byte after header.
+  for (int i = 0; i < kRedBlocks; ++i) {
+    int payload_type = i;
+    // Offset value is not used for the last block.
+    uint32_t timestamp_offset = 100 * i;
+    int block_length = 10 * i;
+    const bool last_block = (i == kRedBlocks - 1);
+    payload_ptr += MakeRedHeader(payload_type, timestamp_offset, block_length,
+                                 last_block, payload_ptr);
+  }
+  const double kPacketTime = 1.0;
+  // Hand over ownership of `packet_memory` to `packet`.
+  Packet packet(std::move(packet_memory), kPacketLengthBytes, kPacketTime);
+  ASSERT_TRUE(packet.valid_header());
+  EXPECT_EQ(kRedPayloadType, packet.header().payloadType);
+  EXPECT_EQ(kSequenceNumber, packet.header().sequenceNumber);
+  EXPECT_EQ(kTimestamp, packet.header().timestamp);
+  EXPECT_EQ(kSsrc, packet.header().ssrc);
+  EXPECT_EQ(0, packet.header().numCSRCs);
+  EXPECT_EQ(kPacketLengthBytes, packet.packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.payload_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes, packet.virtual_packet_length_bytes());
+  EXPECT_EQ(kPacketLengthBytes - kHeaderLengthBytes,
+            packet.virtual_payload_length_bytes());
+  EXPECT_EQ(kPacketTime, packet.time_ms());
+  std::list<RTPHeader*> red_headers;
+  EXPECT_TRUE(packet.ExtractRedHeaders(&red_headers));
+  EXPECT_EQ(kRedBlocks, static_cast<int>(red_headers.size()));
+  int block_index = 0;
+  for (auto it = red_headers.rbegin(); it != red_headers.rend(); ++it) {
+    // Reading list from the back, since the extraction puts the main payload
+    // (which is the last one on wire) first.
+    RTPHeader* red_block = *it;
+    EXPECT_EQ(block_index, red_block->payloadType);
+    EXPECT_EQ(kSequenceNumber, red_block->sequenceNumber);
+    if (block_index == kRedBlocks - 1) {
+      // Last block has zero offset per definition.
+      EXPECT_EQ(kTimestamp, red_block->timestamp);
+    } else {
+      EXPECT_EQ(kTimestamp - 100 * block_index, red_block->timestamp);
+    }
+    EXPECT_EQ(kSsrc, red_block->ssrc);
+    EXPECT_EQ(0, red_block->numCSRCs);
+    ++block_index;
+  }
+  Packet::DeleteRedHeaders(&red_headers);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc
new file mode 100644
index 0000000000..5050e1fb17
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+
+#include <memory>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+// Reads enough samples from the file to produce `samples` output samples at
+// `output_rate_hz`, resamples them and writes the result to `destination`.
+// Returns false if the file read fails. Terminates the process (RTC_CHECK) if
+// the rate is not positive, if the frame size and the two rates do not
+// correspond to a whole number of input samples, or if resampling fails.
+bool ResampleInputAudioFile::Read(size_t samples,
+                                  int output_rate_hz,
+                                  int16_t* destination) {
+  // The rate is used as a divisor below; the two-argument overload checks its
+  // stored rate, but this overload is public and can be called directly.
+  RTC_CHECK_GT(output_rate_hz, 0) << "Output rate must be positive.";
+  const size_t samples_to_read = samples * file_rate_hz_ / output_rate_hz;
+  RTC_CHECK_EQ(samples_to_read * output_rate_hz, samples * file_rate_hz_)
+      << "Frame size and sample rates don't add up to an integer.";
+  auto temp_destination = std::make_unique<int16_t[]>(samples_to_read);
+  if (!InputAudioFile::Read(samples_to_read, temp_destination.get()))
+    return false;
+  resampler_.ResetIfNeeded(file_rate_hz_, output_rate_hz, 1);
+  size_t output_length = 0;
+  RTC_CHECK_EQ(resampler_.Push(temp_destination.get(), samples_to_read,
+                               destination, samples, output_length),
+               0);
+  RTC_CHECK_EQ(samples, output_length);
+  return true;
+}
+
+// Same as above, using the output rate stored in the object. The rate must
+// have been set to a positive value first.
+bool ResampleInputAudioFile::Read(size_t samples, int16_t* destination) {
+  RTC_CHECK_GT(output_rate_hz_, 0) << "Output rate not set.";
+  return Read(samples, output_rate_hz_, destination);
+}
+
+// Sets the output rate used by the two-argument Read().
+void ResampleInputAudioFile::set_output_rate_hz(int rate_hz) {
+  output_rate_hz_ = rate_hz;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h
new file mode 100644
index 0000000000..279fece616
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_ + +#include <string> + +#include "absl/strings/string_view.h" +#include "common_audio/resampler/include/resampler.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" + +namespace webrtc { +namespace test { + +// Class for handling a looping input audio file with resampling. +class ResampleInputAudioFile : public InputAudioFile { + public: + ResampleInputAudioFile(absl::string_view file_name, + int file_rate_hz, + bool loop_at_end = true) + : InputAudioFile(file_name, loop_at_end), + file_rate_hz_(file_rate_hz), + output_rate_hz_(-1) {} + ResampleInputAudioFile(absl::string_view file_name, + int file_rate_hz, + int output_rate_hz, + bool loop_at_end = true) + : InputAudioFile(file_name, loop_at_end), + file_rate_hz_(file_rate_hz), + output_rate_hz_(output_rate_hz) {} + + ResampleInputAudioFile(const ResampleInputAudioFile&) = delete; + ResampleInputAudioFile& operator=(const ResampleInputAudioFile&) = delete; + + bool Read(size_t samples, int output_rate_hz, int16_t* destination); + bool Read(size_t samples, int16_t* destination) override; + void set_output_rate_hz(int rate_hz); + + private: + const int file_rate_hz_; + int output_rate_hz_; + Resampler resampler_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_RESAMPLE_INPUT_AUDIO_FILE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc new file mode 100644 index 0000000000..1407aab5f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/rtc_event_log_source.h" + +#include <string.h> + +#include <iostream> +#include <limits> +#include <memory> +#include <set> +#include <utility> + +#include "absl/strings/string_view.h" +#include "logging/rtc_event_log/rtc_event_processor.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +namespace { +bool ShouldSkipStream(ParsedRtcEventLog::MediaType media_type, + uint32_t ssrc, + absl::optional<uint32_t> ssrc_filter) { + if (media_type != ParsedRtcEventLog::MediaType::AUDIO) + return true; + if (ssrc_filter.has_value() && ssrc != *ssrc_filter) + return true; + return false; +} +} // namespace + +std::unique_ptr<RtcEventLogSource> RtcEventLogSource::CreateFromFile( + absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter) { + auto source = std::unique_ptr<RtcEventLogSource>(new RtcEventLogSource()); + ParsedRtcEventLog parsed_log; + auto status = parsed_log.ParseFile(file_name); + if (!status.ok()) { + std::cerr << "Failed to parse event log: " << status.message() << std::endl; + std::cerr << "Skipping log." << std::endl; + return nullptr; + } + if (!source->Initialize(parsed_log, ssrc_filter)) { + std::cerr << "Failed to initialize source from event log, skipping." 
+ << std::endl; + return nullptr; + } + return source; +} + +std::unique_ptr<RtcEventLogSource> RtcEventLogSource::CreateFromString( + absl::string_view file_contents, + absl::optional<uint32_t> ssrc_filter) { + auto source = std::unique_ptr<RtcEventLogSource>(new RtcEventLogSource()); + ParsedRtcEventLog parsed_log; + auto status = parsed_log.ParseString(file_contents); + if (!status.ok()) { + std::cerr << "Failed to parse event log: " << status.message() << std::endl; + std::cerr << "Skipping log." << std::endl; + return nullptr; + } + if (!source->Initialize(parsed_log, ssrc_filter)) { + std::cerr << "Failed to initialize source from event log, skipping." + << std::endl; + return nullptr; + } + return source; +} + +RtcEventLogSource::~RtcEventLogSource() {} + +std::unique_ptr<Packet> RtcEventLogSource::NextPacket() { + if (rtp_packet_index_ >= rtp_packets_.size()) + return nullptr; + + std::unique_ptr<Packet> packet = std::move(rtp_packets_[rtp_packet_index_++]); + return packet; +} + +int64_t RtcEventLogSource::NextAudioOutputEventMs() { + if (audio_output_index_ >= audio_outputs_.size()) + return std::numeric_limits<int64_t>::max(); + + int64_t output_time_ms = audio_outputs_[audio_output_index_++]; + return output_time_ms; +} + +RtcEventLogSource::RtcEventLogSource() : PacketSource() {} + +bool RtcEventLogSource::Initialize(const ParsedRtcEventLog& parsed_log, + absl::optional<uint32_t> ssrc_filter) { + const auto first_log_end_time_us = + parsed_log.stop_log_events().empty() + ? 
std::numeric_limits<int64_t>::max() + : parsed_log.stop_log_events().front().log_time_us(); + + std::set<uint32_t> packet_ssrcs; + auto handle_rtp_packet = + [this, first_log_end_time_us, + &packet_ssrcs](const webrtc::LoggedRtpPacketIncoming& incoming) { + if (!filter_.test(incoming.rtp.header.payloadType) && + incoming.log_time_us() < first_log_end_time_us) { + rtp_packets_.emplace_back(std::make_unique<Packet>( + incoming.rtp.header, incoming.rtp.total_length, + incoming.rtp.total_length - incoming.rtp.header_length, + static_cast<double>(incoming.log_time_ms()))); + packet_ssrcs.insert(rtp_packets_.back()->header().ssrc); + } + }; + + std::set<uint32_t> ignored_ssrcs; + auto handle_audio_playout = + [this, first_log_end_time_us, &packet_ssrcs, + &ignored_ssrcs](const webrtc::LoggedAudioPlayoutEvent& audio_playout) { + if (audio_playout.log_time_us() < first_log_end_time_us) { + if (packet_ssrcs.count(audio_playout.ssrc) > 0) { + audio_outputs_.emplace_back(audio_playout.log_time_ms()); + } else { + ignored_ssrcs.insert(audio_playout.ssrc); + } + } + }; + + // This wouldn't be needed if we knew that there was at most one audio stream. + webrtc::RtcEventProcessor event_processor; + for (const auto& rtp_packets : parsed_log.incoming_rtp_packets_by_ssrc()) { + ParsedRtcEventLog::MediaType media_type = + parsed_log.GetMediaType(rtp_packets.ssrc, webrtc::kIncomingPacket); + if (ShouldSkipStream(media_type, rtp_packets.ssrc, ssrc_filter)) { + continue; + } + event_processor.AddEvents(rtp_packets.incoming_packets, handle_rtp_packet); + // If no SSRC filter has been set, use the first SSRC only. The simulator + // does not work properly with interleaved packets from multiple SSRCs. 
+ if (!ssrc_filter.has_value()) { + ssrc_filter = rtp_packets.ssrc; + } + } + + for (const auto& audio_playouts : parsed_log.audio_playout_events()) { + if (ssrc_filter.has_value() && audio_playouts.first != *ssrc_filter) + continue; + event_processor.AddEvents(audio_playouts.second, handle_audio_playout); + } + + // Fills in rtp_packets_ and audio_outputs_. + event_processor.ProcessEventsInOrder(); + + for (const auto& ssrc : ignored_ssrcs) { + std::cout << "Ignoring GetAudio events from SSRC 0x" << std::hex << ssrc + << " because no packets were found with a matching SSRC." + << std::endl; + } + + return true; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h new file mode 100644 index 0000000000..c67912a67d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtc_event_log_source.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "logging/rtc_event_log/rtc_event_log_parser.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" + +namespace webrtc { + +class RtpHeaderParser; + +namespace test { + +class Packet; + +class RtcEventLogSource : public PacketSource { + public: + // Creates an RtcEventLogSource reading from `file_name`. If the file cannot + // be opened, or has the wrong format, NULL will be returned. + static std::unique_ptr<RtcEventLogSource> CreateFromFile( + absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter); + // Same as above, but uses a string with the file contents. + static std::unique_ptr<RtcEventLogSource> CreateFromString( + absl::string_view file_contents, + absl::optional<uint32_t> ssrc_filter); + + virtual ~RtcEventLogSource(); + + RtcEventLogSource(const RtcEventLogSource&) = delete; + RtcEventLogSource& operator=(const RtcEventLogSource&) = delete; + + std::unique_ptr<Packet> NextPacket() override; + + // Returns the timestamp of the next audio output event, in milliseconds. The + // maximum value of int64_t is returned if there are no more audio output + // events available. 
+ int64_t NextAudioOutputEventMs(); + + private: + RtcEventLogSource(); + + bool Initialize(const ParsedRtcEventLog& parsed_log, + absl::optional<uint32_t> ssrc_filter); + + std::vector<std::unique_ptr<Packet>> rtp_packets_; + size_t rtp_packet_index_ = 0; + std::vector<int64_t> audio_outputs_; + size_t audio_output_index_ = 0; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTC_EVENT_LOG_SOURCE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc new file mode 100644 index 0000000000..7ecf925ebb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_analyze.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdio.h> + +#include <memory> +#include <vector> + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "modules/audio_coding/neteq/tools/packet.h" +#include "modules/audio_coding/neteq/tools/rtp_file_source.h" + +ABSL_FLAG(int, red, 117, "RTP payload type for RED"); +ABSL_FLAG(int, + audio_level, + -1, + "Extension ID for audio level (RFC 6464); " + "-1 not to print audio level"); +ABSL_FLAG(int, + abs_send_time, + -1, + "Extension ID for absolute sender time; " + "-1 not to print absolute send time"); + +int main(int argc, char* argv[]) { + std::vector<char*> args = absl::ParseCommandLine(argc, argv); + std::string usage = + "Tool for parsing an RTP dump file to text output.\n" + "Example usage:\n" + "./rtp_analyze input.rtp output.txt\n\n" + "Output is sent to stdout if no output file is given. " + "Note that this tool can read files with or without payloads.\n"; + if (args.size() != 2 && args.size() != 3) { + printf("%s", usage.c_str()); + return 1; + } + + RTC_CHECK(absl::GetFlag(FLAGS_red) >= 0 && + absl::GetFlag(FLAGS_red) <= 127); // Payload type + RTC_CHECK(absl::GetFlag(FLAGS_audio_level) == -1 || // Default + (absl::GetFlag(FLAGS_audio_level) > 0 && + absl::GetFlag(FLAGS_audio_level) <= 255)); // Extension ID + RTC_CHECK(absl::GetFlag(FLAGS_abs_send_time) == -1 || // Default + (absl::GetFlag(FLAGS_abs_send_time) > 0 && + absl::GetFlag(FLAGS_abs_send_time) <= 255)); // Extension ID + + printf("Input file: %s\n", args[1]); + std::unique_ptr<webrtc::test::RtpFileSource> file_source( + webrtc::test::RtpFileSource::Create(args[1])); + RTC_DCHECK(file_source.get()); + // Set RTP extension IDs. 
+ bool print_audio_level = false; + if (absl::GetFlag(FLAGS_audio_level) != -1) { + print_audio_level = true; + file_source->RegisterRtpHeaderExtension(webrtc::kRtpExtensionAudioLevel, + absl::GetFlag(FLAGS_audio_level)); + } + bool print_abs_send_time = false; + if (absl::GetFlag(FLAGS_abs_send_time) != -1) { + print_abs_send_time = true; + file_source->RegisterRtpHeaderExtension( + webrtc::kRtpExtensionAbsoluteSendTime, + absl::GetFlag(FLAGS_abs_send_time)); + } + + FILE* out_file; + if (args.size() == 3) { + out_file = fopen(args[2], "wt"); + if (!out_file) { + printf("Cannot open output file %s\n", args[2]); + return -1; + } + printf("Output file: %s\n\n", args[2]); + } else { + out_file = stdout; + } + + // Print file header. + fprintf(out_file, "SeqNo TimeStamp SendTime Size PT M SSRC"); + if (print_audio_level) { + fprintf(out_file, " AuLvl (V)"); + } + if (print_abs_send_time) { + fprintf(out_file, " AbsSendTime"); + } + fprintf(out_file, "\n"); + + uint32_t max_abs_send_time = 0; + int cycles = -1; + std::unique_ptr<webrtc::test::Packet> packet; + while (true) { + packet = file_source->NextPacket(); + if (!packet.get()) { + // End of file reached. + break; + } + // Write packet data to file. Use virtual_packet_length_bytes so that the + // correct packet sizes are printed also for RTP header-only dumps. + fprintf(out_file, "%5u %10u %10u %5i %5i %2i %#08X", + packet->header().sequenceNumber, packet->header().timestamp, + static_cast<unsigned int>(packet->time_ms()), + static_cast<int>(packet->virtual_packet_length_bytes()), + packet->header().payloadType, packet->header().markerBit, + packet->header().ssrc); + if (print_audio_level && packet->header().extension.hasAudioLevel) { + fprintf(out_file, " %5u (%1i)", packet->header().extension.audioLevel, + packet->header().extension.voiceActivity); + } + if (print_abs_send_time && packet->header().extension.hasAbsoluteSendTime) { + if (cycles == -1) { + // Initialize. 
+ max_abs_send_time = packet->header().extension.absoluteSendTime; + cycles = 0; + } + // Abs sender time is 24 bit 6.18 fixed point. Shift by 8 to normalize to + // 32 bits (unsigned). Calculate the difference between this packet's + // send time and the maximum observed. Cast to signed 32-bit to get the + // desired wrap-around behavior. + if (static_cast<int32_t>( + (packet->header().extension.absoluteSendTime << 8) - + (max_abs_send_time << 8)) >= 0) { + // The difference is non-negative, meaning that this packet is newer + // than the previously observed maximum absolute send time. + if (packet->header().extension.absoluteSendTime < max_abs_send_time) { + // Wrap detected. + cycles++; + } + max_abs_send_time = packet->header().extension.absoluteSendTime; + } + // Abs sender time is 24 bit 6.18 fixed point. Divide by 2^18 to convert + // to floating point representation. + double send_time_seconds = + static_cast<double>(packet->header().extension.absoluteSendTime) / + 262144 + + 64.0 * cycles; + fprintf(out_file, " %11f", send_time_seconds); + } + fprintf(out_file, "\n"); + + if (packet->header().payloadType == absl::GetFlag(FLAGS_red)) { + std::list<webrtc::RTPHeader*> red_headers; + packet->ExtractRedHeaders(&red_headers); + while (!red_headers.empty()) { + webrtc::RTPHeader* red = red_headers.front(); + RTC_DCHECK(red); + fprintf(out_file, "* %5u %10u %10u %5i\n", red->sequenceNumber, + red->timestamp, static_cast<unsigned int>(packet->time_ms()), + red->payloadType); + red_headers.pop_front(); + delete red; + } + } + } + + fclose(out_file); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc new file mode 100644 index 0000000000..6aeeb6d129 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_encode.cc @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#ifdef WIN32 +#include <winsock2.h> +#endif +#if defined(WEBRTC_LINUX) || defined(WEBRTC_FUCHSIA) +#include <netinet/in.h> +#endif + +#include <iostream> +#include <map> +#include <string> +#include <vector> + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/memory/memory.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/L16/audio_encoder_L16.h" +#include "api/audio_codecs/g711/audio_encoder_g711.h" +#include "api/audio_codecs/g722/audio_encoder_g722.h" +#include "api/audio_codecs/ilbc/audio_encoder_ilbc.h" +#include "api/audio_codecs/isac/audio_encoder_isac.h" +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" +#include "modules/audio_coding/include/audio_coding_module.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "rtc_base/numerics/safe_conversions.h" + +ABSL_FLAG(bool, list_codecs, false, "Enumerate all codecs"); +ABSL_FLAG(std::string, codec, "opus", "Codec to use"); +ABSL_FLAG(int, + frame_len, + 0, + "Frame length in ms; 0 indicates codec default value"); +ABSL_FLAG(int, bitrate, 0, "Bitrate in kbps; 0 indicates codec default value"); +ABSL_FLAG(int, + payload_type, + -1, + "RTP payload type; -1 indicates codec default value"); +ABSL_FLAG(int, + cng_payload_type, + -1, + "RTP payload type for CNG; -1 indicates default value"); +ABSL_FLAG(int, ssrc, 0, "SSRC to write to the RTP header"); +ABSL_FLAG(bool, dtx, false, "Use DTX/CNG"); +ABSL_FLAG(int, sample_rate, 48000, "Sample rate of the input file"); + +namespace webrtc { +namespace test { +namespace { + +// Add 
new codecs here, and to the map below. +enum class CodecType { + kOpus, + kPcmU, + kPcmA, + kG722, + kPcm16b8, + kPcm16b16, + kPcm16b32, + kPcm16b48, + kIlbc, + kIsac +}; + +struct CodecTypeAndInfo { + CodecType type; + int default_payload_type; + bool internal_dtx; +}; + +// List all supported codecs here. This map defines the command-line parameter +// value (the key string) for selecting each codec, together with information +// whether it is using internal or external DTX/CNG. +const std::map<std::string, CodecTypeAndInfo>& CodecList() { + static const auto* const codec_list = + new std::map<std::string, CodecTypeAndInfo>{ + {"opus", {CodecType::kOpus, 111, true}}, + {"pcmu", {CodecType::kPcmU, 0, false}}, + {"pcma", {CodecType::kPcmA, 8, false}}, + {"g722", {CodecType::kG722, 9, false}}, + {"pcm16b_8", {CodecType::kPcm16b8, 93, false}}, + {"pcm16b_16", {CodecType::kPcm16b16, 94, false}}, + {"pcm16b_32", {CodecType::kPcm16b32, 95, false}}, + {"pcm16b_48", {CodecType::kPcm16b48, 96, false}}, + {"ilbc", {CodecType::kIlbc, 102, false}}, + {"isac", {CodecType::kIsac, 103, false}}}; + return *codec_list; +} + +// This class will receive callbacks from ACM when a packet is ready, and write +// it to the output file. 
+class Packetizer : public AudioPacketizationCallback { + public: + Packetizer(FILE* out_file, uint32_t ssrc, int timestamp_rate_hz) + : out_file_(out_file), + ssrc_(ssrc), + timestamp_rate_hz_(timestamp_rate_hz) {} + + int32_t SendData(AudioFrameType frame_type, + uint8_t payload_type, + uint32_t timestamp, + const uint8_t* payload_data, + size_t payload_len_bytes, + int64_t absolute_capture_timestamp_ms) override { + if (payload_len_bytes == 0) { + return 0; + } + + constexpr size_t kRtpHeaderLength = 12; + constexpr size_t kRtpDumpHeaderLength = 8; + const uint16_t length = htons(rtc::checked_cast<uint16_t>( + kRtpHeaderLength + kRtpDumpHeaderLength + payload_len_bytes)); + const uint16_t plen = htons( + rtc::checked_cast<uint16_t>(kRtpHeaderLength + payload_len_bytes)); + const uint32_t offset = htonl(timestamp / (timestamp_rate_hz_ / 1000)); + RTC_CHECK_EQ(fwrite(&length, sizeof(uint16_t), 1, out_file_), 1); + RTC_CHECK_EQ(fwrite(&plen, sizeof(uint16_t), 1, out_file_), 1); + RTC_CHECK_EQ(fwrite(&offset, sizeof(uint32_t), 1, out_file_), 1); + + const uint8_t rtp_header[] = {0x80, + static_cast<uint8_t>(payload_type & 0x7F), + static_cast<uint8_t>(sequence_number_ >> 8), + static_cast<uint8_t>(sequence_number_), + static_cast<uint8_t>(timestamp >> 24), + static_cast<uint8_t>(timestamp >> 16), + static_cast<uint8_t>(timestamp >> 8), + static_cast<uint8_t>(timestamp), + static_cast<uint8_t>(ssrc_ >> 24), + static_cast<uint8_t>(ssrc_ >> 16), + static_cast<uint8_t>(ssrc_ >> 8), + static_cast<uint8_t>(ssrc_)}; + static_assert(sizeof(rtp_header) == kRtpHeaderLength, ""); + RTC_CHECK_EQ( + fwrite(rtp_header, sizeof(uint8_t), kRtpHeaderLength, out_file_), + kRtpHeaderLength); + ++sequence_number_; // Intended to wrap on overflow. 
+ + RTC_CHECK_EQ( + fwrite(payload_data, sizeof(uint8_t), payload_len_bytes, out_file_), + payload_len_bytes); + + return 0; + } + + private: + FILE* const out_file_; + const uint32_t ssrc_; + const int timestamp_rate_hz_; + uint16_t sequence_number_ = 0; +}; + +void SetFrameLenIfFlagIsPositive(int* config_frame_len) { + if (absl::GetFlag(FLAGS_frame_len) > 0) { + *config_frame_len = absl::GetFlag(FLAGS_frame_len); + } +} + +template <typename T> +typename T::Config GetCodecConfig() { + typename T::Config config; + SetFrameLenIfFlagIsPositive(&config.frame_size_ms); + RTC_CHECK(config.IsOk()); + return config; +} + +AudioEncoderL16::Config Pcm16bConfig(CodecType codec_type) { + auto config = GetCodecConfig<AudioEncoderL16>(); + switch (codec_type) { + case CodecType::kPcm16b8: + config.sample_rate_hz = 8000; + return config; + case CodecType::kPcm16b16: + config.sample_rate_hz = 16000; + return config; + case CodecType::kPcm16b32: + config.sample_rate_hz = 32000; + return config; + case CodecType::kPcm16b48: + config.sample_rate_hz = 48000; + return config; + default: + RTC_DCHECK_NOTREACHED(); + return config; + } +} + +std::unique_ptr<AudioEncoder> CreateEncoder(CodecType codec_type, + int payload_type) { + switch (codec_type) { + case CodecType::kOpus: { + AudioEncoderOpus::Config config = GetCodecConfig<AudioEncoderOpus>(); + if (absl::GetFlag(FLAGS_bitrate) > 0) { + config.bitrate_bps = absl::GetFlag(FLAGS_bitrate); + } + config.dtx_enabled = absl::GetFlag(FLAGS_dtx); + RTC_CHECK(config.IsOk()); + return AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + } + + case CodecType::kPcmU: + case CodecType::kPcmA: { + AudioEncoderG711::Config config = GetCodecConfig<AudioEncoderG711>(); + config.type = codec_type == CodecType::kPcmU + ? 
AudioEncoderG711::Config::Type::kPcmU + : AudioEncoderG711::Config::Type::kPcmA; + RTC_CHECK(config.IsOk()); + return AudioEncoderG711::MakeAudioEncoder(config, payload_type); + } + + case CodecType::kG722: { + return AudioEncoderG722::MakeAudioEncoder( + GetCodecConfig<AudioEncoderG722>(), payload_type); + } + + case CodecType::kPcm16b8: + case CodecType::kPcm16b16: + case CodecType::kPcm16b32: + case CodecType::kPcm16b48: { + return AudioEncoderL16::MakeAudioEncoder(Pcm16bConfig(codec_type), + payload_type); + } + + case CodecType::kIlbc: { + return AudioEncoderIlbc::MakeAudioEncoder( + GetCodecConfig<AudioEncoderIlbc>(), payload_type); + } + + case CodecType::kIsac: { + return AudioEncoderIsac::MakeAudioEncoder( + GetCodecConfig<AudioEncoderIsac>(), payload_type); + } + } + RTC_DCHECK_NOTREACHED(); + return nullptr; +} + +AudioEncoderCngConfig GetCngConfig(int sample_rate_hz) { + AudioEncoderCngConfig cng_config; + const auto default_payload_type = [&] { + switch (sample_rate_hz) { + case 8000: + return 13; + case 16000: + return 98; + case 32000: + return 99; + case 48000: + return 100; + default: + RTC_DCHECK_NOTREACHED(); + } + return 0; + }; + cng_config.payload_type = absl::GetFlag(FLAGS_cng_payload_type) != -1 + ? 
absl::GetFlag(FLAGS_cng_payload_type) + : default_payload_type(); + return cng_config; +} + +int RunRtpEncode(int argc, char* argv[]) { + std::vector<char*> args = absl::ParseCommandLine(argc, argv); + const std::string usage = + "Tool for generating an RTP dump file from audio input.\n" + "Example usage:\n" + "./rtp_encode input.pcm output.rtp --codec=[codec] " + "--frame_len=[frame_len] --bitrate=[bitrate]\n\n"; + if (!absl::GetFlag(FLAGS_list_codecs) && args.size() != 3) { + printf("%s", usage.c_str()); + return 1; + } + + if (absl::GetFlag(FLAGS_list_codecs)) { + printf("The following arguments are valid --codec parameters:\n"); + for (const auto& c : CodecList()) { + printf(" %s\n", c.first.c_str()); + } + return 0; + } + + const auto codec_it = CodecList().find(absl::GetFlag(FLAGS_codec)); + if (codec_it == CodecList().end()) { + printf("%s is not a valid codec name.\n", + absl::GetFlag(FLAGS_codec).c_str()); + printf("Use argument --list_codecs to see all valid codec names.\n"); + return 1; + } + + // Create the codec. + const int payload_type = absl::GetFlag(FLAGS_payload_type) == -1 + ? codec_it->second.default_payload_type + : absl::GetFlag(FLAGS_payload_type); + std::unique_ptr<AudioEncoder> codec = + CreateEncoder(codec_it->second.type, payload_type); + + // Create an external VAD/CNG encoder if needed. + if (absl::GetFlag(FLAGS_dtx) && !codec_it->second.internal_dtx) { + AudioEncoderCngConfig cng_config = GetCngConfig(codec->SampleRateHz()); + RTC_DCHECK(codec); + cng_config.speech_encoder = std::move(codec); + codec = CreateComfortNoiseEncoder(std::move(cng_config)); + } + RTC_DCHECK(codec); + + // Set up ACM. + const int timestamp_rate_hz = codec->RtpTimestampRateHz(); + AudioCodingModule::Config config; + std::unique_ptr<AudioCodingModule> acm(AudioCodingModule::Create(config)); + acm->SetEncoder(std::move(codec)); + + // Open files. 
+ printf("Input file: %s\n", args[1]); + InputAudioFile input_file(args[1], false); // Open input in non-looping mode. + FILE* out_file = fopen(args[2], "wb"); + RTC_CHECK(out_file) << "Could not open file " << args[2] << " for writing"; + printf("Output file: %s\n", args[2]); + fprintf(out_file, "#!rtpplay1.0 \n"); //, + // Write 3 32-bit values followed by 2 16-bit values, all set to 0. This means + // a total of 16 bytes. + const uint8_t file_header[16] = {0}; + RTC_CHECK_EQ(fwrite(file_header, sizeof(file_header), 1, out_file), 1); + + // Create and register the packetizer, which will write the packets to file. + Packetizer packetizer(out_file, absl::GetFlag(FLAGS_ssrc), timestamp_rate_hz); + RTC_DCHECK_EQ(acm->RegisterTransportCallback(&packetizer), 0); + + AudioFrame audio_frame; + audio_frame.samples_per_channel_ = + absl::GetFlag(FLAGS_sample_rate) / 100; // 10 ms + audio_frame.sample_rate_hz_ = absl::GetFlag(FLAGS_sample_rate); + audio_frame.num_channels_ = 1; + + while (input_file.Read(audio_frame.samples_per_channel_, + audio_frame.mutable_data())) { + RTC_CHECK_GE(acm->Add10MsData(audio_frame), 0); + audio_frame.timestamp_ += audio_frame.samples_per_channel_; + } + + return 0; +} + +} // namespace +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::RunRtpEncode(argc, argv); +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc new file mode 100644 index 0000000000..a43c29638c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/tools/rtp_file_source.h" + +#include <string.h> + +#include "absl/strings/string_view.h" +#ifndef WIN32 +#include <netinet/in.h> +#endif + +#include <memory> + +#include "modules/audio_coding/neteq/tools/packet.h" +#include "rtc_base/checks.h" +#include "test/rtp_file_reader.h" + +namespace webrtc { +namespace test { + +RtpFileSource* RtpFileSource::Create(absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter) { + RtpFileSource* source = new RtpFileSource(ssrc_filter); + RTC_CHECK(source->OpenFile(file_name)); + return source; +} + +bool RtpFileSource::ValidRtpDump(absl::string_view file_name) { + std::unique_ptr<RtpFileReader> temp_file( + RtpFileReader::Create(RtpFileReader::kRtpDump, file_name)); + return !!temp_file; +} + +bool RtpFileSource::ValidPcap(absl::string_view file_name) { + std::unique_ptr<RtpFileReader> temp_file( + RtpFileReader::Create(RtpFileReader::kPcap, file_name)); + return !!temp_file; +} + +RtpFileSource::~RtpFileSource() {} + +bool RtpFileSource::RegisterRtpHeaderExtension(RTPExtensionType type, + uint8_t id) { + return rtp_header_extension_map_.RegisterByType(id, type); +} + +std::unique_ptr<Packet> RtpFileSource::NextPacket() { + while (true) { + RtpPacket temp_packet; + if (!rtp_reader_->NextPacket(&temp_packet)) { + return NULL; + } + if (temp_packet.original_length == 0) { + // May be an RTCP packet. + // Read the next one. + continue; + } + auto packet = std::make_unique<Packet>( + rtc::CopyOnWriteBuffer(temp_packet.data, temp_packet.length), + temp_packet.original_length, temp_packet.time_ms, + &rtp_header_extension_map_); + if (!packet->valid_header()) { + continue; + } + if (filter_.test(packet->header().payloadType) || + (ssrc_filter_ && packet->header().ssrc != *ssrc_filter_)) { + // This payload type should be filtered out. Continue to the next packet. 
+ continue; + } + return packet; + } +} + +RtpFileSource::RtpFileSource(absl::optional<uint32_t> ssrc_filter) + : PacketSource(), + ssrc_filter_(ssrc_filter) {} + +bool RtpFileSource::OpenFile(absl::string_view file_name) { + rtp_reader_.reset(RtpFileReader::Create(RtpFileReader::kRtpDump, file_name)); + if (rtp_reader_) + return true; + rtp_reader_.reset(RtpFileReader::Create(RtpFileReader::kPcap, file_name)); + if (!rtp_reader_) { + RTC_FATAL() + << "Couldn't open input file as either a rtpdump or .pcap. Note " + << "that .pcapng is not supported."; + } + return true; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h new file mode 100644 index 0000000000..55505be630 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_file_source.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_ + +#include <stdio.h> + +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "modules/audio_coding/neteq/tools/packet_source.h" +#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" + +namespace webrtc { + +namespace test { + +class RtpFileReader; + +class RtpFileSource : public PacketSource { + public: + // Creates an RtpFileSource reading from `file_name`. 
If the file cannot be + // opened, or has the wrong format, NULL will be returned. + static RtpFileSource* Create( + absl::string_view file_name, + absl::optional<uint32_t> ssrc_filter = absl::nullopt); + + // Checks whether a files is a valid RTP dump or PCAP (Wireshark) file. + static bool ValidRtpDump(absl::string_view file_name); + static bool ValidPcap(absl::string_view file_name); + + ~RtpFileSource() override; + + RtpFileSource(const RtpFileSource&) = delete; + RtpFileSource& operator=(const RtpFileSource&) = delete; + + // Registers an RTP header extension and binds it to `id`. + virtual bool RegisterRtpHeaderExtension(RTPExtensionType type, uint8_t id); + + std::unique_ptr<Packet> NextPacket() override; + + private: + static const int kFirstLineLength = 40; + static const int kRtpFileHeaderSize = 4 + 4 + 4 + 2 + 2; + static const size_t kPacketHeaderSize = 8; + + explicit RtpFileSource(absl::optional<uint32_t> ssrc_filter); + + bool OpenFile(absl::string_view file_name); + + std::unique_ptr<RtpFileReader> rtp_reader_; + const absl::optional<uint32_t> ssrc_filter_; + RtpHeaderExtensionMap rtp_header_extension_map_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_FILE_SOURCE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc new file mode 100644 index 0000000000..e883fc11d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/tools/rtp_generator.h" + + +namespace webrtc { +namespace test { + +uint32_t RtpGenerator::GetRtpHeader(uint8_t payload_type, + size_t payload_length_samples, + RTPHeader* rtp_header) { + RTC_DCHECK(rtp_header); + if (!rtp_header) { + return 0; + } + rtp_header->sequenceNumber = seq_number_++; + rtp_header->timestamp = timestamp_; + timestamp_ += static_cast<uint32_t>(payload_length_samples); + rtp_header->payloadType = payload_type; + rtp_header->markerBit = false; + rtp_header->ssrc = ssrc_; + rtp_header->numCSRCs = 0; + + uint32_t this_send_time = next_send_time_ms_; + RTC_DCHECK_GT(samples_per_ms_, 0); + next_send_time_ms_ += + ((1.0 + drift_factor_) * payload_length_samples) / samples_per_ms_; + return this_send_time; +} + +void RtpGenerator::set_drift_factor(double factor) { + if (factor > -1.0) { + drift_factor_ = factor; + } +} + +uint32_t TimestampJumpRtpGenerator::GetRtpHeader(uint8_t payload_type, + size_t payload_length_samples, + RTPHeader* rtp_header) { + uint32_t ret = RtpGenerator::GetRtpHeader(payload_type, + payload_length_samples, rtp_header); + if (timestamp_ - static_cast<uint32_t>(payload_length_samples) <= + jump_from_timestamp_ && + timestamp_ > jump_from_timestamp_) { + // We just moved across the `jump_from_timestamp_` timestamp. Do the jump. + timestamp_ = jump_to_timestamp_; + } + return ret; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h new file mode 100644 index 0000000000..2e615adec5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_generator.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_ +#define MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_ + +#include "api/rtp_headers.h" + +namespace webrtc { +namespace test { + +// Class for generating RTP headers. +class RtpGenerator { + public: + RtpGenerator(int samples_per_ms, + uint16_t start_seq_number = 0, + uint32_t start_timestamp = 0, + uint32_t start_send_time_ms = 0, + uint32_t ssrc = 0x12345678) + : seq_number_(start_seq_number), + timestamp_(start_timestamp), + next_send_time_ms_(start_send_time_ms), + ssrc_(ssrc), + samples_per_ms_(samples_per_ms), + drift_factor_(0.0) {} + + virtual ~RtpGenerator() {} + + RtpGenerator(const RtpGenerator&) = delete; + RtpGenerator& operator=(const RtpGenerator&) = delete; + + // Writes the next RTP header to `rtp_header`, which will be of type + // `payload_type`. Returns the send time for this packet (in ms). The value of + // `payload_length_samples` determines the send time for the next packet. 
+ virtual uint32_t GetRtpHeader(uint8_t payload_type, + size_t payload_length_samples, + RTPHeader* rtp_header); + + void set_drift_factor(double factor); + + protected: + uint16_t seq_number_; + uint32_t timestamp_; + uint32_t next_send_time_ms_; + const uint32_t ssrc_; + const int samples_per_ms_; + double drift_factor_; +}; + +class TimestampJumpRtpGenerator : public RtpGenerator { + public: + TimestampJumpRtpGenerator(int samples_per_ms, + uint16_t start_seq_number, + uint32_t start_timestamp, + uint32_t jump_from_timestamp, + uint32_t jump_to_timestamp) + : RtpGenerator(samples_per_ms, start_seq_number, start_timestamp), + jump_from_timestamp_(jump_from_timestamp), + jump_to_timestamp_(jump_to_timestamp) {} + + TimestampJumpRtpGenerator(const TimestampJumpRtpGenerator&) = delete; + TimestampJumpRtpGenerator& operator=(const TimestampJumpRtpGenerator&) = + delete; + + uint32_t GetRtpHeader(uint8_t payload_type, + size_t payload_length_samples, + RTPHeader* rtp_header) override; + + private: + uint32_t jump_from_timestamp_; + uint32_t jump_to_timestamp_; +}; + +} // namespace test +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_TOOLS_RTP_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc new file mode 100644 index 0000000000..cccaa9a3bb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtp_jitter.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stdio.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <vector> + +#include "api/array_view.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "rtc_base/buffer.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr size_t kRtpDumpHeaderLength = 8; + +// Returns the next packet or an empty buffer if end of file was encountered. +rtc::Buffer ReadNextPacket(FILE* file) { + // Read the rtpdump header for the next packet. + rtc::Buffer buffer; + buffer.SetData(kRtpDumpHeaderLength, [&](rtc::ArrayView<uint8_t> x) { + return fread(x.data(), 1, x.size(), file); + }); + if (buffer.size() != kRtpDumpHeaderLength) { + return rtc::Buffer(); + } + + // Get length field. This is the total length for this packet written to file, + // including the kRtpDumpHeaderLength bytes already read. + const uint16_t len = ByteReader<uint16_t>::ReadBigEndian(buffer.data()); + RTC_CHECK_GE(len, kRtpDumpHeaderLength); + + // Read remaining data from file directly into buffer. + buffer.AppendData(len - kRtpDumpHeaderLength, [&](rtc::ArrayView<uint8_t> x) { + return fread(x.data(), 1, x.size(), file); + }); + if (buffer.size() != len) { + buffer.Clear(); + } + return buffer; +} + +struct PacketAndTime { + rtc::Buffer packet; + int time; +}; + +void WritePacket(const PacketAndTime& packet, FILE* file) { + // Write the first 4 bytes from the original packet. + const auto* payload_ptr = packet.packet.data(); + RTC_CHECK_EQ(fwrite(payload_ptr, 4, 1, file), 1); + payload_ptr += 4; + + // Convert the new time offset to network endian, and write to file. + uint8_t time[sizeof(uint32_t)]; + ByteWriter<uint32_t, sizeof(uint32_t)>::WriteBigEndian(time, packet.time); + RTC_CHECK_EQ(fwrite(time, sizeof(uint32_t), 1, file), 1); + payload_ptr += 4; // Skip the old time in the original payload. + + // Write the remaining part of the payload. 
+ RTC_DCHECK_EQ(payload_ptr - packet.packet.data(), kRtpDumpHeaderLength); + RTC_CHECK_EQ( + fwrite(payload_ptr, packet.packet.size() - kRtpDumpHeaderLength, 1, file), + 1); +} + +int RunRtpJitter(int argc, char* argv[]) { + const std::string program_name = argv[0]; + const std::string usage = + "Tool for alternating the arrival times in an RTP dump file.\n" + "Example usage:\n" + + program_name + " input.rtp arrival_times_ms.txt output.rtp\n\n"; + if (argc != 4) { + printf("%s", usage.c_str()); + return 1; + } + + printf("Input RTP file: %s\n", argv[1]); + FILE* in_file = fopen(argv[1], "rb"); + RTC_CHECK(in_file) << "Could not open file " << argv[1] << " for reading"; + printf("Timing file: %s\n", argv[2]); + std::ifstream timing_file(argv[2]); + printf("Output file: %s\n", argv[3]); + FILE* out_file = fopen(argv[3], "wb"); + RTC_CHECK(out_file) << "Could not open file " << argv[2] << " for writing"; + + // Copy the RTP file header to the output file. + char header_string[30]; + RTC_CHECK(fgets(header_string, 30, in_file)); + fprintf(out_file, "%s", header_string); + uint8_t file_header[16]; + RTC_CHECK_EQ(fread(file_header, sizeof(file_header), 1, in_file), 1); + RTC_CHECK_EQ(fwrite(file_header, sizeof(file_header), 1, out_file), 1); + + // Read all time values from the timing file. Store in a vector. + std::vector<int> new_arrival_times; + int new_time; + while (timing_file >> new_time) { + new_arrival_times.push_back(new_time); + } + + // Read all packets from the input RTP file, but no more than the number of + // new time values. Store RTP packets together with new time values. + auto time_it = new_arrival_times.begin(); + std::vector<PacketAndTime> packets; + while (1) { + auto packet = ReadNextPacket(in_file); + if (packet.empty() || time_it == new_arrival_times.end()) { + break; + } + packets.push_back({std::move(packet), *time_it}); + ++time_it; + } + + // Sort on new time values. 
+ std::sort(packets.begin(), packets.end(), + [](const PacketAndTime& a, const PacketAndTime& b) { + return a.time < b.time; + }); + + // Write packets to output file. + for (const auto& p : packets) { + WritePacket(p, out_file); + } + + fclose(in_file); + fclose(out_file); + return 0; +} + +} // namespace +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::RunRtpJitter(argc, argv); +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc new file mode 100644 index 0000000000..431de553ae --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/rtpcat.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#include <memory> + +#include "rtc_base/checks.h" +#include "test/rtp_file_reader.h" +#include "test/rtp_file_writer.h" + +using webrtc::test::RtpFileReader; +using webrtc::test::RtpFileWriter; + +int main(int argc, char* argv[]) { + if (argc < 3) { + printf("Concatenates multiple rtpdump files into one.\n"); + printf("Usage: rtpcat in1.rtp int2.rtp [...] 
out.rtp\n"); + exit(1); + } + + std::unique_ptr<RtpFileWriter> output( + RtpFileWriter::Create(RtpFileWriter::kRtpDump, argv[argc - 1])); + RTC_CHECK(output.get() != NULL) << "Cannot open output file."; + printf("Output RTP file: %s\n", argv[argc - 1]); + + for (int i = 1; i < argc - 1; i++) { + std::unique_ptr<RtpFileReader> input( + RtpFileReader::Create(RtpFileReader::kRtpDump, argv[i])); + RTC_CHECK(input.get() != NULL) << "Cannot open input file " << argv[i]; + printf("Input RTP file: %s\n", argv[i]); + + webrtc::test::RtpPacket packet; + while (input->NextPacket(&packet)) + RTC_CHECK(output->WritePacket(&packet)); + } + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc new file mode 100644 index 0000000000..baed812327 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/neteq/underrun_optimizer.h" + +#include <algorithm> + +namespace webrtc { + +namespace { + +constexpr int kDelayBuckets = 100; +constexpr int kBucketSizeMs = 20; + +} // namespace + +UnderrunOptimizer::UnderrunOptimizer(const TickTimer* tick_timer, + int histogram_quantile, + int forget_factor, + absl::optional<int> start_forget_weight, + absl::optional<int> resample_interval_ms) + : tick_timer_(tick_timer), + histogram_(kDelayBuckets, forget_factor, start_forget_weight), + histogram_quantile_(histogram_quantile), + resample_interval_ms_(resample_interval_ms) {} + +void UnderrunOptimizer::Update(int relative_delay_ms) { + absl::optional<int> histogram_update; + if (resample_interval_ms_) { + if (!resample_stopwatch_) { + resample_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + if (static_cast<int>(resample_stopwatch_->ElapsedMs()) > + *resample_interval_ms_) { + histogram_update = max_delay_in_interval_ms_; + resample_stopwatch_ = tick_timer_->GetNewStopwatch(); + max_delay_in_interval_ms_ = 0; + } + max_delay_in_interval_ms_ = + std::max(max_delay_in_interval_ms_, relative_delay_ms); + } else { + histogram_update = relative_delay_ms; + } + if (!histogram_update) { + return; + } + + const int index = *histogram_update / kBucketSizeMs; + if (index < histogram_.NumBuckets()) { + // Maximum delay to register is 2000 ms. 
+ histogram_.Add(index); + } + int bucket_index = histogram_.Quantile(histogram_quantile_); + optimal_delay_ms_ = (1 + bucket_index) * kBucketSizeMs; +} + +void UnderrunOptimizer::Reset() { + histogram_.Reset(); + resample_stopwatch_.reset(); + max_delay_in_interval_ms_ = 0; + optimal_delay_ms_.reset(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h new file mode 100644 index 0000000000..b37ce18795 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_ +#define MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_ + +#include <memory> + +#include "absl/types/optional.h" +#include "api/neteq/tick_timer.h" +#include "modules/audio_coding/neteq/histogram.h" + +namespace webrtc { + +// Estimates probability of buffer underrun due to late packet arrival. +// The optimal delay is decided such that the probability of underrun is lower +// than 1 - `histogram_quantile`. 
+class UnderrunOptimizer { + public: + UnderrunOptimizer(const TickTimer* tick_timer, + int histogram_quantile, + int forget_factor, + absl::optional<int> start_forget_weight, + absl::optional<int> resample_interval_ms); + + void Update(int relative_delay_ms); + + absl::optional<int> GetOptimalDelayMs() const { return optimal_delay_ms_; } + + void Reset(); + + private: + const TickTimer* tick_timer_; + Histogram histogram_; + const int histogram_quantile_; // In Q30. + const absl::optional<int> resample_interval_ms_; + std::unique_ptr<TickTimer::Stopwatch> resample_stopwatch_; + int max_delay_in_interval_ms_ = 0; + absl::optional<int> optimal_delay_ms_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_NETEQ_UNDERRUN_OPTIMIZER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc new file mode 100644 index 0000000000..a86e9cf107 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/neteq/underrun_optimizer_unittest.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/underrun_optimizer.h" + +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr int kDefaultHistogramQuantile = 1020054733; // 0.95 in Q30. +constexpr int kForgetFactor = 32745; // 0.9993 in Q15. 
+ +} // namespace + +TEST(UnderrunOptimizerTest, ResamplePacketDelays) { + TickTimer tick_timer; + constexpr int kResampleIntervalMs = 500; + UnderrunOptimizer underrun_optimizer(&tick_timer, kDefaultHistogramQuantile, + kForgetFactor, absl::nullopt, + kResampleIntervalMs); + + // The histogram should be updated once with the maximum delay observed for + // the following sequence of updates. + for (int i = 0; i < 500; i += 20) { + underrun_optimizer.Update(i); + EXPECT_FALSE(underrun_optimizer.GetOptimalDelayMs()); + } + tick_timer.Increment(kResampleIntervalMs / tick_timer.ms_per_tick() + 1); + underrun_optimizer.Update(0); + EXPECT_EQ(underrun_optimizer.GetOptimalDelayMs(), 500); +} + +} // namespace webrtc |