diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-12 05:35:37 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-12 05:35:37 +0000 |
commit | a90a5cba08fdf6c0ceb95101c275108a152a3aed (patch) | |
tree | 532507288f3defd7f4dcf1af49698bcb76034855 /third_party/libwebrtc/modules/audio_coding | |
parent | Adding debian version 126.0.1-1. (diff) | |
download | firefox-a90a5cba08fdf6c0ceb95101c275108a152a3aed.tar.xz firefox-a90a5cba08fdf6c0ceb95101c275108a152a3aed.zip |
Merging upstream version 127.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_coding')
27 files changed, 324 insertions, 480 deletions
diff --git a/third_party/libwebrtc/modules/audio_coding/BUILD.gn b/third_party/libwebrtc/modules/audio_coding/BUILD.gn index ddd1fd2656..a49df7e7d2 100644 --- a/third_party/libwebrtc/modules/audio_coding/BUILD.gn +++ b/third_party/libwebrtc/modules/audio_coding/BUILD.gn @@ -689,8 +689,6 @@ rtc_library("neteq") { "neteq/packet_arrival_history.h", "neteq/packet_buffer.cc", "neteq/packet_buffer.h", - "neteq/post_decode_vad.cc", - "neteq/post_decode_vad.h", "neteq/preemptive_expand.cc", "neteq/preemptive_expand.h", "neteq/random_vector.cc", @@ -1655,7 +1653,6 @@ if (rtc_include_tests) { "neteq/normal_unittest.cc", "neteq/packet_arrival_history_unittest.cc", "neteq/packet_buffer_unittest.cc", - "neteq/post_decode_vad_unittest.cc", "neteq/random_vector_unittest.cc", "neteq/red_payload_splitter_unittest.cc", "neteq/reorder_optimizer_unittest.cc", diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc index a5bf88e547..4deabdf7ff 100644 --- a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -50,11 +50,7 @@ std::unique_ptr<NetEq> CreateNetEq( AcmReceiver::Config::Config( rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) - : clock(*Clock::GetRealTimeClockRaw()), decoder_factory(decoder_factory) { - // Post-decode VAD is disabled by default in NetEq, however, Audio - // Conference Mixer relies on VAD decisions and fails without them. - neteq_config.enable_post_decode_vad = true; -} + : clock(*Clock::GetRealTimeClockRaw()), decoder_factory(decoder_factory) {} AcmReceiver::Config::Config(const Config&) = default; AcmReceiver::Config::~Config() = default; diff --git a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc index cda6688157..8b35f4a621 100644 --- a/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc +++ b/third_party/libwebrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc @@ -190,9 +190,6 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi { const size_t output_channels = info.num_channels; const size_t samples_per_ms = rtc::checked_cast<size_t>( rtc::CheckedDivExact(output_sample_rate_hz, 1000)); - const AudioFrame::VADActivity expected_vad_activity = - output_sample_rate_hz > 16000 ? AudioFrame::kVadActive - : AudioFrame::kVadPassive; // Expect the first output timestamp to be 5*fs/8000 samples before the // first inserted timestamp (because of NetEq's look-ahead). (This value is @@ -217,7 +214,6 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi { EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_); EXPECT_EQ(output_channels, frame.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_); - EXPECT_EQ(expected_vad_activity, frame.vad_activity_); EXPECT_FALSE(muted); } } @@ -243,61 +239,6 @@ TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) { } #if defined(WEBRTC_ANDROID) -#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad -#else -#define MAYBE_PostdecodingVad PostdecodingVad -#endif -TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { - EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad); - constexpr int payload_type = 34; - const SdpAudioFormat codec = {"L16", 16000, 1}; - const AudioCodecInfo info = SetEncoder(payload_type, codec); - receiver_->SetCodecs({{payload_type, codec}}); - constexpr int kNumPackets = 5; - AudioFrame frame; - for (int n = 0; n < kNumPackets; ++n) { - const int num_10ms_frames = InsertOnePacketOfSilence(info); - for (int k = 0; k < num_10ms_frames; ++k) { - bool muted; - ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); - } - } - EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_); -} - -class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi { - protected: - AcmReceiverTestPostDecodeVadPassiveOldApi() { - config_.neteq_config.enable_post_decode_vad = false; - } -}; - -#if defined(WEBRTC_ANDROID) -#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad -#else -#define MAYBE_PostdecodingVad PostdecodingVad -#endif -TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) { - EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad); - constexpr int payload_type = 34; - const SdpAudioFormat codec = {"L16", 16000, 1}; - const AudioCodecInfo info = SetEncoder(payload_type, codec); - auto const value = encoder_factory_->QueryAudioEncoder(codec); - ASSERT_TRUE(value.has_value()); - receiver_->SetCodecs({{payload_type, codec}}); - const int kNumPackets = 5; - AudioFrame frame; - for (int n = 0; n < kNumPackets; ++n) { - const int num_10ms_frames = InsertOnePacketOfSilence(info); - for (int k = 0; k < num_10ms_frames; ++k) { - bool muted; - ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); - } - } - EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_); -} - -#if defined(WEBRTC_ANDROID) #define MAYBE_LastAudioCodec DISABLED_LastAudioCodec #else #define MAYBE_LastAudioCodec LastAudioCodec diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc index 46ac671b30..ff7e919d9b 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc @@ -58,6 +58,11 @@ int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, return static_cast<int>(encoded_len / Channels()); } +int AudioDecoderPcmU::PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const { + return PacketDuration(encoded, encoded_len); +} + void AudioDecoderPcmA::Reset() {} std::vector<AudioDecoder::ParseResult> AudioDecoderPcmA::ParsePayload( @@ -99,4 +104,9 @@ int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, return static_cast<int>(encoded_len / Channels()); } +int AudioDecoderPcmA::PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const { + return PacketDuration(encoded, encoded_len); +} + } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h index 3fa42cba30..5531d6e7f0 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h @@ -35,6 +35,8 @@ class AudioDecoderPcmU final : public AudioDecoder { std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, uint32_t timestamp) override; int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; int SampleRateHz() const override; size_t Channels() const override; @@ -62,6 +64,8 @@ class AudioDecoderPcmA final : public AudioDecoder { std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, uint32_t timestamp) override; int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; int SampleRateHz() const override; size_t Channels() const override; diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc index e969ed1189..bca47cea13 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc @@ -63,6 +63,11 @@ int AudioDecoderG722Impl::PacketDuration(const uint8_t* encoded, return static_cast<int>(2 * encoded_len / Channels()); } +int AudioDecoderG722Impl::PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const { + return PacketDuration(encoded, encoded_len); +} + int AudioDecoderG722Impl::SampleRateHz() const { return 16000; } diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h index 5872fad5de..e7083c3fd6 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h @@ -30,6 +30,8 @@ class AudioDecoderG722Impl final : public AudioDecoder { std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, uint32_t timestamp) override; int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; int SampleRateHz() const override; size_t Channels() const override; diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc index cff9685548..0f53409f48 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc @@ -17,12 +17,15 @@ #include "api/array_view.h" #include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" #include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels, int sample_rate_hz) - : channels_{num_channels}, sample_rate_hz_{sample_rate_hz} { + : channels_(num_channels), + sample_rate_hz_(sample_rate_hz), + generate_plc_(field_trial::IsEnabled("WebRTC-Audio-OpusGeneratePlc")) { RTC_DCHECK(num_channels == 1 || num_channels == 2); RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 48000); const int error = @@ -125,4 +128,22 @@ size_t AudioDecoderOpusImpl::Channels() const { return channels_; } +void AudioDecoderOpusImpl::GeneratePlc( + size_t requested_samples_per_channel, + rtc::BufferT<int16_t>* concealment_audio) { + if (!generate_plc_) { + return; + } + int plc_size = WebRtcOpus_PlcDuration(dec_state_) * channels_; + concealment_audio->AppendData(plc_size, [&](rtc::ArrayView<int16_t> decoded) { + int16_t temp_type = 1; + int ret = + WebRtcOpus_Decode(dec_state_, nullptr, 0, decoded.data(), &temp_type); + if (ret < 0) { + return 0; + } + return ret; + }); +} + } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h index e8fd0440bc..2dd62fd4ee 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h @@ -40,6 +40,8 @@ class AudioDecoderOpusImpl final : public AudioDecoder { bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override; int SampleRateHz() const override; size_t Channels() const override; + void GeneratePlc(size_t requested_samples_per_channel, + rtc::BufferT<int16_t>* concealment_audio) override; protected: int DecodeInternal(const uint8_t* encoded, @@ -57,6 +59,7 @@ class AudioDecoderOpusImpl final : public AudioDecoder { OpusDecInst* dec_state_; const size_t channels_; const int sample_rate_hz_; + const bool generate_plc_; }; } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc index 7761efe8b3..1e2b5db331 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc @@ -67,4 +67,9 @@ int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded, return static_cast<int>(encoded_len / (2 * Channels())); } +int AudioDecoderPcm16B::PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const { + return PacketDuration(encoded, encoded_len); +} + } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h index 6f50161d3f..c31cc5d0a2 100644 --- a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h @@ -32,6 +32,8 @@ class AudioDecoderPcm16B final : public AudioDecoder { std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, uint32_t timestamp) override; int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; int SampleRateHz() const override; size_t Channels() const override; diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc index 2c95d3b390..0c33dba47a 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.cc @@ -17,7 +17,6 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/neteq/audio_multi_vector.h" #include "modules/audio_coding/neteq/cross_correlation.h" -#include "modules/audio_coding/neteq/post_decode_vad.h" namespace webrtc { namespace { @@ -44,17 +43,11 @@ void BackgroundNoise::Reset() { } } -bool BackgroundNoise::Update(const AudioMultiVector& input, - const PostDecodeVad& vad) { +bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) { bool filter_params_saved = false; - if (vad.running() && vad.active_speech()) { - // Do not update the background noise parameters if we know that the signal - // is active speech. - return filter_params_saved; - } int32_t auto_correlation[kMaxLpcOrder + 1]; - int16_t fiter_output[kMaxLpcOrder + kResidualLength]; + int16_t filter_output[kMaxLpcOrder + kResidualLength]; int16_t reflection_coefficients[kMaxLpcOrder]; int16_t lpc_coefficients[kMaxLpcOrder + 1]; @@ -62,14 +55,13 @@ bool BackgroundNoise::Update(const AudioMultiVector& input, ChannelParameters& parameters = channel_parameters_[channel_ix]; int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; - RTC_DCHECK_GE(input.Size(), kVecLen); - input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal); + RTC_DCHECK_GE(sync_buffer.Size(), kVecLen); + sync_buffer[channel_ix].CopyTo(kVecLen, sync_buffer.Size() - kVecLen, + temp_signal); int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation); - if ((!vad.running() && - sample_energy < parameters.energy_update_threshold) || - (vad.running() && !vad.active_speech())) { + if (sample_energy < parameters.energy_update_threshold) { // Generate LPC coefficients. if (auto_correlation[0] <= 0) { // Center value in auto-correlation is not positive. Do not update. @@ -95,10 +87,10 @@ bool BackgroundNoise::Update(const AudioMultiVector& input, // Generate the CNG gain factor by looking at the energy of the residual. WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, - fiter_output, lpc_coefficients, + filter_output, lpc_coefficients, kMaxLpcOrder + 1, kResidualLength); int32_t residual_energy = WebRtcSpl_DotProductWithScale( - fiter_output, fiter_output, kResidualLength, 0); + filter_output, filter_output, kResidualLength, 0); // Check spectral flatness. // Comparing the residual variance with the input signal variance tells @@ -117,9 +109,8 @@ bool BackgroundNoise::Update(const AudioMultiVector& input, filter_params_saved = true; } } else { - // Will only happen if post-decode VAD is disabled and `sample_energy` is - // not low enough. Increase the threshold for update so that it increases - // by a factor 4 in 4 seconds. + // Will only happen if `sample_energy` is not low enough. Increase the + // threshold for update so that it increases by a factor 4 in 4 seconds. IncrementEnergyThreshold(channel_ix, sample_energy); } } diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h index 8e6d5890a0..9ef0131c92 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h +++ b/third_party/libwebrtc/modules/audio_coding/neteq/background_noise.h @@ -39,9 +39,9 @@ class BackgroundNoise { void Reset(); // Updates the parameter estimates based on the signal currently in the - // `sync_buffer`, and on the latest decision in `vad` if it is running. + // `sync_buffer`. // Returns true if the filter parameters are updated. - bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad); + bool Update(const AudioMultiVector& sync_buffer); // Generates background noise given a random vector and writes the output to // `buffer`. diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc index 6648fd8709..f68c05767d 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic.cc @@ -14,7 +14,6 @@ #include <cstdint> #include <memory> -#include <string> #include "absl/types/optional.h" #include "api/neteq/neteq.h" @@ -22,7 +21,6 @@ #include "modules/audio_coding/neteq/packet_arrival_history.h" #include "modules/audio_coding/neteq/packet_buffer.h" #include "rtc_base/checks.h" -#include "rtc_base/experiments/field_trial_parser.h" #include "rtc_base/experiments/struct_parameters_parser.h" #include "rtc_base/logging.h" #include "rtc_base/numerics/safe_conversions.h" @@ -102,6 +100,7 @@ DecisionLogic::DecisionLogic( packet_arrival_history_(packet_arrival_history ? std::move(packet_arrival_history) : std::make_unique<PacketArrivalHistory>( + config.tick_timer, config_.packet_history_size_ms)), tick_timer_(config.tick_timer), disallow_time_stretching_(!config.allow_time_stretching), @@ -221,14 +220,14 @@ absl::optional<int> DecisionLogic::PacketArrived( packet_length_samples_ = info.packet_length_samples; delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz); } - int64_t time_now_ms = tick_timer_->ticks() * tick_timer_->ms_per_tick(); - packet_arrival_history_->Insert(info.main_timestamp, time_now_ms); - if (packet_arrival_history_->size() < 2) { + bool inserted = packet_arrival_history_->Insert(info.main_timestamp, + info.packet_length_samples); + if (!inserted || packet_arrival_history_->size() < 2) { // No meaningful delay estimate unless at least 2 packets have arrived. return absl::nullopt; } int arrival_delay_ms = - packet_arrival_history_->GetDelayMs(info.main_timestamp, time_now_ms); + packet_arrival_history_->GetDelayMs(info.main_timestamp); bool reordered = !packet_arrival_history_->IsNewestRtpTimestamp(info.main_timestamp); delay_manager_->Update(arrival_delay_ms, reordered); @@ -464,8 +463,7 @@ int DecisionLogic::GetPlayoutDelayMs( NetEqController::NetEqStatus status) const { uint32_t playout_timestamp = status.target_timestamp - status.sync_buffer_samples; - return packet_arrival_history_->GetDelayMs( - playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick()); + return packet_arrival_history_->GetDelayMs(playout_timestamp); } } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc index 9e9902af50..4b306f2639 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/decision_logic_unittest.cc @@ -14,12 +14,10 @@ #include "api/neteq/neteq_controller.h" #include "api/neteq/tick_timer.h" -#include "modules/audio_coding/neteq/buffer_level_filter.h" #include "modules/audio_coding/neteq/delay_manager.h" #include "modules/audio_coding/neteq/mock/mock_buffer_level_filter.h" #include "modules/audio_coding/neteq/mock/mock_delay_manager.h" #include "modules/audio_coding/neteq/mock/mock_packet_arrival_history.h" -#include "test/field_trial.h" #include "test/gtest.h" namespace webrtc { @@ -64,7 +62,8 @@ class DecisionLogicTest : public ::testing::Test { mock_delay_manager_ = delay_manager.get(); auto buffer_level_filter = std::make_unique<MockBufferLevelFilter>(); mock_buffer_level_filter_ = buffer_level_filter.get(); - auto packet_arrival_history = std::make_unique<MockPacketArrivalHistory>(); + auto packet_arrival_history = + std::make_unique<MockPacketArrivalHistory>(&tick_timer_); mock_packet_arrival_history_ = packet_arrival_history.get(); decision_logic_ = std::make_unique<DecisionLogic>( config, std::move(delay_manager), std::move(buffer_level_filter), @@ -82,7 +81,7 @@ class DecisionLogicTest : public ::testing::Test { TEST_F(DecisionLogicTest, NormalOperation) { EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) .WillRepeatedly(Return(100)); - EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_, _)) + EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_)) .WillRepeatedly(Return(100)); EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs()) .WillRepeatedly(Return(0)); @@ -98,7 +97,7 @@ TEST_F(DecisionLogicTest, NormalOperation) { TEST_F(DecisionLogicTest, Accelerate) { EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) .WillRepeatedly(Return(100)); - EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_, _)) + EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_)) .WillRepeatedly(Return(150)); EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs()) .WillRepeatedly(Return(0)); @@ -114,7 +113,7 @@ TEST_F(DecisionLogicTest, Accelerate) { TEST_F(DecisionLogicTest, FastAccelerate) { EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) .WillRepeatedly(Return(100)); - EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_, _)) + EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_)) .WillRepeatedly(Return(500)); EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs()) .WillRepeatedly(Return(0)); @@ -130,7 +129,7 @@ TEST_F(DecisionLogicTest, FastAccelerate) { TEST_F(DecisionLogicTest, PreemptiveExpand) { EXPECT_CALL(*mock_delay_manager_, TargetDelayMs()) .WillRepeatedly(Return(100)); - EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_, _)) + EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_)) .WillRepeatedly(Return(50)); EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs()) .WillRepeatedly(Return(0)); diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_arrival_history.h b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_arrival_history.h index 1b2080cd94..d4217cf2f8 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_arrival_history.h +++ b/third_party/libwebrtc/modules/audio_coding/neteq/mock/mock_packet_arrival_history.h @@ -11,6 +11,7 @@ #ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_ARRIVAL_HISTORY_H_ #define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_ARRIVAL_HISTORY_H_ +#include "api/neteq/tick_timer.h" #include "modules/audio_coding/neteq/packet_arrival_history.h" #include "test/gmock.h" @@ -18,12 +19,10 @@ namespace webrtc { class MockPacketArrivalHistory : public PacketArrivalHistory { public: - MockPacketArrivalHistory() : PacketArrivalHistory(0) {} + MockPacketArrivalHistory(const TickTimer* tick_timer) + : PacketArrivalHistory(tick_timer, 0) {} - MOCK_METHOD(int, - GetDelayMs, - (uint32_t rtp_timestamp, int64_t time_ms), - (const override)); + MOCK_METHOD(int, GetDelayMs, (uint32_t rtp_timestamp), (const override)); MOCK_METHOD(int, GetMaxDelayMs, (), (const override)); }; diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc index e5c8bf6c08..6a76096b49 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -20,6 +20,7 @@ #include <vector> #include "api/audio_codecs/audio_decoder.h" +#include "api/neteq/neteq_controller.h" #include "api/neteq/tick_timer.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/codecs/cng/webrtc_cng.h" @@ -36,7 +37,6 @@ #include "modules/audio_coding/neteq/normal.h" #include "modules/audio_coding/neteq/packet.h" #include "modules/audio_coding/neteq/packet_buffer.h" -#include "modules/audio_coding/neteq/post_decode_vad.h" #include "modules/audio_coding/neteq/preemptive_expand.h" #include "modules/audio_coding/neteq/red_payload_splitter.h" #include "modules/audio_coding/neteq/statistics_calculator.h" @@ -50,6 +50,7 @@ #include "rtc_base/strings/audio_format_to_string.h" #include "rtc_base/trace_event.h" #include "system_wrappers/include/clock.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { namespace { @@ -70,49 +71,26 @@ std::unique_ptr<NetEqController> CreateNetEqController( return controller_factory.CreateNetEqController(config); } -void SetAudioFrameActivityAndType(bool vad_enabled, - NetEqImpl::OutputType type, - AudioFrame::VADActivity last_vad_activity, - AudioFrame* audio_frame) { +AudioFrame::SpeechType ToSpeechType(NetEqImpl::OutputType type) { switch (type) { case NetEqImpl::OutputType::kNormalSpeech: { - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - audio_frame->vad_activity_ = AudioFrame::kVadActive; - break; - } - case NetEqImpl::OutputType::kVadPassive: { - // This should only be reached if the VAD is enabled. - RTC_DCHECK(vad_enabled); - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - break; + return AudioFrame::kNormalSpeech; } case NetEqImpl::OutputType::kCNG: { - audio_frame->speech_type_ = AudioFrame::kCNG; - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - break; + return AudioFrame::kCNG; } case NetEqImpl::OutputType::kPLC: { - audio_frame->speech_type_ = AudioFrame::kPLC; - audio_frame->vad_activity_ = last_vad_activity; - break; + return AudioFrame::kPLC; } case NetEqImpl::OutputType::kPLCCNG: { - audio_frame->speech_type_ = AudioFrame::kPLCCNG; - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - break; + return AudioFrame::kPLCCNG; } case NetEqImpl::OutputType::kCodecPLC: { - audio_frame->speech_type_ = AudioFrame::kCodecPLC; - audio_frame->vad_activity_ = last_vad_activity; - break; + return AudioFrame::kCodecPLC; } default: RTC_DCHECK_NOTREACHED(); - } - if (!vad_enabled) { - // Always set kVadUnknown when receive VAD is inactive. - audio_frame->vad_activity_ = AudioFrame::kVadUnknown; + return AudioFrame::kUndefined; } } @@ -169,11 +147,12 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, packet_buffer_(std::move(deps.packet_buffer)), red_payload_splitter_(std::move(deps.red_payload_splitter)), timestamp_scaler_(std::move(deps.timestamp_scaler)), - vad_(new PostDecodeVad()), expand_factory_(std::move(deps.expand_factory)), accelerate_factory_(std::move(deps.accelerate_factory)), preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)), stats_(std::move(deps.stats)), + enable_fec_delay_adaptation_( + !field_trial::IsDisabled("WebRTC-Audio-NetEqFecDelayAdaptation")), controller_(std::move(deps.neteq_controller)), last_mode_(Mode::kNormal), decoded_buffer_length_(kMaxFrameSize), @@ -211,10 +190,6 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, if (create_components) { SetSampleRateAndChannels(fs, 1); // Default is 1 channel. } - RTC_DCHECK(!vad_->enabled()); - if (config.enable_post_decode_vad) { - vad_->Enable(); - } } NetEqImpl::~NetEqImpl() = default; @@ -252,9 +227,7 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame, audio_frame->sample_rate_hz_, rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100)); RTC_DCHECK_EQ(*muted, audio_frame->muted()); - SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(), - last_vad_activity_, audio_frame); - last_vad_activity_ = audio_frame->vad_activity_; + audio_frame->speech_type_ = ToSpeechType(LastOutputType()); last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || last_output_sample_rate_hz_ == 16000 || @@ -398,18 +371,6 @@ NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const { return result; } -void NetEqImpl::EnableVad() { - MutexLock lock(&mutex_); - RTC_DCHECK(vad_.get()); - vad_->Enable(); -} - -void NetEqImpl::DisableVad() { - MutexLock lock(&mutex_); - RTC_DCHECK(vad_.get()); - vad_->Disable(); -} - absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const { MutexLock lock(&mutex_); if (first_packet_ || last_mode_ == Mode::kRfc3389Cng || @@ -695,6 +656,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, packet_buffer_->Flush(); buffer_flush_occured = true; } + NetEqController::PacketArrivedInfo info = ToPacketArrivedInfo(packet); int return_val = packet_buffer_->InsertPacket(std::move(packet)); if (return_val == PacketBuffer::kFlushed) { buffer_flush_occured = true; @@ -702,6 +664,15 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, // An error occurred. return kOtherError; } + if (enable_fec_delay_adaptation_) { + info.buffer_flush = buffer_flush_occured; + const bool should_update_stats = !new_codec_ && !buffer_flush_occured; + auto relative_delay = + controller_->PacketArrived(fs_hz_, should_update_stats, info); + if (relative_delay) { + stats_->RelativePacketArrivalDelay(relative_delay.value()); + } + } } if (buffer_flush_occured) { @@ -752,24 +723,26 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, } } - const DecoderDatabase::DecoderInfo* dec_info = - decoder_database_->GetDecoderInfo(main_payload_type); - RTC_DCHECK(dec_info); // Already checked that the payload type is known. - - NetEqController::PacketArrivedInfo info; - info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf(); - info.packet_length_samples = - number_of_primary_packets * decoder_frame_length_; - info.main_timestamp = main_timestamp; - info.main_sequence_number = main_sequence_number; - info.is_dtx = is_dtx; - info.buffer_flush = buffer_flush_occured; - - const bool should_update_stats = !new_codec_; - auto relative_delay = - controller_->PacketArrived(fs_hz_, should_update_stats, info); - if (relative_delay) { - stats_->RelativePacketArrivalDelay(relative_delay.value()); + if (!enable_fec_delay_adaptation_) { + const DecoderDatabase::DecoderInfo* dec_info = + decoder_database_->GetDecoderInfo(main_payload_type); + RTC_DCHECK(dec_info); // Already checked that the payload type is known. + + NetEqController::PacketArrivedInfo info; + info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf(); + info.packet_length_samples = + number_of_primary_packets * decoder_frame_length_; + info.main_timestamp = main_timestamp; + info.main_sequence_number = main_sequence_number; + info.is_dtx = is_dtx; + info.buffer_flush = buffer_flush_occured; + + const bool should_update_stats = !new_codec_; + auto relative_delay = + controller_->PacketArrived(fs_hz_, should_update_stats, info); + if (relative_delay) { + stats_->RelativePacketArrivalDelay(relative_delay.value()); + } } return 0; } @@ -858,11 +831,8 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, last_decoded_type_ = speech_type; } - RTC_DCHECK(vad_.get()); bool sid_frame_available = (operation == Operation::kRfc3389Cng && !packet_list.empty()); - vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type, - sid_frame_available, fs_hz_); // This is the criterion that we did decode some data through the speech // decoder, and the operation resulted in comfort noise. @@ -1012,7 +982,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, (last_mode_ == Mode::kPreemptiveExpandFail) || (last_mode_ == Mode::kRfc3389Cng) || (last_mode_ == Mode::kCodecInternalCng)) { - background_noise_->Update(*sync_buffer_, *vad_.get()); + background_noise_->Update(*sync_buffer_); } if (operation == Operation::kDtmf) { @@ -2088,10 +2058,6 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { if (cng_decoder) cng_decoder->Reset(); - // Reinit post-decode VAD with new sample rate. - RTC_DCHECK(vad_.get()); // Cannot be NULL here. - vad_->Init(); - // Delete algorithm buffer and create a new one. algorithm_buffer_.reset(new AudioMultiVector(channels)); @@ -2132,7 +2098,6 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { } NetEqImpl::OutputType NetEqImpl::LastOutputType() { - RTC_DCHECK(vad_.get()); RTC_DCHECK(expand_.get()); if (last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kRfc3389Cng) { @@ -2142,12 +2107,27 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() { return OutputType::kPLCCNG; } else if (last_mode_ == Mode::kExpand) { return OutputType::kPLC; - } else if (vad_->running() && !vad_->active_speech()) { - return OutputType::kVadPassive; } else if (last_mode_ == Mode::kCodecPlc) { return OutputType::kCodecPLC; } else { return OutputType::kNormalSpeech; } } + +NetEqController::PacketArrivedInfo NetEqImpl::ToPacketArrivedInfo( + const Packet& packet) const { + const DecoderDatabase::DecoderInfo* dec_info = + decoder_database_->GetDecoderInfo(packet.payload_type); + + NetEqController::PacketArrivedInfo info; + info.is_cng_or_dtmf = + dec_info && (dec_info->IsComfortNoise() || dec_info->IsDtmf()); + info.packet_length_samples = + packet.frame ? packet.frame->Duration() : decoder_frame_length_; + info.main_timestamp = packet.timestamp; + info.main_sequence_number = packet.sequence_number; + info.is_dtx = packet.frame && packet.frame->IsDtxPacket(); + return info; +} + } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h index f8f2b06410..eed7645e7d 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_impl.h @@ -48,7 +48,6 @@ class Merge; class NackTracker; class Normal; class RedPayloadSplitter; -class PostDecodeVad; class PreemptiveExpand; class RandomVector; class SyncBuffer; @@ -171,13 +170,6 @@ class NetEqImpl : public webrtc::NetEq { NetEqOperationsAndState GetOperationsAndState() const override; - // Enables post-decode VAD. When enabled, GetAudio() will return - // kOutputVADPassive when the signal contains no speech. - void EnableVad() override; - - // Disables post-decode VAD. - void DisableVad() override; - absl::optional<uint32_t> GetPlayoutTimestamp() const override; int last_output_sample_rate_hz() const override; @@ -342,6 +334,9 @@ class NetEqImpl : public webrtc::NetEq { NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + NetEqController::PacketArrivedInfo ToPacketArrivedInfo( + const Packet& packet) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + Clock* const clock_; mutable Mutex mutex_; @@ -356,13 +351,13 @@ class NetEqImpl : public webrtc::NetEq { RTC_GUARDED_BY(mutex_); const std::unique_ptr<TimestampScaler> timestamp_scaler_ RTC_GUARDED_BY(mutex_); - const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_); const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_); const std::unique_ptr<AccelerateFactory> accelerate_factory_ RTC_GUARDED_BY(mutex_); const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_ RTC_GUARDED_BY(mutex_); const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_); + const bool enable_fec_delay_adaptation_ RTC_GUARDED_BY(mutex_); std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_); std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_); @@ -397,8 +392,6 @@ class NetEqImpl : public webrtc::NetEq { std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_); bool nack_enabled_ RTC_GUARDED_BY(mutex_); const bool enable_muted_state_ RTC_GUARDED_BY(mutex_); - AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) = - AudioFrame::kVadPassive; std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_ RTC_GUARDED_BY(mutex_); std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_); diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc index aec7e580ec..7104b7a6dc 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/neteq_unittest.cc @@ -76,12 +76,13 @@ TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) { webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp"); const std::string output_checksum = - "2efdbea92c3fb2383c59f89d881efec9f94001d0|" - "a6831b946b59913852ae3e53f99fa8f209bb23cd"; + "434bdc4ec08546510ee903d001c8be1a01c44e24|" + "4336be0091e2faad7a194c16ee0a05e727325727|" + "cefd2de4adfa8f6a9b66a3639ad63c2f6779d0cd"; const std::string network_stats_checksum = - "dfaf4399fd60293405290476ccf1c05c807c71a0|" - "076662525572dba753b11578330bd491923f7f5e"; + "5f2c8e3dff9cff55dd7a9f4167939de001566d95|" + "80ab17c17da030d4f2dfbf314ac44aacdadd7f0c"; DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum, absl::GetFlag(FLAGS_gen_ref)); diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc index 2077383f76..a36c8a2b06 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc @@ -11,95 +11,122 @@ #include "modules/audio_coding/neteq/packet_arrival_history.h" #include <algorithm> +#include <cstdint> #include "api/neteq/tick_timer.h" +#include "rtc_base/checks.h" namespace webrtc { -PacketArrivalHistory::PacketArrivalHistory(int window_size_ms) - : window_size_ms_(window_size_ms) {} +PacketArrivalHistory::PacketArrivalHistory(const TickTimer* tick_timer, + int window_size_ms) + : tick_timer_(tick_timer), window_size_ms_(window_size_ms) {} -void PacketArrivalHistory::Insert(uint32_t rtp_timestamp, - int64_t arrival_time_ms) { - RTC_DCHECK(sample_rate_khz_ > 0); - int64_t unwrapped_rtp_timestamp = timestamp_unwrapper_.Unwrap(rtp_timestamp); - if (!newest_rtp_timestamp_ || - unwrapped_rtp_timestamp > *newest_rtp_timestamp_) { - newest_rtp_timestamp_ = unwrapped_rtp_timestamp; +bool PacketArrivalHistory::Insert(uint32_t rtp_timestamp, + int packet_length_samples) { + int64_t arrival_timestamp = + tick_timer_->ticks() * tick_timer_->ms_per_tick() * sample_rate_khz_; + PacketArrival packet(timestamp_unwrapper_.Unwrap(rtp_timestamp), + arrival_timestamp, packet_length_samples); + if (IsObsolete(packet)) { + return false; } - history_.emplace_back(unwrapped_rtp_timestamp / sample_rate_khz_, - arrival_time_ms); - MaybeUpdateCachedArrivals(history_.back()); - while (history_.front().rtp_timestamp_ms + window_size_ms_ < - unwrapped_rtp_timestamp / sample_rate_khz_) { - if (&history_.front() == min_packet_arrival_) { - min_packet_arrival_ = nullptr; - } - if (&history_.front() == max_packet_arrival_) { - max_packet_arrival_ = nullptr; - } - history_.pop_front(); + if (Contains(packet)) { + return false; + } + history_.emplace(packet.rtp_timestamp, packet); + if (packet != history_.rbegin()->second) { + // Packet was reordered. + return true; } - if (!min_packet_arrival_ || !max_packet_arrival_) { - for (const PacketArrival& packet : history_) { - MaybeUpdateCachedArrivals(packet); + // Remove old packets. + while (IsObsolete(history_.begin()->second)) { + if (history_.begin()->second == min_packet_arrivals_.front()) { + min_packet_arrivals_.pop_front(); } + if (history_.begin()->second == max_packet_arrivals_.front()) { + max_packet_arrivals_.pop_front(); + } + history_.erase(history_.begin()); } -} - -void PacketArrivalHistory::MaybeUpdateCachedArrivals( - const PacketArrival& packet_arrival) { - if (!min_packet_arrival_ || packet_arrival <= *min_packet_arrival_) { - min_packet_arrival_ = &packet_arrival; + // Ensure ordering constraints. + while (!min_packet_arrivals_.empty() && + packet <= min_packet_arrivals_.back()) { + min_packet_arrivals_.pop_back(); } - if (!max_packet_arrival_ || packet_arrival >= *max_packet_arrival_) { - max_packet_arrival_ = &packet_arrival; + while (!max_packet_arrivals_.empty() && + packet >= max_packet_arrivals_.back()) { + max_packet_arrivals_.pop_back(); } + min_packet_arrivals_.push_back(packet); + max_packet_arrivals_.push_back(packet); + return true; } void PacketArrivalHistory::Reset() { history_.clear(); - min_packet_arrival_ = nullptr; - max_packet_arrival_ = nullptr; + min_packet_arrivals_.clear(); + max_packet_arrivals_.clear(); timestamp_unwrapper_.Reset(); - newest_rtp_timestamp_ = absl::nullopt; } -int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp, - int64_t time_ms) const { - RTC_DCHECK(sample_rate_khz_ > 0); - int64_t unwrapped_rtp_timestamp_ms = - timestamp_unwrapper_.PeekUnwrap(rtp_timestamp) / sample_rate_khz_; - PacketArrival packet(unwrapped_rtp_timestamp_ms, time_ms); +int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp) const { + int64_t unwrapped_rtp_timestamp = + timestamp_unwrapper_.PeekUnwrap(rtp_timestamp); + int64_t current_timestamp = + tick_timer_->ticks() * tick_timer_->ms_per_tick() * sample_rate_khz_; + PacketArrival packet(unwrapped_rtp_timestamp, current_timestamp, + /*duration_ms=*/0); return GetPacketArrivalDelayMs(packet); } int PacketArrivalHistory::GetMaxDelayMs() const { - if (!max_packet_arrival_) { + if (max_packet_arrivals_.empty()) { return 0; } - return GetPacketArrivalDelayMs(*max_packet_arrival_); + return GetPacketArrivalDelayMs(max_packet_arrivals_.front()); } bool PacketArrivalHistory::IsNewestRtpTimestamp(uint32_t rtp_timestamp) const { - if (!newest_rtp_timestamp_) { - return false; + if (history_.empty()) { + return true; } int64_t unwrapped_rtp_timestamp = timestamp_unwrapper_.PeekUnwrap(rtp_timestamp); - return unwrapped_rtp_timestamp == *newest_rtp_timestamp_; + return unwrapped_rtp_timestamp == history_.rbegin()->second.rtp_timestamp; } int PacketArrivalHistory::GetPacketArrivalDelayMs( const PacketArrival& packet_arrival) const { - if (!min_packet_arrival_) { + if (min_packet_arrivals_.empty()) { return 0; } - return std::max(static_cast<int>(packet_arrival.arrival_time_ms - - min_packet_arrival_->arrival_time_ms - - (packet_arrival.rtp_timestamp_ms - - min_packet_arrival_->rtp_timestamp_ms)), - 0); + RTC_DCHECK_NE(sample_rate_khz_, 0); + // TODO(jakobi): Timestamps are first converted to millis for bit-exactness. + return std::max<int>( + packet_arrival.arrival_timestamp / sample_rate_khz_ - + min_packet_arrivals_.front().arrival_timestamp / sample_rate_khz_ - + (packet_arrival.rtp_timestamp / sample_rate_khz_ - + min_packet_arrivals_.front().rtp_timestamp / sample_rate_khz_), + 0); +} + +bool PacketArrivalHistory::IsObsolete( + const PacketArrival& packet_arrival) const { + if (history_.empty()) { + return false; + } + return packet_arrival.rtp_timestamp + window_size_ms_ * sample_rate_khz_ < + history_.rbegin()->second.rtp_timestamp; +} + +bool PacketArrivalHistory::Contains(const PacketArrival& packet_arrival) const { + auto it = history_.upper_bound(packet_arrival.rtp_timestamp); + if (it == history_.begin()) { + return false; + } + --it; + return it->second.contains(packet_arrival); } } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h index 722caf5688..3fa1ea1fa9 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.h @@ -11,10 +11,11 @@ #ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ #define MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ +#include <cstddef> #include <cstdint> #include <deque> +#include <map> -#include "absl/types/optional.h" #include "api/neteq/tick_timer.h" #include "rtc_base/numerics/sequence_number_unwrapper.h" @@ -25,19 +26,22 @@ namespace webrtc { // pruned. class PacketArrivalHistory { public: - explicit PacketArrivalHistory(int window_size_ms); + explicit PacketArrivalHistory(const TickTimer* tick_timer, + int window_size_ms); virtual ~PacketArrivalHistory() = default; - // Insert packet with `rtp_timestamp` and `arrival_time_ms` into the history. - void Insert(uint32_t rtp_timestamp, int64_t arrival_time_ms); + // Insert packet with `rtp_timestamp` into the history. Returns true if the + // packet was inserted, false if the timestamp is too old or if the timestamp + // already exists. + bool Insert(uint32_t rtp_timestamp, int packet_length_samples); - // The delay for `rtp_timestamp` at `time_ms` is calculated as - // `(time_ms - p.arrival_time_ms) - (rtp_timestamp - p.rtp_timestamp)` - // where `p` is chosen as the packet arrival in the history that maximizes the - // delay. - virtual int GetDelayMs(uint32_t rtp_timestamp, int64_t time_ms) const; + // The delay for `rtp_timestamp` at time `now` is calculated as + // `(now - p.arrival_timestamp) - (rtp_timestamp - p.rtp_timestamp)` where `p` + // is chosen as the packet arrival in the history that maximizes the delay. + virtual int GetDelayMs(uint32_t rtp_timestamp) const; - // Get the maximum packet arrival delay observed in the history. + // Get the maximum packet arrival delay observed in the history, excluding + // reordered packets. virtual int GetMaxDelayMs() const; bool IsNewestRtpTimestamp(uint32_t rtp_timestamp) const; @@ -52,30 +56,53 @@ class PacketArrivalHistory { private: struct PacketArrival { - PacketArrival(int64_t rtp_timestamp_ms, int64_t arrival_time_ms) - : rtp_timestamp_ms(rtp_timestamp_ms), - arrival_time_ms(arrival_time_ms) {} - int64_t rtp_timestamp_ms; - int64_t arrival_time_ms; + PacketArrival(int64_t rtp_timestamp, + int64_t arrival_timestamp, + int length_samples) + : rtp_timestamp(rtp_timestamp), + arrival_timestamp(arrival_timestamp), + length_samples(length_samples) {} + PacketArrival() = default; + int64_t rtp_timestamp; + int64_t arrival_timestamp; + int length_samples; + bool operator==(const PacketArrival& other) const { + return rtp_timestamp == other.rtp_timestamp && + arrival_timestamp == other.arrival_timestamp && + length_samples == other.length_samples; + } + bool operator!=(const PacketArrival& other) const { + return !(*this == other); + } bool operator<=(const PacketArrival& other) const { - return arrival_time_ms - rtp_timestamp_ms <= - other.arrival_time_ms - other.rtp_timestamp_ms; + return arrival_timestamp - rtp_timestamp <= + other.arrival_timestamp - other.rtp_timestamp; } bool operator>=(const PacketArrival& other) const { - return arrival_time_ms - rtp_timestamp_ms >= - other.arrival_time_ms - other.rtp_timestamp_ms; + return arrival_timestamp - rtp_timestamp >= + other.arrival_timestamp - other.rtp_timestamp; + } + bool contains(const PacketArrival& other) const { + return rtp_timestamp <= other.rtp_timestamp && + rtp_timestamp + length_samples >= + other.rtp_timestamp + other.length_samples; } }; - std::deque<PacketArrival> history_; int GetPacketArrivalDelayMs(const PacketArrival& packet_arrival) const; - // Updates `min_packet_arrival_` and `max_packet_arrival_`. - void MaybeUpdateCachedArrivals(const PacketArrival& packet); - const PacketArrival* min_packet_arrival_ = nullptr; - const PacketArrival* max_packet_arrival_ = nullptr; + // Checks if the packet is older than the window size. + bool IsObsolete(const PacketArrival& packet_arrival) const; + // Check if the packet exists or fully overlaps with a packet in the history. + bool Contains(const PacketArrival& packet_arrival) const; + const TickTimer* tick_timer_; const int window_size_ms_; - RtpTimestampUnwrapper timestamp_unwrapper_; - absl::optional<int64_t> newest_rtp_timestamp_; int sample_rate_khz_ = 0; + RtpTimestampUnwrapper timestamp_unwrapper_; + // Packet history ordered by rtp timestamp. + std::map<int64_t, PacketArrival> history_; + // Tracks min/max packet arrivals in `history_` in ascending/descending order. + // Reordered packets are excluded. + std::deque<PacketArrival> min_packet_arrivals_; + std::deque<PacketArrival> max_packet_arrivals_; }; } // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc index 539a318fe1..dd95fec0f7 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history_unittest.cc @@ -21,32 +21,36 @@ namespace { constexpr int kFs = 8000; constexpr int kFsKhz = kFs / 1000; constexpr int kFrameSizeMs = 20; +constexpr int kFrameSizeSamples = kFrameSizeMs * kFsKhz; constexpr int kWindowSizeMs = 1000; class PacketArrivalHistoryTest : public testing::Test { public: - PacketArrivalHistoryTest() : history_(kWindowSizeMs) { + PacketArrivalHistoryTest() : history_(&tick_timer_, kWindowSizeMs) { history_.set_sample_rate(kFs); } - void IncrementTime(int delta_ms) { time_ms_ += delta_ms; } + void IncrementTime(int delta_ms) { + tick_timer_.Increment(delta_ms / tick_timer_.ms_per_tick()); + } int InsertPacketAndGetDelay(int timestamp_delta_ms) { uint32_t timestamp = timestamp_ + timestamp_delta_ms * kFsKhz; if (timestamp_delta_ms > 0) { timestamp_ = timestamp; } - history_.Insert(timestamp, time_ms_); + EXPECT_TRUE(history_.Insert(timestamp, kFrameSizeSamples)); EXPECT_EQ(history_.IsNewestRtpTimestamp(timestamp), timestamp_delta_ms >= 0); - return history_.GetDelayMs(timestamp, time_ms_); + return history_.GetDelayMs(timestamp); } protected: - int64_t time_ms_ = 0; + TickTimer tick_timer_; PacketArrivalHistory history_; uint32_t timestamp_ = 0x12345678; }; TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) { + // Insert first packet. EXPECT_EQ(InsertPacketAndGetDelay(0), 0); IncrementTime(kFrameSizeMs); @@ -56,7 +60,7 @@ TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) { EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); // Reordered packet. - EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 60); + EXPECT_EQ(InsertPacketAndGetDelay(-3 * kFrameSizeMs), 80); IncrementTime(2 * kFrameSizeMs); EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 40); @@ -68,7 +72,7 @@ TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) { EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); // Earlier packet is now more delayed due to the new reference packet. - EXPECT_EQ(history_.GetMaxDelayMs(), 100); + EXPECT_EQ(history_.GetMaxDelayMs(), 80); } TEST_F(PacketArrivalHistoryTest, ReorderedPackets) { @@ -86,7 +90,7 @@ TEST_F(PacketArrivalHistoryTest, ReorderedPackets) { IncrementTime(4 * kFrameSizeMs); EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 60); - EXPECT_EQ(history_.GetMaxDelayMs(), 80); + EXPECT_EQ(history_.GetMaxDelayMs(), 60); } TEST_F(PacketArrivalHistoryTest, MaxHistorySize) { @@ -117,7 +121,7 @@ TEST_F(PacketArrivalHistoryTest, TimestampWraparound) { // Insert another in-order packet after the wraparound. EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); - EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs); + EXPECT_EQ(history_.GetMaxDelayMs(), kFrameSizeMs); } TEST_F(PacketArrivalHistoryTest, TimestampWraparoundBackwards) { @@ -134,7 +138,33 @@ TEST_F(PacketArrivalHistoryTest, TimestampWraparoundBackwards) { // Insert another in-order packet after the wraparound. EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); - EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs); + EXPECT_EQ(history_.GetMaxDelayMs(), kFrameSizeMs); +} + +TEST_F(PacketArrivalHistoryTest, OldPacketShouldNotBeInserted) { + // Insert first packet as reference. + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + // Insert packet with timestamp older than the window size compared to the + // first packet. + EXPECT_FALSE(history_.Insert(timestamp_ - kWindowSizeMs * kFsKhz - 1, + kFrameSizeSamples)); +} + +TEST_F(PacketArrivalHistoryTest, DuplicatePacketShouldNotBeInserted) { + // Insert first packet as reference. + uint32_t first_timestamp = timestamp_; + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); + // Same timestamp as the first packet. + EXPECT_FALSE(history_.Insert(first_timestamp, kFrameSizeSamples)); +} + +TEST_F(PacketArrivalHistoryTest, OverlappingPacketShouldNotBeInserted) { + // Insert first packet as reference. + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + // 10 ms overlap with the previous packet. + EXPECT_FALSE(history_.Insert(timestamp_ + kFrameSizeSamples / 2, + kFrameSizeSamples / 2)); } } // namespace diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc deleted file mode 100644 index 9999d6764b..0000000000 --- a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_coding/neteq/post_decode_vad.h" - -namespace webrtc { - -PostDecodeVad::~PostDecodeVad() { - if (vad_instance_) - WebRtcVad_Free(vad_instance_); -} - -void PostDecodeVad::Enable() { - if (!vad_instance_) { - // Create the instance. - vad_instance_ = WebRtcVad_Create(); - if (vad_instance_ == nullptr) { - // Failed to create instance. - Disable(); - return; - } - } - Init(); - enabled_ = true; -} - -void PostDecodeVad::Disable() { - enabled_ = false; - running_ = false; -} - -void PostDecodeVad::Init() { - running_ = false; - if (vad_instance_) { - WebRtcVad_Init(vad_instance_); - WebRtcVad_set_mode(vad_instance_, kVadMode); - running_ = true; - } -} - -void PostDecodeVad::Update(int16_t* signal, - size_t length, - AudioDecoder::SpeechType speech_type, - bool sid_frame, - int fs_hz) { - if (!vad_instance_ || !enabled_) { - return; - } - - if (speech_type == AudioDecoder::kComfortNoise || sid_frame || - fs_hz > 16000) { - // TODO(hlundin): Remove restriction on fs_hz. - running_ = false; - active_speech_ = true; - sid_interval_counter_ = 0; - } else if (!running_) { - ++sid_interval_counter_; - } - - if (sid_interval_counter_ >= kVadAutoEnable) { - Init(); - } - - if (length > 0 && running_) { - size_t vad_sample_index = 0; - active_speech_ = false; - // Loop through frame sizes 30, 20, and 10 ms. - for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10; - vad_frame_size_ms -= 10) { - size_t vad_frame_size_samples = - static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000); - while (length - vad_sample_index >= vad_frame_size_samples) { - int vad_return = - WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index], - vad_frame_size_samples); - active_speech_ |= (vad_return == 1); - vad_sample_index += vad_frame_size_samples; - } - } - } -} - -} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h deleted file mode 100644 index 3bd91b9edb..0000000000 --- a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ -#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ - -#include <stddef.h> -#include <stdint.h> - -#include "api/audio_codecs/audio_decoder.h" -#include "common_audio/vad/include/webrtc_vad.h" - -namespace webrtc { - -class PostDecodeVad { - public: - PostDecodeVad() - : enabled_(false), - running_(false), - active_speech_(true), - sid_interval_counter_(0), - vad_instance_(NULL) {} - - virtual ~PostDecodeVad(); - - PostDecodeVad(const PostDecodeVad&) = delete; - PostDecodeVad& operator=(const PostDecodeVad&) = delete; - - // Enables post-decode VAD. - void Enable(); - - // Disables post-decode VAD. - void Disable(); - - // Initializes post-decode VAD. - void Init(); - - // Updates post-decode VAD with the audio data in `signal` having `length` - // samples. The data is of type `speech_type`, at the sample rate `fs_hz`. - void Update(int16_t* signal, - size_t length, - AudioDecoder::SpeechType speech_type, - bool sid_frame, - int fs_hz); - - // Accessors. - bool enabled() const { return enabled_; } - bool running() const { return running_; } - bool active_speech() const { return active_speech_; } - - private: - static const int kVadMode = 0; // Sets aggressiveness to "Normal". - // Number of Update() calls without CNG/SID before re-enabling VAD. - static const int kVadAutoEnable = 3000; - - bool enabled_; - bool running_; - bool active_speech_; - int sid_interval_counter_; - ::VadInst* vad_instance_; -}; - -} // namespace webrtc -#endif // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc b/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc deleted file mode 100644 index da3e4e864e..0000000000 --- a/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad_unittest.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Unit tests for PostDecodeVad class. - -#include "modules/audio_coding/neteq/post_decode_vad.h" - -#include "test/gtest.h" - -namespace webrtc { - -TEST(PostDecodeVad, CreateAndDestroy) { - PostDecodeVad vad; -} - -// TODO(hlundin): Write more tests. - -} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc index 081bd9631f..f1a46cd2df 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc +++ b/third_party/libwebrtc/modules/audio_coding/neteq/tools/neteq_replacement_input.cc @@ -107,7 +107,7 @@ void NetEqReplacementInput::ReplacePacket() { next_hdr->timestamp - packet_->header.timestamp; const bool opus_dtx = packet_->payload.size() <= 2; if (next_hdr->sequenceNumber == packet_->header.sequenceNumber + 1 && - timestamp_diff <= 120 * 48 && !opus_dtx) { + timestamp_diff <= 120 * 48 && timestamp_diff > 0 && !opus_dtx) { // Packets are in order and the timestamp diff is less than 5760 samples. // Accept the timestamp diff as a valid frame size. input_frame_size_timestamps = timestamp_diff; diff --git a/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build b/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build index 834a8d1265..9b2996fa22 100644 --- a/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build +++ b/third_party/libwebrtc/modules/audio_coding/neteq_gn/moz.build @@ -58,7 +58,6 @@ UNIFIED_SOURCES += [ "/third_party/libwebrtc/modules/audio_coding/neteq/packet.cc", "/third_party/libwebrtc/modules/audio_coding/neteq/packet_arrival_history.cc", "/third_party/libwebrtc/modules/audio_coding/neteq/packet_buffer.cc", - "/third_party/libwebrtc/modules/audio_coding/neteq/post_decode_vad.cc", "/third_party/libwebrtc/modules/audio_coding/neteq/preemptive_expand.cc", "/third_party/libwebrtc/modules/audio_coding/neteq/random_vector.cc", "/third_party/libwebrtc/modules/audio_coding/neteq/red_payload_splitter.cc", |