diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/libwebrtc/modules/audio_coding/codecs | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_coding/codecs')
233 files changed, 28644 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h b/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h new file mode 100644 index 0000000000..b7b15cdd6e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/audio_decoder.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file is for backwards compatibility only! Use +// webrtc/api/audio_codecs/audio_decoder.h instead! +// TODO(kwiberg): Remove it. + +#ifndef MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_ +#define MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_ + +#include "api/audio_codecs/audio_decoder.h" + +#endif // MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h b/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h new file mode 100644 index 0000000000..010ae6705f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/audio_encoder.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file is for backwards compatibility only! Use +// webrtc/api/audio_codecs/audio_encoder.h instead! +// TODO(ossu): Remove it. 
+ +#ifndef MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ +#define MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ + +#include "api/audio_codecs/audio_encoder.h" + +#endif // MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc new file mode 100644 index 0000000000..4a2b261a59 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/builtin_audio_decoder_factory.h" + +#include <memory> + +#include "test/gtest.h" + +namespace webrtc { + +TEST(AudioDecoderFactoryTest, CreateUnknownDecoder) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("rey", 8000, 1), absl::nullopt)); +} + +TEST(AudioDecoderFactoryTest, CreatePcmu) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // PCMu supports 8 kHz, and any number of channels. 
+ EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 0), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 1), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 2), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 8000, 3), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("pcmu", 16000, 1), absl::nullopt)); +} + +TEST(AudioDecoderFactoryTest, CreatePcma) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // PCMa supports 8 kHz, and any number of channels. + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 0), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 1), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 2), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("pcma", 8000, 3), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("pcma", 16000, 1), absl::nullopt)); +} + +TEST(AudioDecoderFactoryTest, CreateIlbc) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // iLBC supports 8 kHz, 1 channel. + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 0), absl::nullopt)); +#ifdef WEBRTC_CODEC_ILBC + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 1), absl::nullopt)); +#endif + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 8000, 2), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("ilbc", 16000, 1), absl::nullopt)); +} + +TEST(AudioDecoderFactoryTest, CreateL16) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // L16 supports any clock rate and any number of channels up to 24. 
+ const int clockrates[] = {8000, 16000, 32000, 48000}; + const int num_channels[] = {1, 2, 3, 24}; + for (int clockrate : clockrates) { + EXPECT_FALSE(adf->MakeAudioDecoder(SdpAudioFormat("l16", clockrate, 0), + absl::nullopt)); + for (int channels : num_channels) { + EXPECT_TRUE(adf->MakeAudioDecoder( + SdpAudioFormat("l16", clockrate, channels), absl::nullopt)); + } + } +} + +// Tests that using more channels than the maximum does not work +TEST(AudioDecoderFactoryTest, MaxNrOfChannels) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + std::vector<std::string> codecs = { +#ifdef WEBRTC_CODEC_OPUS + "opus", +#endif +#ifdef WEBRTC_CODEC_ILBC + "ilbc", +#endif + "pcmu", + "pcma", + "l16", + "G722", + "G711", + }; + + for (auto codec : codecs) { + EXPECT_FALSE(adf->MakeAudioDecoder( + SdpAudioFormat(codec, 32000, AudioDecoder::kMaxNumberOfChannels + 1), + absl::nullopt)); + } +} + +TEST(AudioDecoderFactoryTest, CreateG722) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // g722 supports 8 kHz, 1-2 channels. + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 0), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 1), absl::nullopt)); + EXPECT_TRUE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 2), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 3), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 16000, 1), absl::nullopt)); + EXPECT_FALSE( + adf->MakeAudioDecoder(SdpAudioFormat("g722", 32000, 1), absl::nullopt)); + + // g722 actually uses a 16 kHz sample rate instead of the nominal 8 kHz. 
+ std::unique_ptr<AudioDecoder> dec = + adf->MakeAudioDecoder(SdpAudioFormat("g722", 8000, 1), absl::nullopt); + EXPECT_EQ(16000, dec->SampleRateHz()); +} + +TEST(AudioDecoderFactoryTest, CreateOpus) { + rtc::scoped_refptr<AudioDecoderFactory> adf = + CreateBuiltinAudioDecoderFactory(); + ASSERT_TRUE(adf); + // Opus supports 48 kHz, 2 channels, and wants a "stereo" parameter whose + // value is either "0" or "1". + for (int hz : {8000, 16000, 32000, 48000}) { + for (int channels : {0, 1, 2, 3}) { + for (std::string stereo : {"XX", "0", "1", "2"}) { + SdpAudioFormat::Parameters params; + if (stereo != "XX") { + params["stereo"] = stereo; + } + const bool good = (hz == 48000 && channels == 2 && + (stereo == "XX" || stereo == "0" || stereo == "1")); + EXPECT_EQ(good, + static_cast<bool>(adf->MakeAudioDecoder( + SdpAudioFormat("opus", hz, channels, std::move(params)), + absl::nullopt))); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc new file mode 100644 index 0000000000..26ae1eda8a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/audio_codecs/builtin_audio_encoder_factory.h" + +#include <limits> +#include <memory> +#include <vector> + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +class AudioEncoderFactoryTest + : public ::testing::TestWithParam<rtc::scoped_refptr<AudioEncoderFactory>> { +}; + +TEST_P(AudioEncoderFactoryTest, SupportsAtLeastOneFormat) { + auto factory = GetParam(); + auto supported_encoders = factory->GetSupportedEncoders(); + EXPECT_FALSE(supported_encoders.empty()); +} + +TEST_P(AudioEncoderFactoryTest, CanQueryAllSupportedFormats) { + auto factory = GetParam(); + auto supported_encoders = factory->GetSupportedEncoders(); + for (const auto& spec : supported_encoders) { + auto info = factory->QueryAudioEncoder(spec.format); + EXPECT_TRUE(info); + } +} + +TEST_P(AudioEncoderFactoryTest, CanConstructAllSupportedEncoders) { + auto factory = GetParam(); + auto supported_encoders = factory->GetSupportedEncoders(); + for (const auto& spec : supported_encoders) { + auto info = factory->QueryAudioEncoder(spec.format); + auto encoder = factory->MakeAudioEncoder(127, spec.format, absl::nullopt); + EXPECT_TRUE(encoder); + EXPECT_EQ(encoder->SampleRateHz(), info->sample_rate_hz); + EXPECT_EQ(encoder->NumChannels(), info->num_channels); + EXPECT_EQ(encoder->RtpTimestampRateHz(), spec.format.clockrate_hz); + } +} + +TEST_P(AudioEncoderFactoryTest, CanRunAllSupportedEncoders) { + constexpr int kTestPayloadType = 127; + auto factory = GetParam(); + auto supported_encoders = factory->GetSupportedEncoders(); + for (const auto& spec : supported_encoders) { + auto encoder = + factory->MakeAudioEncoder(kTestPayloadType, spec.format, absl::nullopt); + EXPECT_TRUE(encoder); + encoder->Reset(); + const int num_samples = rtc::checked_cast<int>( + encoder->SampleRateHz() * encoder->NumChannels() / 100); + rtc::Buffer out; + rtc::BufferT<int16_t> audio; + audio.SetData(num_samples, 
[](rtc::ArrayView<int16_t> audio) { + for (size_t i = 0; i != audio.size(); ++i) { + // Just put some numbers in there, ensure they're within range. + audio[i] = + static_cast<int16_t>(i & std::numeric_limits<int16_t>::max()); + } + return audio.size(); + }); + // This is here to stop the test going forever with a broken encoder. + constexpr int kMaxEncodeCalls = 100; + int blocks = 0; + for (; blocks < kMaxEncodeCalls; ++blocks) { + AudioEncoder::EncodedInfo info = encoder->Encode( + blocks * encoder->RtpTimestampRateHz() / 100, audio, &out); + EXPECT_EQ(info.encoded_bytes, out.size()); + if (info.encoded_bytes > 0) { + EXPECT_EQ(0u, info.encoded_timestamp); + EXPECT_EQ(kTestPayloadType, info.payload_type); + break; + } + } + ASSERT_LT(blocks, kMaxEncodeCalls); + const unsigned int next_timestamp = + blocks * encoder->RtpTimestampRateHz() / 100; + out.Clear(); + for (; blocks < kMaxEncodeCalls; ++blocks) { + AudioEncoder::EncodedInfo info = encoder->Encode( + blocks * encoder->RtpTimestampRateHz() / 100, audio, &out); + EXPECT_EQ(info.encoded_bytes, out.size()); + if (info.encoded_bytes > 0) { + EXPECT_EQ(next_timestamp, info.encoded_timestamp); + EXPECT_EQ(kTestPayloadType, info.payload_type); + break; + } + } + ASSERT_LT(blocks, kMaxEncodeCalls); + } +} + +INSTANTIATE_TEST_SUITE_P(BuiltinAudioEncoderFactoryTest, + AudioEncoderFactoryTest, + ::testing::Values(CreateBuiltinAudioEncoderFactory())); + +TEST(BuiltinAudioEncoderFactoryTest, SupportsTheExpectedFormats) { + using ::testing::ElementsAreArray; + // Check that we claim to support the formats we expect from build flags, and + // we've ordered them correctly. 
+ auto factory = CreateBuiltinAudioEncoderFactory(); + auto specs = factory->GetSupportedEncoders(); + + const std::vector<SdpAudioFormat> supported_formats = [&specs] { + std::vector<SdpAudioFormat> formats; + formats.reserve(specs.size()); + for (const auto& spec : specs) { + formats.push_back(spec.format); + } + return formats; + }(); + + const std::vector<SdpAudioFormat> expected_formats = { +#ifdef WEBRTC_CODEC_OPUS + {"opus", 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}}}, +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + {"isac", 16000, 1}, +#endif +#ifdef WEBRTC_CODEC_ISAC + {"isac", 32000, 1}, +#endif + {"G722", 8000, 1}, +#ifdef WEBRTC_CODEC_ILBC + {"ilbc", 8000, 1}, +#endif + {"pcmu", 8000, 1}, + {"pcma", 8000, 1} + }; + + ASSERT_THAT(supported_formats, ElementsAreArray(expected_formats)); +} + +// Tests that using more channels than the maximum does not work. +TEST(BuiltinAudioEncoderFactoryTest, MaxNrOfChannels) { + rtc::scoped_refptr<AudioEncoderFactory> aef = + CreateBuiltinAudioEncoderFactory(); + std::vector<std::string> codecs = { +#ifdef WEBRTC_CODEC_OPUS + "opus", +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + "isac", +#endif +#ifdef WEBRTC_CODEC_ILBC + "ilbc", +#endif + "pcmu", + "pcma", + "l16", + "G722", + "G711", + }; + + for (auto codec : codecs) { + EXPECT_FALSE(aef->MakeAudioEncoder( + /*payload_type=*/111, + /*format=*/ + SdpAudioFormat(codec, 32000, AudioEncoder::kMaxNumberOfChannels + 1), + /*codec_pair_id=*/absl::nullopt)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc new file mode 100644 index 0000000000..7546ac178f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" + +#include <cstdint> +#include <memory> +#include <utility> + +#include "absl/types/optional.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +const int kMaxFrameSizeMs = 60; + +class AudioEncoderCng final : public AudioEncoder { + public: + explicit AudioEncoderCng(AudioEncoderCngConfig&& config); + ~AudioEncoderCng() override; + + // Not copyable or moveable. + AudioEncoderCng(const AudioEncoderCng&) = delete; + AudioEncoderCng(AudioEncoderCng&&) = delete; + AudioEncoderCng& operator=(const AudioEncoderCng&) = delete; + AudioEncoderCng& operator=(AudioEncoderCng&&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + int RtpTimestampRateHz() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) override; + void Reset() override; + bool SetFec(bool enable) override; + bool SetDtx(bool enable) override; + bool SetApplication(Application application) override; + void SetMaxPlaybackRate(int frequency_hz) override; + rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders() + override; + void OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) override; + void OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) override; + 
absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange() + const override; + + private: + EncodedInfo EncodePassive(size_t frames_to_encode, rtc::Buffer* encoded); + EncodedInfo EncodeActive(size_t frames_to_encode, rtc::Buffer* encoded); + size_t SamplesPer10msFrame() const; + + std::unique_ptr<AudioEncoder> speech_encoder_; + const int cng_payload_type_; + const int num_cng_coefficients_; + const int sid_frame_interval_ms_; + std::vector<int16_t> speech_buffer_; + std::vector<uint32_t> rtp_timestamps_; + bool last_frame_active_; + std::unique_ptr<Vad> vad_; + std::unique_ptr<ComfortNoiseEncoder> cng_encoder_; +}; + +AudioEncoderCng::AudioEncoderCng(AudioEncoderCngConfig&& config) + : speech_encoder_((static_cast<void>([&] { + RTC_CHECK(config.IsOk()) << "Invalid configuration."; + }()), + std::move(config.speech_encoder))), + cng_payload_type_(config.payload_type), + num_cng_coefficients_(config.num_cng_coefficients), + sid_frame_interval_ms_(config.sid_frame_interval_ms), + last_frame_active_(true), + vad_(config.vad ? 
std::unique_ptr<Vad>(config.vad) + : CreateVad(config.vad_mode)), + cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(), + sid_frame_interval_ms_, + num_cng_coefficients_)) {} + +AudioEncoderCng::~AudioEncoderCng() = default; + +int AudioEncoderCng::SampleRateHz() const { + return speech_encoder_->SampleRateHz(); +} + +size_t AudioEncoderCng::NumChannels() const { + return 1; +} + +int AudioEncoderCng::RtpTimestampRateHz() const { + return speech_encoder_->RtpTimestampRateHz(); +} + +size_t AudioEncoderCng::Num10MsFramesInNextPacket() const { + return speech_encoder_->Num10MsFramesInNextPacket(); +} + +size_t AudioEncoderCng::Max10MsFramesInAPacket() const { + return speech_encoder_->Max10MsFramesInAPacket(); +} + +int AudioEncoderCng::GetTargetBitrate() const { + return speech_encoder_->GetTargetBitrate(); +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + RTC_CHECK_EQ(speech_buffer_.size(), + rtp_timestamps_.size() * samples_per_10ms_frame); + rtp_timestamps_.push_back(rtp_timestamp); + RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size()); + speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend()); + const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket(); + if (rtp_timestamps_.size() < frames_to_encode) { + return EncodedInfo(); + } + RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs) + << "Frame size cannot be larger than " << kMaxFrameSizeMs + << " ms when using VAD/CNG."; + + // Group several 10 ms blocks per VAD call. Call VAD once or twice using the + // following split sizes: + // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms; + // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms. + size_t blocks_in_first_vad_call = + (frames_to_encode > 3 ? 
3 : frames_to_encode); + if (frames_to_encode == 4) + blocks_in_first_vad_call = 2; + RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call); + const size_t blocks_in_second_vad_call = + frames_to_encode - blocks_in_first_vad_call; + + // Check if all of the buffer is passive speech. Start with checking the first + // block. + Vad::Activity activity = vad_->VoiceActivity( + &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call, + SampleRateHz()); + if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) { + // Only check the second block if the first was passive. + activity = vad_->VoiceActivity( + &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call], + samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz()); + } + + EncodedInfo info; + switch (activity) { + case Vad::kPassive: { + info = EncodePassive(frames_to_encode, encoded); + last_frame_active_ = false; + break; + } + case Vad::kActive: { + info = EncodeActive(frames_to_encode, encoded); + last_frame_active_ = true; + break; + } + default: { + RTC_CHECK_NOTREACHED(); + } + } + + speech_buffer_.erase( + speech_buffer_.begin(), + speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame); + rtp_timestamps_.erase(rtp_timestamps_.begin(), + rtp_timestamps_.begin() + frames_to_encode); + return info; +} + +void AudioEncoderCng::Reset() { + speech_encoder_->Reset(); + speech_buffer_.clear(); + rtp_timestamps_.clear(); + last_frame_active_ = true; + vad_->Reset(); + cng_encoder_.reset(new ComfortNoiseEncoder( + SampleRateHz(), sid_frame_interval_ms_, num_cng_coefficients_)); +} + +bool AudioEncoderCng::SetFec(bool enable) { + return speech_encoder_->SetFec(enable); +} + +bool AudioEncoderCng::SetDtx(bool enable) { + return speech_encoder_->SetDtx(enable); +} + +bool AudioEncoderCng::SetApplication(Application application) { + return speech_encoder_->SetApplication(application); +} + +void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) { + 
speech_encoder_->SetMaxPlaybackRate(frequency_hz); +} + +rtc::ArrayView<std::unique_ptr<AudioEncoder>> +AudioEncoderCng::ReclaimContainedEncoders() { + return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1); +} + +void AudioEncoderCng::OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) { + speech_encoder_->OnReceivedUplinkPacketLossFraction( + uplink_packet_loss_fraction); +} + +void AudioEncoderCng::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) { + speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps, + bwe_period_ms); +} + +absl::optional<std::pair<TimeDelta, TimeDelta>> +AudioEncoderCng::GetFrameLengthRange() const { + return speech_encoder_->GetFrameLengthRange(); +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive( + size_t frames_to_encode, + rtc::Buffer* encoded) { + bool force_sid = last_frame_active_; + bool output_produced = false; + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + AudioEncoder::EncodedInfo info; + + for (size_t i = 0; i < frames_to_encode; ++i) { + // It's important not to pass &info.encoded_bytes directly to + // WebRtcCng_Encode(), since later loop iterations may return zero in + // that value, in which case we don't want to overwrite any value from + // an earlier iteration. 
+ size_t encoded_bytes_tmp = + cng_encoder_->Encode(rtc::ArrayView<const int16_t>( + &speech_buffer_[i * samples_per_10ms_frame], + samples_per_10ms_frame), + force_sid, encoded); + + if (encoded_bytes_tmp > 0) { + RTC_CHECK(!output_produced); + info.encoded_bytes = encoded_bytes_tmp; + output_produced = true; + force_sid = false; + } + } + + info.encoded_timestamp = rtp_timestamps_.front(); + info.payload_type = cng_payload_type_; + info.send_even_if_empty = true; + info.speech = false; + return info; +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(size_t frames_to_encode, + rtc::Buffer* encoded) { + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + AudioEncoder::EncodedInfo info; + for (size_t i = 0; i < frames_to_encode; ++i) { + info = + speech_encoder_->Encode(rtp_timestamps_.front(), + rtc::ArrayView<const int16_t>( + &speech_buffer_[i * samples_per_10ms_frame], + samples_per_10ms_frame), + encoded); + if (i + 1 == frames_to_encode) { + RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data."; + } else { + RTC_CHECK_EQ(info.encoded_bytes, 0) + << "Encoder delivered data too early."; + } + } + return info; +} + +size_t AudioEncoderCng::SamplesPer10msFrame() const { + return rtc::CheckedDivExact(10 * SampleRateHz(), 1000); +} + +} // namespace + +AudioEncoderCngConfig::AudioEncoderCngConfig() = default; +AudioEncoderCngConfig::AudioEncoderCngConfig(AudioEncoderCngConfig&&) = default; +AudioEncoderCngConfig::~AudioEncoderCngConfig() = default; + +bool AudioEncoderCngConfig::IsOk() const { + if (num_channels != 1) + return false; + if (!speech_encoder) + return false; + if (num_channels != speech_encoder->NumChannels()) + return false; + if (sid_frame_interval_ms < + static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10)) + return false; + if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER || + num_cng_coefficients <= 0) + return false; + return true; +} + +std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder( 
+ AudioEncoderCngConfig&& config) { + return std::make_unique<AudioEncoderCng>(std::move(config)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h new file mode 100644 index 0000000000..8a1183489f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_ +#define MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_ + +#include <stddef.h> + +#include <memory> + +#include "api/audio_codecs/audio_encoder.h" +#include "common_audio/vad/include/vad.h" + +namespace webrtc { + +struct AudioEncoderCngConfig { + // Moveable, not copyable. + AudioEncoderCngConfig(); + AudioEncoderCngConfig(AudioEncoderCngConfig&&); + ~AudioEncoderCngConfig(); + + bool IsOk() const; + + size_t num_channels = 1; + int payload_type = 13; + std::unique_ptr<AudioEncoder> speech_encoder; + Vad::Aggressiveness vad_mode = Vad::kVadNormal; + int sid_frame_interval_ms = 100; + int num_cng_coefficients = 8; + // The Vad pointer is mainly for testing. If a NULL pointer is passed, the + // AudioEncoderCng creates (and destroys) a Vad object internally. If an + // object is passed, the AudioEncoderCng assumes ownership of the Vad + // object. 
+ Vad* vad = nullptr; +}; + +std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder( + AudioEncoderCngConfig&& config); + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc new file mode 100644 index 0000000000..c688004363 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" + +#include <memory> +#include <vector> + +#include "common_audio/vad/mock/mock_vad.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "test/mock_audio_encoder.h" +#include "test/testsupport/rtc_expect_death.h" + +using ::testing::_; +using ::testing::Eq; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::Not; +using ::testing::Optional; +using ::testing::Return; +using ::testing::SetArgPointee; + +namespace webrtc { + +namespace { +static const size_t kMaxNumSamples = 48 * 10 * 2; // 10 ms @ 48 kHz stereo. 
+static const size_t kMockReturnEncodedBytes = 17; +static const int kCngPayloadType = 18; +} // namespace + +class AudioEncoderCngTest : public ::testing::Test { + protected: + AudioEncoderCngTest() + : mock_encoder_owner_(new MockAudioEncoder), + mock_encoder_(mock_encoder_owner_.get()), + mock_vad_(new MockVad), + timestamp_(4711), + num_audio_samples_10ms_(0), + sample_rate_hz_(8000) { + memset(audio_, 0, kMaxNumSamples * 2); + EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(1)); + } + + AudioEncoderCngTest(const AudioEncoderCngTest&) = delete; + AudioEncoderCngTest& operator=(const AudioEncoderCngTest&) = delete; + + void TearDown() override { + EXPECT_CALL(*mock_vad_, Die()).Times(1); + cng_.reset(); + } + + AudioEncoderCngConfig MakeCngConfig() { + AudioEncoderCngConfig config; + config.speech_encoder = std::move(mock_encoder_owner_); + EXPECT_TRUE(config.speech_encoder); + + // Let the AudioEncoderCng object use a MockVad instead of its internally + // created Vad object. + config.vad = mock_vad_; + config.payload_type = kCngPayloadType; + + return config; + } + + void CreateCng(AudioEncoderCngConfig&& config) { + num_audio_samples_10ms_ = static_cast<size_t>(10 * sample_rate_hz_ / 1000); + ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples); + if (config.speech_encoder) { + EXPECT_CALL(*mock_encoder_, SampleRateHz()) + .WillRepeatedly(Return(sample_rate_hz_)); + // Max10MsFramesInAPacket() is just used to verify that the SID frame + // period is not too small. The return value does not matter that much, + // as long as it is smaller than 10. 
+ EXPECT_CALL(*mock_encoder_, Max10MsFramesInAPacket()) + .WillOnce(Return(1u)); + } + cng_ = CreateComfortNoiseEncoder(std::move(config)); + } + + void Encode() { + ASSERT_TRUE(cng_) << "Must call CreateCng() first."; + encoded_info_ = cng_->Encode( + timestamp_, + rtc::ArrayView<const int16_t>(audio_, num_audio_samples_10ms_), + &encoded_); + timestamp_ += static_cast<uint32_t>(num_audio_samples_10ms_); + } + + // Expect `num_calls` calls to the encoder, all successful. The last call + // claims to have encoded `kMockReturnEncodedBytes` bytes, and all the + // preceding ones 0 bytes. + void ExpectEncodeCalls(size_t num_calls) { + InSequence s; + AudioEncoder::EncodedInfo info; + for (size_t j = 0; j < num_calls - 1; ++j) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).WillOnce(Return(info)); + } + info.encoded_bytes = kMockReturnEncodedBytes; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce( + Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes))); + } + + // Verifies that the cng_ object waits until it has collected + // `blocks_per_frame` blocks of audio, and then dispatches all of them to + // the underlying codec (speech or cng). + void CheckBlockGrouping(size_t blocks_per_frame, bool active_speech) { + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + auto config = MakeCngConfig(); + const int num_cng_coefficients = config.num_cng_coefficients; + CreateCng(std::move(config)); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive)); + + // Don't expect any calls to the encoder yet. 
+ EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0); + for (size_t i = 0; i < blocks_per_frame - 1; ++i) { + Encode(); + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + } + if (active_speech) + ExpectEncodeCalls(blocks_per_frame); + Encode(); + if (active_speech) { + EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes); + } else { + EXPECT_EQ(static_cast<size_t>(num_cng_coefficients + 1), + encoded_info_.encoded_bytes); + } + } + + // Verifies that the audio is partitioned into larger blocks before calling + // the VAD. + void CheckVadInputSize(int input_frame_size_ms, + int expected_first_block_size_ms, + int expected_second_block_size_ms) { + const size_t blocks_per_frame = + static_cast<size_t>(input_frame_size_ms / 10); + + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + + // Expect nothing to happen before the last block is sent to cng_. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0); + for (size_t i = 0; i < blocks_per_frame - 1; ++i) { + Encode(); + } + + // Let the VAD decision be passive, since an active decision may lead to + // early termination of the decision loop. + InSequence s; + EXPECT_CALL( + *mock_vad_, + VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz_ / 1000, + sample_rate_hz_)) + .WillOnce(Return(Vad::kPassive)); + if (expected_second_block_size_ms > 0) { + EXPECT_CALL(*mock_vad_, + VoiceActivity( + _, expected_second_block_size_ms * sample_rate_hz_ / 1000, + sample_rate_hz_)) + .WillOnce(Return(Vad::kPassive)); + } + + // With this call to Encode(), `mock_vad_` should be called according to the + // above expectations. + Encode(); + } + + // Tests a frame with both active and passive speech. Returns true if the + // decision was active speech, false if it was passive. 
+ bool CheckMixedActivePassive(Vad::Activity first_type, + Vad::Activity second_type) { + // Set the speech encoder frame size to 60 ms, to ensure that the VAD will + // be called twice. + const size_t blocks_per_frame = 6; + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + InSequence s; + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(first_type)); + if (first_type == Vad::kPassive) { + // Expect a second call to the VAD only if the first frame was passive. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(second_type)); + } + encoded_info_.payload_type = 0; + for (size_t i = 0; i < blocks_per_frame; ++i) { + Encode(); + } + return encoded_info_.payload_type != kCngPayloadType; + } + + std::unique_ptr<AudioEncoder> cng_; + std::unique_ptr<MockAudioEncoder> mock_encoder_owner_; + MockAudioEncoder* mock_encoder_; + MockVad* mock_vad_; // Ownership is transferred to `cng_`. + uint32_t timestamp_; + int16_t audio_[kMaxNumSamples]; + size_t num_audio_samples_10ms_; + rtc::Buffer encoded_; + AudioEncoder::EncodedInfo encoded_info_; + int sample_rate_hz_; +}; + +TEST_F(AudioEncoderCngTest, CreateAndDestroy) { + CreateCng(MakeCngConfig()); +} + +TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillOnce(Return(17U)); + EXPECT_EQ(17U, cng_->Num10MsFramesInNextPacket()); +} + +TEST_F(AudioEncoderCngTest, CheckTargetAudioBitratePropagation) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, + OnReceivedUplinkBandwidth(4711, absl::optional<int64_t>())); + cng_->OnReceivedUplinkBandwidth(4711, absl::nullopt); +} + +TEST_F(AudioEncoderCngTest, CheckPacketLossFractionPropagation) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, OnReceivedUplinkPacketLossFraction(0.5)); + cng_->OnReceivedUplinkPacketLossFraction(0.5); +} + +TEST_F(AudioEncoderCngTest, 
CheckGetFrameLengthRangePropagation) { + CreateCng(MakeCngConfig()); + auto expected_range = + std::make_pair(TimeDelta::Millis(20), TimeDelta::Millis(20)); + EXPECT_CALL(*mock_encoder_, GetFrameLengthRange()) + .WillRepeatedly(Return(absl::make_optional(expected_range))); + EXPECT_THAT(cng_->GetFrameLengthRange(), Optional(Eq(expected_range))); +} + +TEST_F(AudioEncoderCngTest, EncodeCallsVad) { + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(1U)); + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kPassive)); + Encode(); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) { + CheckBlockGrouping(1, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) { + CheckBlockGrouping(2, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) { + CheckBlockGrouping(3, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) { + CheckBlockGrouping(1, true); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) { + CheckBlockGrouping(2, true); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) { + CheckBlockGrouping(3, true); +} + +TEST_F(AudioEncoderCngTest, EncodePassive) { + const size_t kBlocksPerFrame = 3; + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(kBlocksPerFrame)); + auto config = MakeCngConfig(); + const auto sid_frame_interval_ms = config.sid_frame_interval_ms; + const auto num_cng_coefficients = config.num_cng_coefficients; + CreateCng(std::move(config)); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillRepeatedly(Return(Vad::kPassive)); + // Expect no calls at all to the speech encoder mock. + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0); + uint32_t expected_timestamp = timestamp_; + for (size_t i = 0; i < 100; ++i) { + Encode(); + // Check if it was time to call the cng encoder. 
This is done once every + // `kBlocksPerFrame` calls. + if ((i + 1) % kBlocksPerFrame == 0) { + // Now check if a SID interval has elapsed. + if ((i % (sid_frame_interval_ms / 10)) < kBlocksPerFrame) { + // If so, verify that we got a CNG encoding. + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_FALSE(encoded_info_.speech); + EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); + EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp); + } + expected_timestamp += rtc::checked_cast<uint32_t>( + kBlocksPerFrame * num_audio_samples_10ms_); + } else { + // Otherwise, expect no output. + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + } + } +} + +// Verifies that the correct action is taken for frames with both active and +// passive speech. +TEST_F(AudioEncoderCngTest, MixedActivePassive) { + CreateCng(MakeCngConfig()); + + // All of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive)); + EXPECT_TRUE(encoded_info_.speech); + + // First half of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive)); + EXPECT_TRUE(encoded_info_.speech); + + // Second half of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive)); + EXPECT_TRUE(encoded_info_.speech); + + // All of the frame is passive speech. Expect no calls to `mock_encoder_`. + EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive)); + EXPECT_FALSE(encoded_info_.speech); +} + +// These tests verify that the audio is partitioned into larger blocks before +// calling the VAD. 
+// The parameters for CheckVadInputSize are: +// CheckVadInputSize(frame_size, expected_first_block_size, +// expected_second_block_size); +TEST_F(AudioEncoderCngTest, VadInputSize10Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(10, 10, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize20Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(20, 20, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize30Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(30, 30, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize40Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(40, 20, 20); +} +TEST_F(AudioEncoderCngTest, VadInputSize50Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(50, 30, 20); +} +TEST_F(AudioEncoderCngTest, VadInputSize60Ms) { + CreateCng(MakeCngConfig()); + CheckVadInputSize(60, 30, 30); +} + +// Verifies that the correct payload type is set when CNG is encoded. +TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0); + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1U)); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kPassive)); + encoded_info_.payload_type = 0; + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); +} + +// Verifies that a SID frame is encoded immediately as the signal changes from +// active speech to passive. +TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) { + auto config = MakeCngConfig(); + const auto num_cng_coefficients = config.num_cng_coefficients; + CreateCng(std::move(config)); + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(1U)); + // Start with encoding noise. 
+ EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .Times(2) + .WillRepeatedly(Return(Vad::kPassive)); + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); + // Encode again, and make sure we got no frame at all (since the SID frame + // period is 100 ms by default). + Encode(); + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + + // Now encode active speech. + encoded_info_.payload_type = 0; + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kActive)); + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce( + Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes))); + Encode(); + EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes); + + // Go back to noise again, and verify that a SID frame is emitted. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kPassive)); + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); +} + +// Resetting the CNG should reset both the VAD and the encoder. +TEST_F(AudioEncoderCngTest, Reset) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, Reset()).Times(1); + EXPECT_CALL(*mock_vad_, Reset()).Times(1); + cng_->Reset(); +} + +#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// This test fixture tests various error conditions that makes the +// AudioEncoderCng die via CHECKs. +class AudioEncoderCngDeathTest : public AudioEncoderCngTest { + protected: + AudioEncoderCngDeathTest() : AudioEncoderCngTest() { + EXPECT_CALL(*mock_vad_, Die()).Times(1); + delete mock_vad_; + mock_vad_ = nullptr; + } + + // Override AudioEncoderCngTest::TearDown, since that one expects a call to + // the destructor of `mock_vad_`. In this case, that object is already + // deleted. 
+ void TearDown() override { cng_.reset(); } + + AudioEncoderCngConfig MakeCngConfig() { + // Don't provide a Vad mock object, since it would leak when the test dies. + auto config = AudioEncoderCngTest::MakeCngConfig(); + config.vad = nullptr; + return config; + } + + void TryWrongNumCoefficients(int num) { + RTC_EXPECT_DEATH( + [&] { + auto config = MakeCngConfig(); + config.num_cng_coefficients = num; + CreateCng(std::move(config)); + }(), + "Invalid configuration"); + } +}; + +TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) { + CreateCng(MakeCngConfig()); + num_audio_samples_10ms_ *= 2; // 20 ms frame. + RTC_EXPECT_DEATH(Encode(), ""); + num_audio_samples_10ms_ = 0; // Zero samples. + RTC_EXPECT_DEATH(Encode(), ""); +} + +TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsA) { + TryWrongNumCoefficients(-1); +} + +TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsB) { + TryWrongNumCoefficients(0); +} + +TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsC) { + TryWrongNumCoefficients(13); +} + +TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) { + auto config = MakeCngConfig(); + config.speech_encoder = nullptr; + RTC_EXPECT_DEATH(CreateCng(std::move(config)), ""); +} + +TEST_F(AudioEncoderCngDeathTest, StereoEncoder) { + EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(2)); + RTC_EXPECT_DEATH(CreateCng(MakeCngConfig()), "Invalid configuration"); +} + +TEST_F(AudioEncoderCngDeathTest, StereoConfig) { + RTC_EXPECT_DEATH( + [&] { + auto config = MakeCngConfig(); + config.num_channels = 2; + CreateCng(std::move(config)); + }(), + "Invalid configuration"); +} + +TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) { + CreateCng(MakeCngConfig()); + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(7U)); + for (int i = 0; i < 6; ++i) + Encode(); + RTC_EXPECT_DEATH( + Encode(), "Frame size cannot be larger than 60 ms when using VAD/CNG."); +} + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace webrtc diff 
--git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc new file mode 100644 index 0000000000..0e6ab79394 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/cng_unittest.cc @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <memory> +#include <string> + +#include "modules/audio_coding/codecs/cng/webrtc_cng.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +enum { + kSidShortIntervalUpdate = 1, + kSidNormalIntervalUpdate = 100, + kSidLongIntervalUpdate = 10000 +}; + +enum : size_t { + kCNGNumParamsLow = 0, + kCNGNumParamsNormal = 8, + kCNGNumParamsHigh = WEBRTC_CNG_MAX_LPC_ORDER, + kCNGNumParamsTooHigh = WEBRTC_CNG_MAX_LPC_ORDER + 1 +}; + +enum { kNoSid, kForceSid }; + +class CngTest : public ::testing::Test { + protected: + virtual void SetUp(); + + void TestCngEncode(int sample_rate_hz, int quality); + + int16_t speech_data_[640]; // Max size of CNG internal buffers. 
+};
+
+// Death-test flavour of CngTest; adds nothing to the fixture.
+class CngDeathTest : public CngTest {};
+
+// Reads 640 int16_t values from the 32 kHz resource file into `speech_data_`.
+// The stream is closed before the element count is asserted, so a short read
+// does not leak the FILE handle when the fatal assertion returns early.
+void CngTest::SetUp() {
+  const std::string file_name =
+      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
+  FILE* input_file = fopen(file_name.c_str(), "rb");
+  ASSERT_TRUE(input_file != nullptr);
+  const size_t num_read =
+      fread(speech_data_, sizeof(int16_t), 640, input_file);
+  fclose(input_file);
+  ASSERT_EQ(640u, num_read);
+}
+
+// Encodes the same 10 ms block twice with a fresh encoder: the unforced call
+// must produce no output, the forced call must produce `quality` + 1 bytes.
+void CngTest::TestCngEncode(int sample_rate_hz, int quality) {
+  const size_t num_samples_10ms = rtc::CheckedDivExact(sample_rate_hz, 100);
+  rtc::Buffer sid_data;
+
+  ComfortNoiseEncoder cng_encoder(sample_rate_hz, kSidNormalIntervalUpdate,
+                                  quality);
+  EXPECT_EQ(0U, cng_encoder.Encode(rtc::ArrayView<const int16_t>(
+                                       speech_data_, num_samples_10ms),
+                                   kNoSid, &sid_data));
+  EXPECT_EQ(static_cast<size_t>(quality + 1),
+            cng_encoder.Encode(
+                rtc::ArrayView<const int16_t>(speech_data_, num_samples_10ms),
+                kForceSid, &sid_data));
+}
+
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Create CNG encoder, init with faulty values, free CNG encoder.
+TEST_F(CngDeathTest, CngInitFail) {
+  // Call with too few parameters.
+  EXPECT_DEATH(
+      {
+        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate, kCNGNumParamsLow);
+      },
+      "");
+  // Call with too many parameters.
+  EXPECT_DEATH(
+      {
+        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate,
+                            kCNGNumParamsTooHigh);
+      },
+      "");
+}
+
+// Encode Cng with too long input vector.
+TEST_F(CngDeathTest, CngEncodeTooLong) {
+  rtc::Buffer sid_data;
+
+  // Create encoder.
+  ComfortNoiseEncoder cng_encoder(8000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  // Run encoder with too much data. 
+  EXPECT_DEATH(
+      cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 641),
+                         kNoSid, &sid_data),
+      "");
+}
+#endif  // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// The tests below run TestCngEncode() at each sample rate; only the 32 kHz
+// case uses the maximum number of coefficients.
+TEST_F(CngTest, CngEncode8000) {
+  TestCngEncode(8000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode16000) {
+  TestCngEncode(16000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode32000) {
+  TestCngEncode(32000, kCNGNumParamsHigh);
+}
+
+TEST_F(CngTest, CngEncode48000) {
+  TestCngEncode(48000, kCNGNumParamsNormal);
+}
+
+TEST_F(CngTest, CngEncode64000) {
+  TestCngEncode(64000, kCNGNumParamsNormal);
+}
+
+// Update SID parameters for both SID sizes used here: 9 bytes
+// (kCNGNumParamsNormal = 8 coefficients) and 13 bytes (kCNGNumParamsHigh = 12
+// coefficients), each including the leading energy byte.
+TEST_F(CngTest, CngUpdateSid) {
+  rtc::Buffer sid_data;
+
+  // Create and initialize encoder and decoder.
+  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  ComfortNoiseDecoder cng_decoder;
+
+  // Run normal Encode and UpdateSid.
+  EXPECT_EQ(kCNGNumParamsNormal + 1,
+            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
+                               kForceSid, &sid_data));
+  cng_decoder.UpdateSid(sid_data);
+
+  // Reinit with new length.
+  cng_encoder.Reset(16000, kSidNormalIntervalUpdate, kCNGNumParamsHigh);
+  cng_decoder.Reset();
+
+  // Expect 0 because of unstable parameters after switching length.
+  EXPECT_EQ(0U,
+            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
+                               kForceSid, &sid_data));
+  EXPECT_EQ(
+      kCNGNumParamsHigh + 1,
+      cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_ + 160, 160),
+                         kForceSid, &sid_data));
+  // Only the first kCNGNumParamsNormal + 1 bytes of the buffer are handed to
+  // the decoder here.
+  cng_decoder.UpdateSid(
+      rtc::ArrayView<const uint8_t>(sid_data.data(), kCNGNumParamsNormal + 1));
+}
+
+// Update SID parameters, with wrong parameters or without calling decode.
+TEST_F(CngTest, CngUpdateSidErroneous) {
+  rtc::Buffer sid_data;
+
+  // Encode. 
+  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  ComfortNoiseDecoder cng_decoder;
+  EXPECT_EQ(kCNGNumParamsNormal + 1,
+            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
+                               kForceSid, &sid_data));
+
+  // First run with valid parameters, then with too many CNG parameters.
+  // The function will operate correctly by only reading the maximum number of
+  // parameters, skipping the extra.
+  EXPECT_EQ(kCNGNumParamsNormal + 1, sid_data.size());
+  cng_decoder.UpdateSid(sid_data);
+
+  // Make sure the input buffer is large enough. Since Encode() appends data, we
+  // need to set the size manually only afterwards, or the buffer will be bigger
+  // than anticipated.
+  sid_data.SetSize(kCNGNumParamsTooHigh + 1);
+  cng_decoder.UpdateSid(sid_data);
+}
+
+// Test to generate cng data, by forcing SID. Both normal and faulty condition.
+TEST_F(CngTest, CngGenerate) {
+  rtc::Buffer sid_data;
+  int16_t out_data[640];  // 640 samples is the largest block Generate() accepts.
+
+  // Create and initialize encoder and decoder.
+  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
+                                  kCNGNumParamsNormal);
+  ComfortNoiseDecoder cng_decoder;
+
+  // Normal Encode.
+  EXPECT_EQ(kCNGNumParamsNormal + 1,
+            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
+                               kForceSid, &sid_data));
+
+  // Normal UpdateSid.
+  cng_decoder.UpdateSid(sid_data);
+
+  // Two normal Generate calls, the first with new_period set (the literal 1).
+  EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 640), 1));
+  EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 640), 0));
+
+  // Call Generate with too much data: a 641-sample view must be rejected.
+  EXPECT_FALSE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 641), 0));
+}
+
+// Test automatic SID.
+TEST_F(CngTest, CngAutoSid) {
+  rtc::Buffer sid_data;
+
+  // Create and initialize encoder and decoder. 
+ ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + + // Normal Encode, 100 msec, where no SID data should be generated. + for (int i = 0; i < 10; i++) { + EXPECT_EQ( + 0U, cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160), + kNoSid, &sid_data)); + } + + // We have reached 100 msec, and SID data should be generated. + EXPECT_EQ(kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160), + kNoSid, &sid_data)); +} + +// Test automatic SID, with very short interval. +TEST_F(CngTest, CngAutoSidShort) { + rtc::Buffer sid_data; + + // Create and initialize encoder and decoder. + ComfortNoiseEncoder cng_encoder(16000, kSidShortIntervalUpdate, + kCNGNumParamsNormal); + ComfortNoiseDecoder cng_decoder; + + // First call will never generate SID, unless forced to. + EXPECT_EQ(0U, + cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160), + kNoSid, &sid_data)); + + // Normal Encode, 100 msec, SID data should be generated all the time. + for (int i = 0; i < 10; i++) { + EXPECT_EQ( + kCNGNumParamsNormal + 1, + cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160), + kNoSid, &sid_data)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc new file mode 100644 index 0000000000..48f1b8c296 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.cc @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
+
+#include <algorithm>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+namespace {
+
+// Largest block, in samples, handled per call: Generate() returns false and
+// Encode() RTC_CHECKs when given more than this.
+const size_t kCngMaxOutsizeOrder = 640;
+
+// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
+void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);
+
+// Energy table indexed by the first SID byte (the decoder clamps the index to
+// 93). Each entry is roughly 0.794 times the previous one, i.e. about 1 dB
+// lower. The encoder searches this table to quantize its smoothed energy.
+const int32_t WebRtcCng_kDbov[94] = {
+    1081109975, 858756178, 682134279, 541838517, 430397633, 341876992,
+    271562548,  215709799, 171344384, 136103682, 108110997, 85875618,
+    68213428,   54183852,  43039763,  34187699,  27156255,  21570980,
+    17134438,   13610368,  10811100,  8587562,   6821343,   5418385,
+    4303976,    3418770,   2715625,   2157098,   1713444,   1361037,
+    1081110,    858756,    682134,    541839,    430398,    341877,
+    271563,     215710,    171344,    136104,    108111,    85876,
+    68213,      54184,     43040,     34188,     27156,     21571,
+    17134,      13610,     10811,     8588,      6821,      5418,
+    4304,       3419,      2716,      2157,      1713,      1361,
+    1081,       859,       682,       542,       430,       342,
+    272,        216,       171,       136,       108,       86,
+    68,         54,        43,        34,        27,        22,
+    17,         14,        11,        9,         7,         5,
+    4,          3,         3,         2,         2,         1,
+    1,          1,         1,         1};
+
+// Q15 window that Encode() multiplies into the autocorrelation values (its
+// "bandwidth expansion" step).
+const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
+    32702, 32636, 32570, 32505, 32439, 32374,
+    32309, 32244, 32179, 32114, 32049, 31985};
+
+}  // namespace
+
+ComfortNoiseDecoder::ComfortNoiseDecoder() {
+  /* Needed to get the right function pointers in SPLIB. */
+  Reset();
+}
+
+// Restores the decoder to its initial state: fixed seed, zero energies,
+// coefficients and filter state, and order 5.
+void ComfortNoiseDecoder::Reset() {
+  dec_seed_ = 7777; /* For debugging only. 
*/ + dec_target_energy_ = 0; + dec_used_energy_ = 0; + for (auto& c : dec_target_reflCoefs_) + c = 0; + for (auto& c : dec_used_reflCoefs_) + c = 0; + for (auto& c : dec_filtstate_) + c = 0; + for (auto& c : dec_filtstateLow_) + c = 0; + dec_order_ = 5; + dec_target_scale_factor_ = 0; + dec_used_scale_factor_ = 0; +} + +void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) { + int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER]; + int32_t targetEnergy; + size_t length = sid.size(); + /* Throw away reflection coefficients of higher order than we can handle. */ + if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1)) + length = WEBRTC_CNG_MAX_LPC_ORDER + 1; + + dec_order_ = static_cast<uint16_t>(length - 1); + + uint8_t sid0 = std::min<uint8_t>(sid[0], 93); + targetEnergy = WebRtcCng_kDbov[sid0]; + /* Take down target energy to 75%. */ + targetEnergy = targetEnergy >> 1; + targetEnergy += targetEnergy >> 2; + + dec_target_energy_ = targetEnergy; + + /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */ + if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) { + for (size_t i = 0; i < (dec_order_); i++) { + refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/ + dec_target_reflCoefs_[i] = refCs[i]; + } + } else { + for (size_t i = 0; i < (dec_order_); i++) { + refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */ + dec_target_reflCoefs_[i] = refCs[i]; + } + } + + for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { + refCs[i] = 0; + dec_target_reflCoefs_[i] = refCs[i]; + } +} + +bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data, + bool new_period) { + int16_t excitation[kCngMaxOutsizeOrder]; + int16_t low[kCngMaxOutsizeOrder]; + int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t ReflBetaStd = 26214; /* 0.8 in q15. */ + int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */ + int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */ + int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */ + int16_t Beta, BetaC; /* These are in Q15. 
*/
+  int32_t targetEnergy;
+  int16_t En;
+  int16_t temp16;
+  const size_t num_samples = out_data.size();
+
+  /* The stack buffers above hold at most kCngMaxOutsizeOrder samples. */
+  if (num_samples > kCngMaxOutsizeOrder) {
+    return false;
+  }
+
+  /* A new period uses the 0.6/0.4 smoothing pair, otherwise 0.8/0.2. */
+  if (new_period) {
+    dec_used_scale_factor_ = dec_target_scale_factor_;
+    Beta = ReflBetaNewP;
+    BetaC = ReflBetaCompNewP;
+  } else {
+    Beta = ReflBetaStd;
+    BetaC = ReflBetaCompStd;
+  }
+
+  /* Calculate new scale factor in Q13 */
+  /* NOTE(review): this smoothed value is not read again before
+   * dec_used_scale_factor_ is recomputed from the energy further down. */
+  dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
+      WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
+      WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
+
+  /* The energy is smoothed with equal weights (1/2 old + 1/2 target). */
+  dec_used_energy_ = dec_used_energy_ >> 1;
+  dec_used_energy_ += dec_target_energy_ >> 1;
+
+  /* Do the same for the reflection coeffs, albeit in Q15. */
+  for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
+    dec_used_reflCoefs_[i] =
+        (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
+    dec_used_reflCoefs_[i] +=
+        (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
+  }
+
+  /* Compute the polynomial coefficients. */
+  WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
+
+  targetEnergy = dec_used_energy_; /* Overwritten by the sqrt below. */
+
+  /* Calculate scaling factor based on filter energy. */
+  En = 8192; /* 1.0 in Q13. */
+  for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
+    /* Floating point value for reference.
+       E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
+       (dec_used_reflCoefs_[i] / 32768.0);
+     */
+
+    /* Same in fixed point. */
+    /* K(i).^2 in Q15. */
+    temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
+                                                dec_used_reflCoefs_[i], 15);
+    /* 1 - K(i).^2 in Q15. */
+    temp16 = 0x7fff - temp16;
+    En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
+  }
+
+  /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
+
+  /* Calculate sqrt(En * target_energy / excitation energy) */
+  targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
+
+  En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
+  En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
+  dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
+
+  /* Generate excitation. */
+  /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
+  for (size_t i = 0; i < num_samples; i++) {
+    excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
+  }
+
+  /* Scale to correct energy. */
+  WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
+                        num_samples, 13);
+
+  /* `lpPoly` - Coefficients in Q12.
+   * `excitation` - Speech samples.
+   * `nst->dec_filtstate` - State preservation.
+   * `out_data` - Filtered speech samples. */
+  WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
+                     num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
+                     dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
+                     out_data.data(), low, num_samples);
+
+  return true;
+}
+
+/* `quality` is the number of reflection coefficients per SID frame and must
+ * be in [1, WEBRTC_CNG_MAX_LPC_ORDER]; anything else fails an RTC_CHECK. */
+ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
+    : enc_nrOfCoefs_(quality),
+      enc_sampfreq_(fs),
+      enc_interval_(interval),
+      enc_msSinceSid_(0),
+      enc_Energy_(0),
+      enc_reflCoefs_{0},
+      enc_corrVector_{0},
+      enc_seed_(7777) /* For debugging only. */ {
+  RTC_CHECK_GT(quality, 0);
+  RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
+}
+
+/* Re-initializes the encoder with new settings; same `quality` range check
+ * as the constructor, and all smoothed history is cleared. */
+void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
+  RTC_CHECK_GT(quality, 0);
+  RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
+  enc_nrOfCoefs_ = quality;
+  enc_sampfreq_ = fs;
+  enc_interval_ = interval;
+  enc_msSinceSid_ = 0;
+  enc_Energy_ = 0;
+  for (auto& c : enc_reflCoefs_)
+    c = 0;
+  for (auto& c : enc_corrVector_)
+    c = 0;
+  enc_seed_ = 7777; /* For debugging only. 
*/
+}
+
+/* Analyses one block of at most kCngMaxOutsizeOrder samples and updates the
+ * energy and reflection-coefficient estimates. When `force_sid` is set, or
+ * the time accumulated since the last SID exceeds `enc_interval_` - 1 ms, a
+ * SID frame of `enc_nrOfCoefs_` + 1 bytes is appended to `output` and that
+ * size is returned. Otherwise, and when Levinson-Durbin reports an unstable
+ * filter, nothing is appended and 0 is returned. */
+size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
+                                   bool force_sid,
+                                   rtc::Buffer* output) {
+  int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
+  int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
+  int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
+  int16_t hanningW[kCngMaxOutsizeOrder];
+  int16_t ReflBeta = 19661;     /* 0.6 in q15. */
+  int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
+  int32_t outEnergy;
+  int outShifts;
+  size_t i;
+  int stab;
+  int acorrScale;
+  size_t index;
+  size_t ind, factor;
+  int32_t* bptr;
+  int32_t blo, bhi;
+  int16_t negate;
+  const int16_t* aptr;
+  int16_t speechBuf[kCngMaxOutsizeOrder];
+
+  const size_t num_samples = speech.size();
+  RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
+
+  /* Work on a local copy; the windowing below modifies the samples. */
+  for (i = 0; i < num_samples; i++) {
+    speechBuf[i] = speech[i];
+  }
+
+  factor = num_samples;
+
+  /* Calculate energy and a coefficients. */
+  outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
+  while (outShifts > 0) {
+    /* We can only do 5 shifts without destroying accuracy in
+     * division factor. */
+    if (outShifts > 5) {
+      outEnergy <<= (outShifts - 5);
+      outShifts = 5;
+    } else {
+      factor /= 2;
+      outShifts--;
+    }
+  }
+  outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
+
+  if (outEnergy > 1) {
+    /* Create Hanning Window. */
+    WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
+    /* Mirror the first half to get the full symmetric window. */
+    for (i = 0; i < (num_samples / 2); i++)
+      hanningW[num_samples - i - 1] = hanningW[i];
+
+    WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
+                                    14);
+
+    WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
+                              corrVector, &acorrScale);
+
+    if (*corrVector == 0)
+      *corrVector = WEBRTC_SPL_WORD16_MAX;
+
+    /* Adds the bandwidth expansion. */
+    aptr = WebRtcCng_kCorrWindow;
+    bptr = corrVector;
+
+    /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
+    for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
+      /* The below code multiplies the 16 b corrWindow values (Q15) with
+       * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
+      /* The product is formed on the magnitude; the sign is restored after. */
+      negate = *bptr < 0;
+      if (negate)
+        *bptr = -*bptr;
+
+      blo = (int32_t)*aptr * (*bptr & 0xffff);
+      bhi = ((blo >> 16) & 0xffff) +
+            ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
+      blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
+
+      *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
+      if (negate)
+        *bptr = -*bptr;
+      bptr++;
+    }
+    /* End of bandwidth expansion. */
+
+    stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);
+
+    if (!stab) {
+      /* Disregard from this frame */
+      return 0;
+    }
+
+  } else {
+    /* Too little energy to analyse: use all-zero reflection coefficients. */
+    for (i = 0; i < enc_nrOfCoefs_; i++)
+      refCs[i] = 0;
+  }
+
+  if (force_sid) {
+    /* Read instantaneous values instead of averaged. */
+    for (i = 0; i < enc_nrOfCoefs_; i++)
+      enc_reflCoefs_[i] = refCs[i];
+    enc_Energy_ = outEnergy;
+  } else {
+    /* Average history with new values. */
+    for (i = 0; i < enc_nrOfCoefs_; i++) {
+      enc_reflCoefs_[i] =
+          (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
+      enc_reflCoefs_[i] +=
+          (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
+    }
+    /* Energy: 1/4 new value + 3/4 history. */
+    enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
+  }
+
+  if (enc_Energy_ < 1) {
+    enc_Energy_ = 1;
+  }
+
+  if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
+    /* Search for best dbov value. */
+    index = 0;
+    for (i = 1; i < 93; i++) {
+      /* Always round downwards. */
+      if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
+        index = i;
+        break;
+      }
+    }
+    /* No table entry in 1..92 lies below the energy: signal level 94. */
+    if ((i == 93) && (index == 0))
+      index = 94;
+
+    const size_t output_coefs = enc_nrOfCoefs_ + 1;
+    output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
+      output[0] = (uint8_t)index;
+
+      /* Quantize coefficients with tweak for WebRtc implementation of
+       * RFC3389. */
+      if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
+        for (i = 0; i < enc_nrOfCoefs_; i++) {
+          /* Q15 to Q7 with rounding. */
+          output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
+        }
+      } else {
+        for (i = 0; i < enc_nrOfCoefs_; i++) {
+          /* Q15 to Q7 with rounding. */
+          output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
+        }
+      }
+
+      return output_coefs;
+    });
+
+    /* Restart the interval timer with the duration of this block in ms. */
+    enc_msSinceSid_ =
+        static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
+    return output_coefs;
+  } else {
+    enc_msSinceSid_ +=
+        static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
+    return 0;
+  }
+}
+
+namespace {
+/* Values in `k` are Q15, and `a` Q12. */
+/* Converts `useOrder` reflection coefficients in `k` into the polynomial `a`,
+ * writing `useOrder` + 1 entries with a[0] = 4096 (1.0 in Q12). Each pass
+ * builds the next-order polynomial in `any` and copies it back into `a`. */
+void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
+  int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
+  int16_t* aptr;
+  int16_t* aptr2;
+  int16_t* anyptr;
+  const int16_t* kptr;
+  int m, i;
+
+  kptr = k;
+  *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
+  *any = *a;
+  a[1] = (*k + 4) >> 3;
+  for (m = 1; m < useOrder; m++) {
+    kptr++;
+    aptr = a;
+    aptr++;
+    aptr2 = &a[m];
+    anyptr = any;
+    anyptr++;
+
+    any[m + 1] = (*kptr + 4) >> 3;
+    for (i = 0; i < m; i++) {
+      *anyptr++ =
+          (*aptr++) +
+          (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15);
+    }
+
+    aptr = a;
+    anyptr = any;
+    for (i = 0; i < (m + 2); i++) {
+      *aptr++ = *anyptr++;
+    }
+  }
+}
+
+}  // namespace
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h
new file mode 100644
index 0000000000..7afd243f81
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/cng/webrtc_cng.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +

#ifndef MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_
#define MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_

#include <stdint.h>

#include <cstddef>

#include "api/array_view.h"
#include "rtc_base/buffer.h"

// Upper bound on the number of reflection coefficients; the state arrays
// below are sized WEBRTC_CNG_MAX_LPC_ORDER + 1.
#define WEBRTC_CNG_MAX_LPC_ORDER 12

namespace webrtc {

// Comfort noise generator. Non-copyable.
class ComfortNoiseDecoder {
 public:
  ComfortNoiseDecoder();
  ~ComfortNoiseDecoder() = default;

  ComfortNoiseDecoder(const ComfortNoiseDecoder&) = delete;
  ComfortNoiseDecoder& operator=(const ComfortNoiseDecoder&) = delete;

  void Reset();

  // Updates the CN state when a new SID packet arrives.
  // `sid` is a view of the SID packet without the headers.
  void UpdateSid(rtc::ArrayView<const uint8_t> sid);

  // Generates comfort noise.
  // `out_data` will be filled with samples - its size determines the number of
  // samples generated. When `new_period` is true, CNG history will be reset
  // before any audio is generated. Returns `false` if `out_data` is too large -
  // currently 640 bytes (equalling 10ms at 64kHz).
  // NOTE(review): ComfortNoiseEncoder::Encode below states the same limit as
  // 640 *samples*; confirm which unit is meant here.
  // TODO(ossu): Specify better limits for the size of out_data. Either let it
  // be unbounded or limit to 10ms in the current sample rate.
  bool Generate(rtc::ArrayView<int16_t> out_data, bool new_period);

 private:
  uint32_t dec_seed_;
  int32_t dec_target_energy_;
  int32_t dec_used_energy_;
  int16_t dec_target_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_used_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_filtstate_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_filtstateLow_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  uint16_t dec_order_;
  int16_t dec_target_scale_factor_; /* Q29 */
  int16_t dec_used_scale_factor_;   /* Q29 */
};

// Comfort noise (SID frame) encoder. Non-copyable.
class ComfortNoiseEncoder {
 public:
  // Creates a comfort noise encoder.
  // `fs` selects sample rate: 8000 for narrowband or 16000 for wideband.
  // `interval` sets the interval at which to generate SID data (in ms).
  // `quality` selects the number of refl. coeffs. Maximum allowed is 12.
  ComfortNoiseEncoder(int fs, int interval, int quality);
  ~ComfortNoiseEncoder() = default;

  ComfortNoiseEncoder(const ComfortNoiseEncoder&) = delete;
  ComfortNoiseEncoder& operator=(const ComfortNoiseEncoder&) = delete;

  // Resets the comfort noise encoder to its initial state.
  // Parameters are set as during construction.
  void Reset(int fs, int interval, int quality);

  // Analyzes background noise from `speech` and appends a SID frame to
  // `output` when one is due. Returns the number of bytes appended (one level
  // byte plus one byte per reflection coefficient), or 0 if no SID frame was
  // produced for this call. If `force_sid` is true, a SID frame is forced and
  // the internal sid interval counter is reset.
  // Will fail if the input size is too large (> 640 samples, see
  // ComfortNoiseDecoder::Generate).
  size_t Encode(rtc::ArrayView<const int16_t> speech,
                bool force_sid,
                rtc::Buffer* output);

 private:
  size_t enc_nrOfCoefs_;    // Number of reflection coefficients per SID.
  int enc_sampfreq_;        // Sample rate used to convert samples to ms.
  int16_t enc_interval_;    // SID interval in ms.
  int16_t enc_msSinceSid_;  // Milliseconds of audio since the last SID.
  int32_t enc_Energy_;      // Current (averaged or instantaneous) energy.
  int16_t enc_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int32_t enc_corrVector_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  uint32_t enc_seed_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc new file mode 100644 index 0000000000..46ac671b30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h" + +#include <utility> + +#include "modules/audio_coding/codecs/g711/g711_interface.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" + +namespace webrtc { + +void AudioDecoderPcmU::Reset() {} + +std::vector<AudioDecoder::ParseResult> AudioDecoderPcmU::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples( + this, std::move(payload), timestamp, 8 * num_channels_, 8); +} + +int AudioDecoderPcmU::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderPcmU::Channels() const { + return num_channels_; +} + +int AudioDecoderPcmU::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = + PacketDuration(encoded, encoded_len) * + Channels(); // 1 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG711_DecodeU(encoded, encoded_len_adjusted, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast<int>(ret); +} + +int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // One encoded byte per sample per channel. 
+ return static_cast<int>(encoded_len / Channels()); +} + +void AudioDecoderPcmA::Reset() {} + +std::vector<AudioDecoder::ParseResult> AudioDecoderPcmA::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples( + this, std::move(payload), timestamp, 8 * num_channels_, 8); +} + +int AudioDecoderPcmA::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderPcmA::Channels() const { + return num_channels_; +} + +int AudioDecoderPcmA::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = + PacketDuration(encoded, encoded_len) * + Channels(); // 1 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG711_DecodeA(encoded, encoded_len_adjusted, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast<int>(ret); +} + +int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // One encoded byte per sample per channel. + return static_cast<int>(encoded_len / Channels()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h new file mode 100644 index 0000000000..3fa42cba30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +

#ifndef MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_
#define MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "api/audio_codecs/audio_decoder.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"

namespace webrtc {

// AudioDecoder for G.711 u-law. Non-copyable.
class AudioDecoderPcmU final : public AudioDecoder {
 public:
  // `num_channels` must be at least 1 (debug-checked).
  explicit AudioDecoderPcmU(size_t num_channels) : num_channels_(num_channels) {
    RTC_DCHECK_GE(num_channels, 1);
  }

  AudioDecoderPcmU(const AudioDecoderPcmU&) = delete;
  AudioDecoderPcmU& operator=(const AudioDecoderPcmU&) = delete;

  void Reset() override;
  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
                                        uint32_t timestamp) override;
  int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
  int SampleRateHz() const override;
  size_t Channels() const override;

 protected:
  int DecodeInternal(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override;

 private:
  const size_t num_channels_;
};

// AudioDecoder for G.711 A-law. Non-copyable.
class AudioDecoderPcmA final : public AudioDecoder {
 public:
  // `num_channels` must be at least 1 (debug-checked).
  explicit AudioDecoderPcmA(size_t num_channels) : num_channels_(num_channels) {
    RTC_DCHECK_GE(num_channels, 1);
  }

  AudioDecoderPcmA(const AudioDecoderPcmA&) = delete;
  AudioDecoderPcmA& operator=(const AudioDecoderPcmA&) = delete;

  void Reset() override;
  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
                                        uint32_t timestamp) override;
  int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
  int SampleRateHz() const override;
  size_t Channels() const override;

 protected:
  int DecodeInternal(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override;

 private:
  const size_t num_channels_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_CODING_CODECS_G711_AUDIO_DECODER_PCM_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc new file mode 100644 index 0000000000..65e2da479d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.cc @@ -0,0 +1,126 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"

#include <cstdint>

#include "modules/audio_coding/codecs/g711/g711_interface.h"
#include "rtc_base/checks.h"

namespace webrtc {

// A config is valid when the frame size is a multiple of 10 ms and there is
// at least one channel.
bool AudioEncoderPcm::Config::IsOk() const {
  return (frame_size_ms % 10 == 0) && (num_channels >= 1);
}

// Derives the per-packet sizes from `config` and `sample_rate_hz`.
// Aborts (RTC_CHECK) if the sample rate is not positive or the frame size is
// not a multiple of 10 ms.
AudioEncoderPcm::AudioEncoderPcm(const Config& config, int sample_rate_hz)
    : sample_rate_hz_(sample_rate_hz),
      num_channels_(config.num_channels),
      payload_type_(config.payload_type),
      num_10ms_frames_per_packet_(
          static_cast<size_t>(config.frame_size_ms / 10)),
      // Total samples (all channels) in one packet.
      full_frame_samples_(config.num_channels * config.frame_size_ms *
                          sample_rate_hz / 1000),
      first_timestamp_in_buffer_(0) {
  RTC_CHECK_GT(sample_rate_hz, 0) << "Sample rate must be larger than 0 Hz";
  RTC_CHECK_EQ(config.frame_size_ms % 10, 0)
      << "Frame size must be an integer multiple of 10 ms.";
  // Reserve once so buffering input in EncodeImpl does not reallocate.
  speech_buffer_.reserve(full_frame_samples_);
}

AudioEncoderPcm::~AudioEncoderPcm() = default;

int AudioEncoderPcm::SampleRateHz() const {
  return sample_rate_hz_;
}

size_t AudioEncoderPcm::NumChannels() const {
  return num_channels_;
}

// The packet size is fixed, so the next packet and the maximum are the same.
size_t AudioEncoderPcm::Num10MsFramesInNextPacket() const {
  return num_10ms_frames_per_packet_;
}

size_t AudioEncoderPcm::Max10MsFramesInAPacket() const {
  return num_10ms_frames_per_packet_;
}

// Bits per second: 8 bits * bytes per sample * sample rate * channels.
int AudioEncoderPcm::GetTargetBitrate() const {
  return static_cast<int>(8 * BytesPerSample() * SampleRateHz() *
                          NumChannels());
}

// Buffers `audio` until a full packet's worth of samples is available, then
// encodes the whole buffer into `encoded`.
// Returns a default EncodedInfo while the buffer is still filling; otherwise
// an EncodedInfo carrying the timestamp of the first buffered sample, the
// payload type, the encoded byte count and the codec type.
AudioEncoder::EncodedInfo AudioEncoderPcm::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  // Remember the timestamp of the first chunk of each packet.
  if (speech_buffer_.empty()) {
    first_timestamp_in_buffer_ = rtp_timestamp;
  }
  speech_buffer_.insert(speech_buffer_.end(), audio.begin(), audio.end());
  if (speech_buffer_.size() < full_frame_samples_) {
    return EncodedInfo();
  }
  // Input must add up to exactly one packet; overshooting is a fatal error.
  RTC_CHECK_EQ(speech_buffer_.size(), full_frame_samples_);
  EncodedInfo info;
  info.encoded_timestamp = first_timestamp_in_buffer_;
  info.payload_type = payload_type_;
  info.encoded_bytes = encoded->AppendData(
      full_frame_samples_ * BytesPerSample(),
      [&](rtc::ArrayView<uint8_t> encoded) {
        return EncodeCall(&speech_buffer_[0], full_frame_samples_,
                          encoded.data());
      });
  speech_buffer_.clear();
  info.encoder_type = GetCodecType();
  return info;
}

// Drops any partially buffered packet.
void AudioEncoderPcm::Reset() {
  speech_buffer_.clear();
}

// The frame length is fixed, so the range's two ends are identical.
absl::optional<std::pair<TimeDelta, TimeDelta>>
AudioEncoderPcm::GetFrameLengthRange() const {
  return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10),
           TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}};
}

// A-law: forwards to the C encoder.
size_t AudioEncoderPcmA::EncodeCall(const int16_t* audio,
                                    size_t input_len,
                                    uint8_t* encoded) {
  return WebRtcG711_EncodeA(audio, input_len, encoded);
}

size_t AudioEncoderPcmA::BytesPerSample() const {
  return 1;
}

AudioEncoder::CodecType AudioEncoderPcmA::GetCodecType() const {
  return AudioEncoder::CodecType::kPcmA;
}

// u-law: forwards to the C encoder.
size_t AudioEncoderPcmU::EncodeCall(const int16_t* audio,
                                    size_t input_len,
                                    uint8_t* encoded) {
  return WebRtcG711_EncodeU(audio, input_len, encoded);
}

// u-law output uses one byte per sample.
size_t AudioEncoderPcmU::BytesPerSample() const {
  return 1;
}

AudioEncoder::CodecType AudioEncoderPcmU::GetCodecType() const {
  return AudioEncoder::CodecType::kPcmU;
}

}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h new file mode 100644 index 0000000000..d50be4b457 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h @@ -0,0 +1,128 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_
#define MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_

#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/units/time_delta.h"

namespace webrtc {

// Common base for the G.711 encoders. Subclasses supply the actual sample
// conversion (EncodeCall), the output size per sample and the codec type.
class AudioEncoderPcm : public AudioEncoder {
 public:
  // Encoder settings. Only constructible through the subclasses' Config
  // types, which fix the payload type.
  struct Config {
   public:
    bool IsOk() const;

    int frame_size_ms;    // Defaults to 20.
    size_t num_channels;  // Defaults to 1.
    int payload_type;

   protected:
    explicit Config(int pt)
        : frame_size_ms(20), num_channels(1), payload_type(pt) {}
  };

  ~AudioEncoderPcm() override;

  int SampleRateHz() const override;
  size_t NumChannels() const override;
  size_t Num10MsFramesInNextPacket() const override;
  size_t Max10MsFramesInAPacket() const override;
  int GetTargetBitrate() const override;
  void Reset() override;
  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
      const override;

 protected:
  AudioEncoderPcm(const Config& config, int sample_rate_hz);

  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
                         rtc::Buffer* encoded) override;

  // Encodes `input_len` samples from `audio` into `encoded` and returns the
  // number of bytes written.
  virtual size_t EncodeCall(const int16_t* audio,
                            size_t input_len,
                            uint8_t* encoded) = 0;

  // Number of output bytes produced per input sample.
  virtual size_t BytesPerSample() const = 0;

  // Used to set EncodedInfoLeaf::encoder_type in
  // AudioEncoderPcm::EncodeImpl
  virtual AudioEncoder::CodecType GetCodecType() const = 0;

 private:
  // NOTE: the constructor's initializer list follows this declaration order.
  const int sample_rate_hz_;
  const size_t num_channels_;
  const int payload_type_;
  const size_t num_10ms_frames_per_packet_;
  const size_t full_frame_samples_;  // Samples (all channels) per packet.
  std::vector<int16_t> speech_buffer_;  // Input collected for the next packet.
  uint32_t first_timestamp_in_buffer_;
};

// G.711 A-law encoder at 8000 Hz. Non-copyable.
class AudioEncoderPcmA final : public AudioEncoderPcm {
 public:
  // Default payload type is 8.
  struct Config : public AudioEncoderPcm::Config {
    Config() : AudioEncoderPcm::Config(8) {}
  };

  explicit AudioEncoderPcmA(const Config& config)
      : AudioEncoderPcm(config, kSampleRateHz) {}

  AudioEncoderPcmA(const AudioEncoderPcmA&) = delete;
  AudioEncoderPcmA& operator=(const AudioEncoderPcmA&) = delete;

 protected:
  size_t EncodeCall(const int16_t* audio,
                    size_t input_len,
                    uint8_t* encoded) override;

  size_t BytesPerSample() const override;

  AudioEncoder::CodecType GetCodecType() const override;

 private:
  static const int kSampleRateHz = 8000;
};

// G.711 u-law encoder at 8000 Hz. Non-copyable.
class AudioEncoderPcmU final : public AudioEncoderPcm {
 public:
  // Default payload type is 0.
  struct Config : public AudioEncoderPcm::Config {
    Config() : AudioEncoderPcm::Config(0) {}
  };

  explicit AudioEncoderPcmU(const Config& config)
      : AudioEncoderPcm(config, kSampleRateHz) {}

  AudioEncoderPcmU(const AudioEncoderPcmU&) = delete;
  AudioEncoderPcmU& operator=(const AudioEncoderPcmU&) = delete;

 protected:
  size_t EncodeCall(const int16_t* audio,
                    size_t input_len,
                    uint8_t* encoded) override;

  size_t BytesPerSample() const override;

  AudioEncoder::CodecType GetCodecType() const override;

 private:
  static const int kSampleRateHz = 8000;
};

}  // namespace webrtc

+#endif // MODULES_AUDIO_CODING_CODECS_G711_AUDIO_ENCODER_PCM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c new file mode 100644 index 0000000000..5fe1692ccb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <string.h> + +#include "modules/third_party/g711/g711.h" +#include "modules/audio_coding/codecs/g711/g711_interface.h" + +size_t WebRtcG711_EncodeA(const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t n; + for (n = 0; n < len; n++) + encoded[n] = linear_to_alaw(speechIn[n]); + return len; +} + +size_t WebRtcG711_EncodeU(const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t n; + for (n = 0; n < len; n++) + encoded[n] = linear_to_ulaw(speechIn[n]); + return len; +} + +size_t WebRtcG711_DecodeA(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) { + size_t n; + for (n = 0; n < len; n++) + decoded[n] = alaw_to_linear(encoded[n]); + *speechType = 1; + return len; +} + +size_t WebRtcG711_DecodeU(const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) { + size_t n; + for (n = 0; n < len; n++) + decoded[n] = ulaw_to_linear(encoded[n]); + *speechType = 1; + return len; +} + +int16_t WebRtcG711_Version(char* version, int16_t lenBytes) { + strncpy(version, "2.0.0", lenBytes); + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h 
b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h new file mode 100644 index 0000000000..c92e6cc1c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/g711_interface.h @@ -0,0 +1,136 @@
/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_
#define MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_

#include <stddef.h>
#include <stdint.h>

// Comfort noise constants
#define G711_WEBRTC_SPEECH 1
#define G711_WEBRTC_CNG 2

#ifdef __cplusplus
extern "C" {
#endif

/****************************************************************************
 * WebRtcG711_EncodeA(...)
 *
 * This function encodes a G711 A-law frame and inserts it into a packet.
 * Input speech can be of any length.
 *
 * Input:
 *      - speechIn           : Input speech vector
 *      - len                : Samples in speechIn
 *
 * Output:
 *      - encoded            : The encoded data vector
 *
 * Return value              : Length (in bytes) of coded data.
 *                             Always equal to len input parameter.
 */

size_t WebRtcG711_EncodeA(const int16_t* speechIn,
                          size_t len,
                          uint8_t* encoded);

/****************************************************************************
 * WebRtcG711_EncodeU(...)
 *
 * This function encodes a G711 U-law frame and inserts it into a packet.
 * Input speech can be of any length.
 *
 * Input:
 *      - speechIn           : Input speech vector
 *      - len                : Samples in speechIn
 *
 * Output:
 *      - encoded            : The encoded data vector
 *
 * Return value              : Length (in bytes) of coded data.
 *                             Always equal to len input parameter.
 */

size_t WebRtcG711_EncodeU(const int16_t* speechIn,
                          size_t len,
                          uint8_t* encoded);

/****************************************************************************
 * WebRtcG711_DecodeA(...)
 *
 * This function decodes a packet G711 A-law frame.
 *
 * Input:
 *      - encoded            : Encoded data
 *      - len                : Bytes in encoded vector
 *
 * Output:
 *      - decoded            : The decoded vector
 *      - speechType         : 1 normal, 2 CNG (for G711 it should
 *                             always return 1 since G711 does not have a
 *                             built-in DTX/CNG scheme)
 *
 * Return value              : Samples in decoded vector.
 *                             The return type is unsigned, so no negative
 *                             error code can be reported.
 */

size_t WebRtcG711_DecodeA(const uint8_t* encoded,
                          size_t len,
                          int16_t* decoded,
                          int16_t* speechType);

/****************************************************************************
 * WebRtcG711_DecodeU(...)
 *
 * This function decodes a packet G711 U-law frame.
 *
 * Input:
 *      - encoded            : Encoded data
 *      - len                : Bytes in encoded vector
 *
 * Output:
 *      - decoded            : The decoded vector
 *      - speechType         : 1 normal, 2 CNG (for G711 it should
 *                             always return 1 since G711 does not have a
 *                             built-in DTX/CNG scheme)
 *
 * Return value              : Samples in decoded vector.
 *                             The return type is unsigned, so no negative
 *                             error code can be reported.
 */

size_t WebRtcG711_DecodeU(const uint8_t* encoded,
                          size_t len,
                          int16_t* decoded,
                          int16_t* speechType);

+/********************************************************************** + * WebRtcG711_Version(...) + * + * This function gives the version string of the G.711 codec. + * + * Input: + * - lenBytes: the size of Allocated space (in Bytes) where + * the version number is written to (in string format). + * + * Output: + * - version: Pointer to a buffer where the version number is + * written to.
+ * + */ + +int16_t WebRtcG711_Version(char* version, int16_t lenBytes); + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_G711_G711_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc new file mode 100644 index 0000000000..f3a42f5d79 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g711/test/testG711.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * testG711.cpp : Defines the entry point for the console application. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* include API */ +#include "modules/audio_coding/codecs/g711/g711_interface.h" + +/* Runtime statistics */ +#include <time.h> +#define CLOCKS_PER_SEC_G711 1000 + +/* function for reading audio data from PCM file */ +bool readframe(int16_t* data, FILE* inp, size_t length) { + size_t rlen = fread(data, sizeof(int16_t), length, inp); + if (rlen >= length) + return false; + memset(data + rlen, 0, (length - rlen) * sizeof(int16_t)); + return true; +} + +int main(int argc, char* argv[]) { + char inname[80], outname[40], bitname[40]; + FILE* inp; + FILE* outp; + FILE* bitp = NULL; + int framecnt; + bool endfile; + + size_t framelength = 80; + + /* Runtime statistics */ + double starttime; + double runtime; + double length_file; + + size_t stream_len = 0; + int16_t shortdata[480]; + int16_t decoded[480]; + uint8_t streamdata[1000]; + int16_t speechType[1]; + char law[2]; + char versionNumber[40]; + + /* handling wrong input arguments in 
the command line */ + if ((argc != 5) && (argc != 6)) { + printf("\n\nWrong number of arguments or flag values.\n\n"); + + printf("\n"); + printf("\nG.711 test application\n\n"); + printf("Usage:\n\n"); + printf("./testG711.exe framelength law infile outfile \n\n"); + printf("framelength: Framelength in samples.\n"); + printf("law : Coding law, A och u.\n"); + printf("infile : Normal speech input file\n"); + printf("outfile : Speech output file\n\n"); + printf("outbits : Output bitstream file [optional]\n\n"); + exit(0); + } + + /* Get version and print */ + WebRtcG711_Version(versionNumber, 40); + + printf("-----------------------------------\n"); + printf("G.711 version: %s\n\n", versionNumber); + /* Get frame length */ + int framelength_int = atoi(argv[1]); + if (framelength_int < 0) { + printf(" G.722: Invalid framelength %d.\n", framelength_int); + exit(1); + } + framelength = static_cast<size_t>(framelength_int); + + /* Get compression law */ + strcpy(law, argv[2]); + + /* Get Input and Output files */ + sscanf(argv[3], "%s", inname); + sscanf(argv[4], "%s", outname); + if (argc == 6) { + sscanf(argv[5], "%s", bitname); + if ((bitp = fopen(bitname, "wb")) == NULL) { + printf(" G.711: Cannot read file %s.\n", bitname); + exit(1); + } + } + + if ((inp = fopen(inname, "rb")) == NULL) { + printf(" G.711: Cannot read file %s.\n", inname); + exit(1); + } + if ((outp = fopen(outname, "wb")) == NULL) { + printf(" G.711: Cannot write file %s.\n", outname); + exit(1); + } + printf("\nInput: %s\nOutput: %s\n", inname, outname); + if (argc == 6) { + printf("\nBitfile: %s\n", bitname); + } + + starttime = clock() / (double)CLOCKS_PER_SEC_G711; /* Runtime statistics */ + + /* Initialize encoder and decoder */ + framecnt = 0; + endfile = false; + while (!endfile) { + framecnt++; + /* Read speech block */ + endfile = readframe(shortdata, inp, framelength); + + /* G.711 encoding */ + if (!strcmp(law, "A")) { + /* A-law encoding */ + stream_len = WebRtcG711_EncodeA(shortdata, 
framelength, streamdata); + if (argc == 6) { + /* Write bits to file */ + if (fwrite(streamdata, sizeof(unsigned char), stream_len, bitp) != + stream_len) { + return -1; + } + } + WebRtcG711_DecodeA(streamdata, stream_len, decoded, speechType); + } else if (!strcmp(law, "u")) { + /* u-law encoding */ + stream_len = WebRtcG711_EncodeU(shortdata, framelength, streamdata); + if (argc == 6) { + /* Write bits to file */ + if (fwrite(streamdata, sizeof(unsigned char), stream_len, bitp) != + stream_len) { + return -1; + } + } + WebRtcG711_DecodeU(streamdata, stream_len, decoded, speechType); + } else { + printf("Wrong law mode\n"); + exit(1); + } + /* Write coded speech to file */ + if (fwrite(decoded, sizeof(short), framelength, outp) != framelength) { + return -1; + } + } + + runtime = (double)(clock() / (double)CLOCKS_PER_SEC_G711 - starttime); + length_file = ((double)framecnt * (double)framelength / 8000); + printf("\n\nLength of speech file: %.1f s\n", length_file); + printf("Time to run G.711: %.2f s (%.2f %% of realtime)\n\n", runtime, + (100 * runtime / length_file)); + printf("---------------------END----------------------\n"); + + fclose(inp); + fclose(outp); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc new file mode 100644 index 0000000000..1ecc9bc3d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.cc @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" + +#include <string.h> + +#include <utility> + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioDecoderG722Impl::AudioDecoderG722Impl() { + WebRtcG722_CreateDecoder(&dec_state_); + WebRtcG722_DecoderInit(dec_state_); +} + +AudioDecoderG722Impl::~AudioDecoderG722Impl() { + WebRtcG722_FreeDecoder(dec_state_); +} + +bool AudioDecoderG722Impl::HasDecodePlc() const { + return false; +} + +int AudioDecoderG722Impl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + int16_t temp_type = 1; // Default is speech. + size_t ret = + WebRtcG722_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return static_cast<int>(ret); +} + +void AudioDecoderG722Impl::Reset() { + WebRtcG722_DecoderInit(dec_state_); +} + +std::vector<AudioDecoder::ParseResult> AudioDecoderG722Impl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), + timestamp, 8, 16); +} + +int AudioDecoderG722Impl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // 1/2 encoded byte per sample per channel. 
+ return static_cast<int>(2 * encoded_len / Channels()); +} + +int AudioDecoderG722Impl::SampleRateHz() const { + return 16000; +} + +size_t AudioDecoderG722Impl::Channels() const { + return 1; +} + +AudioDecoderG722StereoImpl::AudioDecoderG722StereoImpl() { + WebRtcG722_CreateDecoder(&dec_state_left_); + WebRtcG722_CreateDecoder(&dec_state_right_); + WebRtcG722_DecoderInit(dec_state_left_); + WebRtcG722_DecoderInit(dec_state_right_); +} + +AudioDecoderG722StereoImpl::~AudioDecoderG722StereoImpl() { + WebRtcG722_FreeDecoder(dec_state_left_); + WebRtcG722_FreeDecoder(dec_state_right_); +} + +int AudioDecoderG722StereoImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = PacketDuration(encoded, encoded_len) * + Channels() / + 2; // 1/2 byte per sample per channel + int16_t temp_type = 1; // Default is speech. + // De-interleave the bit-stream into two separate payloads. + uint8_t* encoded_deinterleaved = new uint8_t[encoded_len_adjusted]; + SplitStereoPacket(encoded, encoded_len_adjusted, encoded_deinterleaved); + // Decode left and right. + size_t decoded_len = + WebRtcG722_Decode(dec_state_left_, encoded_deinterleaved, + encoded_len_adjusted / 2, decoded, &temp_type); + size_t ret = WebRtcG722_Decode( + dec_state_right_, &encoded_deinterleaved[encoded_len_adjusted / 2], + encoded_len_adjusted / 2, &decoded[decoded_len], &temp_type); + if (ret == decoded_len) { + ret += decoded_len; // Return total number of samples. + // Interleave output. 
+ for (size_t k = ret / 2; k < ret; k++) { + int16_t temp = decoded[k]; + memmove(&decoded[2 * k - ret + 2], &decoded[2 * k - ret + 1], + (ret - k - 1) * sizeof(int16_t)); + decoded[2 * k - ret + 1] = temp; + } + } + *speech_type = ConvertSpeechType(temp_type); + delete[] encoded_deinterleaved; + return static_cast<int>(ret); +} + +int AudioDecoderG722StereoImpl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // 1/2 encoded byte per sample per channel. Make sure the length represents + // an equal number of bytes per channel. Otherwise, we cannot de-interleave + // the encoded data later. + return static_cast<int>(2 * (encoded_len / Channels())); +} + +int AudioDecoderG722StereoImpl::SampleRateHz() const { + return 16000; +} + +size_t AudioDecoderG722StereoImpl::Channels() const { + return 2; +} + +void AudioDecoderG722StereoImpl::Reset() { + WebRtcG722_DecoderInit(dec_state_left_); + WebRtcG722_DecoderInit(dec_state_right_); +} + +std::vector<AudioDecoder::ParseResult> AudioDecoderG722StereoImpl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), + timestamp, 2 * 8, 16); +} + +// Split the stereo packet and place left and right channel after each other +// in the output array. +void AudioDecoderG722StereoImpl::SplitStereoPacket( + const uint8_t* encoded, + size_t encoded_len, + uint8_t* encoded_deinterleaved) { + // Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ..., + // where "lx" is 4 bits representing left sample number x, and "rx" right + // sample. Two samples fit in one byte, represented with |...|. 
+ for (size_t i = 0; i + 1 < encoded_len; i += 2) { + uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F); + encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4); + encoded_deinterleaved[i + 1] = right_byte; + } + + // Move one byte representing right channel each loop, and place it at the + // end of the bytestream vector. After looping the data is reordered to: + // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|, + // where N is the total number of samples. + for (size_t i = 0; i < encoded_len / 2; i++) { + uint8_t right_byte = encoded_deinterleaved[i + 1]; + memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2], + encoded_len - i - 2); + encoded_deinterleaved[encoded_len - 1] = right_byte; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h new file mode 100644 index 0000000000..5872fad5de --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_
+#define MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_
+
+#include "api/audio_codecs/audio_decoder.h"
+
+// Opaque decoder state; the definition lives with the G.722 C interface.
+typedef struct WebRtcG722DecInst G722DecInst;
+
+namespace webrtc {
+
+// Single-channel G.722 decoder. Reports a 16 kHz sample rate and one channel,
+// and owns exactly one decoder state for its whole lifetime.
+class AudioDecoderG722Impl final : public AudioDecoder {
+ public:
+  AudioDecoderG722Impl();
+  ~AudioDecoderG722Impl() override;
+
+  // Not copyable: the raw state pointer below is freed in the destructor.
+  AudioDecoderG722Impl(const AudioDecoderG722Impl&) = delete;
+  AudioDecoderG722Impl& operator=(const AudioDecoderG722Impl&) = delete;
+
+  // Always false; this decoder has no packet-loss concealment of its own.
+  bool HasDecodePlc() const override;
+  // Re-initializes the decoder state.
+  void Reset() override;
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+  // Returns the number of samples per channel encoded in `encoded_len` bytes
+  // (two samples per byte).
+  int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+  int SampleRateHz() const override;
+  size_t Channels() const override;
+
+ protected:
+  // Decodes `encoded` into `decoded` and returns the number of samples written.
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+
+ private:
+  // Created in the constructor, released in the destructor.
+  G722DecInst* dec_state_;
+};
+
+// Two-channel G.722 decoder. Each channel has its own decoder state; the
+// payload is de-interleaved before decoding and the decoded samples are
+// interleaved again afterwards.
+class AudioDecoderG722StereoImpl final : public AudioDecoder {
+ public:
+  AudioDecoderG722StereoImpl();
+  ~AudioDecoderG722StereoImpl() override;
+
+  // Not copyable: the raw state pointers below are freed in the destructor.
+  AudioDecoderG722StereoImpl(const AudioDecoderG722StereoImpl&) = delete;
+  AudioDecoderG722StereoImpl& operator=(const AudioDecoderG722StereoImpl&) =
+      delete;
+
+  // Re-initializes both channel states.
+  void Reset() override;
+  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                        uint32_t timestamp) override;
+  int SampleRateHz() const override;
+  // Returns the number of samples per channel, rounded down so that both
+  // channels get the same number of encoded bytes.
+  int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
+  size_t Channels() const override;
+
+ protected:
+  // Decodes both channels and returns the total number of samples written to
+  // `decoded` (left and right interleaved).
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+
+ private:
+  // Splits the stereo-interleaved payload in `encoded` into separate payloads
+  // for left and right channels. The separated payloads are written to
+  // `encoded_deinterleaved`, which must hold at least `encoded_len` bytes.
+  // The left channel starts at offset 0, while the right channel starts at
+  // offset encoded_len / 2 into `encoded_deinterleaved`.
+  void SplitStereoPacket(const uint8_t* encoded,
+                         size_t encoded_len,
+                         uint8_t* encoded_deinterleaved);
+
+  // Per-channel states, created in the constructor and released in the
+  // destructor.
+  G722DecInst* dec_state_left_;
+  G722DecInst* dec_state_right_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_CODECS_G722_AUDIO_DECODER_G722_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
new file mode 100644
index 0000000000..b7d34ba581
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_coding/codecs/g722/audio_encoder_g722.h" + +#include <cstdint> + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +const size_t kSampleRateHz = 16000; + +} // namespace + +AudioEncoderG722Impl::AudioEncoderG722Impl(const AudioEncoderG722Config& config, + int payload_type) + : num_channels_(config.num_channels), + payload_type_(payload_type), + num_10ms_frames_per_packet_( + static_cast<size_t>(config.frame_size_ms / 10)), + num_10ms_frames_buffered_(0), + first_timestamp_in_buffer_(0), + encoders_(new EncoderState[num_channels_]), + interleave_buffer_(2 * num_channels_) { + RTC_CHECK(config.IsOk()); + const size_t samples_per_channel = + kSampleRateHz / 100 * num_10ms_frames_per_packet_; + for (size_t i = 0; i < num_channels_; ++i) { + encoders_[i].speech_buffer.reset(new int16_t[samples_per_channel]); + encoders_[i].encoded_buffer.SetSize(samples_per_channel / 2); + } + Reset(); +} + +AudioEncoderG722Impl::~AudioEncoderG722Impl() = default; + +int AudioEncoderG722Impl::SampleRateHz() const { + return kSampleRateHz; +} + +size_t AudioEncoderG722Impl::NumChannels() const { + return num_channels_; +} + +int AudioEncoderG722Impl::RtpTimestampRateHz() const { + // The RTP timestamp rate for G.722 is 8000 Hz, even though it is a 16 kHz + // codec. + return kSampleRateHz / 2; +} + +size_t AudioEncoderG722Impl::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +size_t AudioEncoderG722Impl::Max10MsFramesInAPacket() const { + return num_10ms_frames_per_packet_; +} + +int AudioEncoderG722Impl::GetTargetBitrate() const { + // 4 bits/sample, 16000 samples/s/channel. 
+ return static_cast<int>(64000 * NumChannels()); +} + +void AudioEncoderG722Impl::Reset() { + num_10ms_frames_buffered_ = 0; + for (size_t i = 0; i < num_channels_; ++i) + RTC_CHECK_EQ(0, WebRtcG722_EncoderInit(encoders_[i].encoder)); +} + +absl::optional<std::pair<TimeDelta, TimeDelta>> +AudioEncoderG722Impl::GetFrameLengthRange() const { + return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10), + TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}}; +} + +AudioEncoder::EncodedInfo AudioEncoderG722Impl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + if (num_10ms_frames_buffered_ == 0) + first_timestamp_in_buffer_ = rtp_timestamp; + + // Deinterleave samples and save them in each channel's buffer. + const size_t start = kSampleRateHz / 100 * num_10ms_frames_buffered_; + for (size_t i = 0; i < kSampleRateHz / 100; ++i) + for (size_t j = 0; j < num_channels_; ++j) + encoders_[j].speech_buffer[start + i] = audio[i * num_channels_ + j]; + + // If we don't yet have enough samples for a packet, we're done for now. + if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) { + return EncodedInfo(); + } + + // Encode each channel separately. + RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_); + num_10ms_frames_buffered_ = 0; + const size_t samples_per_channel = SamplesPerChannel(); + for (size_t i = 0; i < num_channels_; ++i) { + const size_t bytes_encoded = WebRtcG722_Encode( + encoders_[i].encoder, encoders_[i].speech_buffer.get(), + samples_per_channel, encoders_[i].encoded_buffer.data()); + RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2); + } + + const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_; + EncodedInfo info; + info.encoded_bytes = encoded->AppendData( + bytes_to_encode, [&](rtc::ArrayView<uint8_t> encoded) { + // Interleave the encoded bytes of the different channels. 
Each separate + // channel and the interleaved stream encodes two samples per byte, most + // significant half first. + for (size_t i = 0; i < samples_per_channel / 2; ++i) { + for (size_t j = 0; j < num_channels_; ++j) { + uint8_t two_samples = encoders_[j].encoded_buffer.data()[i]; + interleave_buffer_.data()[j] = two_samples >> 4; + interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf; + } + for (size_t j = 0; j < num_channels_; ++j) + encoded[i * num_channels_ + j] = + interleave_buffer_.data()[2 * j] << 4 | + interleave_buffer_.data()[2 * j + 1]; + } + + return bytes_to_encode; + }); + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.encoder_type = CodecType::kG722; + return info; +} + +AudioEncoderG722Impl::EncoderState::EncoderState() { + RTC_CHECK_EQ(0, WebRtcG722_CreateEncoder(&encoder)); +} + +AudioEncoderG722Impl::EncoderState::~EncoderState() { + RTC_CHECK_EQ(0, WebRtcG722_FreeEncoder(encoder)); +} + +size_t AudioEncoderG722Impl::SamplesPerChannel() const { + return kSampleRateHz / 100 * num_10ms_frames_per_packet_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h new file mode 100644 index 0000000000..a932aa8b7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_
+#define MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_
+
+#include <memory>
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "api/audio_codecs/audio_encoder.h"
+#include "api/audio_codecs/g722/audio_encoder_g722_config.h"
+#include "api/units/time_delta.h"
+#include "modules/audio_coding/codecs/g722/g722_interface.h"
+#include "rtc_base/buffer.h"
+
+namespace webrtc {
+
+// G.722 encoder for one or more channels. Input arrives in 10 ms blocks and is
+// buffered until a whole packet is available; each channel is then encoded by
+// its own encoder instance and the results are interleaved into one payload.
+class AudioEncoderG722Impl final : public AudioEncoder {
+ public:
+  // `config` supplies the channel count and the packet length in ms;
+  // `payload_type` is copied into every EncodedInfo this encoder returns.
+  AudioEncoderG722Impl(const AudioEncoderG722Config& config, int payload_type);
+  ~AudioEncoderG722Impl() override;
+
+  AudioEncoderG722Impl(const AudioEncoderG722Impl&) = delete;
+  AudioEncoderG722Impl& operator=(const AudioEncoderG722Impl&) = delete;
+
+  int SampleRateHz() const override;
+  size_t NumChannels() const override;
+  // Half of SampleRateHz(): G.722 RTP timestamps run at 8000 Hz.
+  int RtpTimestampRateHz() const override;
+  size_t Num10MsFramesInNextPacket() const override;
+  size_t Max10MsFramesInAPacket() const override;
+  int GetTargetBitrate() const override;
+  // Discards buffered audio and re-initializes every channel's encoder.
+  void Reset() override;
+  // Both ends of the range equal the configured packet length.
+  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
+      const override;
+
+ protected:
+  // Consumes one 10 ms block of interleaved samples. Returns an empty
+  // EncodedInfo until a full packet has been buffered, then appends the
+  // encoded packet to `encoded`.
+  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
+                         rtc::ArrayView<const int16_t> audio,
+                         rtc::Buffer* encoded) override;
+
+ private:
+  // The encoder state for one channel.
+  struct EncoderState {
+    G722EncInst* encoder;
+    std::unique_ptr<int16_t[]> speech_buffer;  // Queued up for encoding.
+    rtc::Buffer encoded_buffer;                // Already encoded.
+    // Creates `encoder`; the destructor frees it.
+    EncoderState();
+    ~EncoderState();
+  };
+
+  // Samples per channel in one packet.
+  size_t SamplesPerChannel() const;
+
+  const size_t num_channels_;
+  const int payload_type_;
+  const size_t num_10ms_frames_per_packet_;
+  // Number of 10 ms blocks currently held in the speech buffers.
+  size_t num_10ms_frames_buffered_;
+  // RTP timestamp of the first block of the packet being assembled.
+  uint32_t first_timestamp_in_buffer_;
+  // One entry per channel.
+  const std::unique_ptr<EncoderState[]> encoders_;
+  // Scratch space (two entries per channel) used while interleaving.
+  rtc::Buffer interleave_buffer_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_CODING_CODECS_G722_AUDIO_ENCODER_G722_H_
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c
new file mode 100644
index 0000000000..36ee6d92be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include <stdlib.h> +#include <string.h> + +#include "modules/audio_coding/codecs/g722/g722_interface.h" +#include "modules/third_party/g722/g722_enc_dec.h" + +int16_t WebRtcG722_CreateEncoder(G722EncInst **G722enc_inst) +{ + *G722enc_inst=(G722EncInst*)malloc(sizeof(G722EncoderState)); + if (*G722enc_inst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcG722_EncoderInit(G722EncInst *G722enc_inst) +{ + // Create and/or reset the G.722 encoder + // Bitrate 64 kbps and wideband mode (2) + G722enc_inst = (G722EncInst *) WebRtc_g722_encode_init( + (G722EncoderState*) G722enc_inst, 64000, 2); + if (G722enc_inst == NULL) { + return -1; + } else { + return 0; + } +} + +int WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst) +{ + // Free encoder memory + return WebRtc_g722_encode_release((G722EncoderState*) G722enc_inst); +} + +size_t WebRtcG722_Encode(G722EncInst *G722enc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded) +{ + unsigned char *codechar = (unsigned char*) encoded; + // Encode the input speech vector + return WebRtc_g722_encode((G722EncoderState*) G722enc_inst, codechar, + speechIn, len); +} + +int16_t WebRtcG722_CreateDecoder(G722DecInst **G722dec_inst) +{ + *G722dec_inst=(G722DecInst*)malloc(sizeof(G722DecoderState)); + if (*G722dec_inst!=NULL) { + return(0); + } else { + return(-1); + } +} + +void WebRtcG722_DecoderInit(G722DecInst* inst) { + // Create and/or reset the G.722 decoder + // Bitrate 64 kbps and wideband mode (2) + WebRtc_g722_decode_init((G722DecoderState*)inst, 64000, 2); +} + +int WebRtcG722_FreeDecoder(G722DecInst *G722dec_inst) +{ + // Free encoder memory + return WebRtc_g722_decode_release((G722DecoderState*) G722dec_inst); +} + +size_t WebRtcG722_Decode(G722DecInst *G722dec_inst, + const uint8_t *encoded, + size_t len, + int16_t *decoded, + int16_t *speechType) +{ + // Decode the G.722 encoder stream + *speechType=G722_WEBRTC_SPEECH; + return WebRtc_g722_decode((G722DecoderState*) G722dec_inst, 
decoded, + encoded, len); +} + +int16_t WebRtcG722_Version(char *versionStr, short len) +{ + // Get version string + char version[30] = "2.0.0\n"; + if (strlen(version) < (unsigned int)len) + { + strcpy(versionStr, version); + return 0; + } + else + { + return -1; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h new file mode 100644 index 0000000000..353de4504f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/g722_interface.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ +#define MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ + +#include <stddef.h> +#include <stdint.h> + +/* + * Solution to support multiple instances + */ + +typedef struct WebRtcG722EncInst G722EncInst; +typedef struct WebRtcG722DecInst G722DecInst; + +/* + * Comfort noise constants + */ + +#define G722_WEBRTC_SPEECH 1 +#define G722_WEBRTC_CNG 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcG722_CreateEncoder(...) + * + * Create memory used for G722 encoder + * + * Input: + * - G722enc_inst : G722 instance for encoder + * + * Return value : 0 - Ok + * -1 - Error + */ +int16_t WebRtcG722_CreateEncoder(G722EncInst** G722enc_inst); + +/**************************************************************************** + * WebRtcG722_EncoderInit(...) 
+ *
+ * This function initializes (or resets) a G722 encoder instance.
+ *
+ * Input:
+ *     - G722enc_inst         : G722 encoder instance that should be
+ *                              initialized
+ *
+ * Return value               :  0 - Ok
+ *                              -1 - Error
+ */
+
+int16_t WebRtcG722_EncoderInit(G722EncInst* G722enc_inst);
+
+/****************************************************************************
+ * WebRtcG722_FreeEncoder(...)
+ *
+ * Free the memory used for G722 encoder
+ *
+ * Input:
+ *     - G722enc_inst         : G722 instance for encoder
+ *
+ * Return value               :  0 - Ok
+ *                              -1 - Error
+ */
+int WebRtcG722_FreeEncoder(G722EncInst* G722enc_inst);
+
+/****************************************************************************
+ * WebRtcG722_Encode(...)
+ *
+ * This function encodes speech samples into G722 data.
+ *
+ * Input:
+ *     - G722enc_inst         : G722 instance, i.e. the user that should encode
+ *                              a packet
+ *     - speechIn             : Input speech vector
+ *     - len                  : Samples in speechIn
+ *
+ * Output:
+ *     - encoded              : The encoded data vector
+ *
+ * Return value               : Length (in bytes) of coded data
+ */
+
+size_t WebRtcG722_Encode(G722EncInst* G722enc_inst,
+                         const int16_t* speechIn,
+                         size_t len,
+                         uint8_t* encoded);
+
+/****************************************************************************
+ * WebRtcG722_CreateDecoder(...)
+ *
+ * Create memory used for G722 decoder
+ *
+ * Input:
+ *     - G722dec_inst         : G722 instance for decoder
+ *
+ * Return value               :  0 - Ok
+ *                              -1 - Error
+ */
+int16_t WebRtcG722_CreateDecoder(G722DecInst** G722dec_inst);
+
+/****************************************************************************
+ * WebRtcG722_DecoderInit(...)
+ *
+ * This function initializes (or resets) a G722 decoder instance.
+ *
+ * Input:
+ *     - inst                 : G722 decoder instance
+ */
+
+void WebRtcG722_DecoderInit(G722DecInst* inst);
+
+/****************************************************************************
+ * WebRtcG722_FreeDecoder(...)
+ *
+ * Free the memory used for G722 decoder
+ *
+ * Input:
+ *     - G722dec_inst         : G722 instance for decoder
+ *
+ * Return value               :  0 - Ok
+ *                              -1 - Error
+ */
+
+int WebRtcG722_FreeDecoder(G722DecInst* G722dec_inst);
+
+/****************************************************************************
+ * WebRtcG722_Decode(...)
+ *
+ * This function decodes a packet with G722 frame(s). Output speech length
+ * will be a multiple of 80 samples (80*frames/packet).
+ * NOTE(review): the "multiple of 80 samples" statement is inherited from the
+ * original comment and is not enforced by this interface - confirm.
+ *
+ * Input:
+ *     - G722dec_inst         : G722 instance, i.e. the user that should decode
+ *                              a packet
+ *     - encoded              : Encoded G722 frame(s)
+ *     - len                  : Bytes in encoded vector
+ *
+ * Output:
+ *     - decoded              : The decoded vector
+ *     - speechType           : 1 normal, 2 CNG (Since G722 does not have its own
+ *                              DTX/CNG scheme it should always return 1)
+ *
+ * Return value               : Samples in decoded vector
+ */
+
+size_t WebRtcG722_Decode(G722DecInst* G722dec_inst,
+                         const uint8_t* encoded,
+                         size_t len,
+                         int16_t* decoded,
+                         int16_t* speechType);
+
+/****************************************************************************
+ * WebRtcG722_Version(...)
+ *
+ * Get a string with the current version of the codec
+ *
+ * Input:
+ *     - len                  : Size of versionStr in bytes
+ *
+ * Output:
+ *     - versionStr           : Receives the NUL-terminated version string
+ *
+ * Return value               :  0 - Ok
+ *                              -1 - versionStr is too small
+ */
+
+int16_t WebRtcG722_Version(char* versionStr, short len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MODULES_AUDIO_CODING_CODECS_G722_G722_INTERFACE_H_ */
diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc b/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc
new file mode 100644
index 0000000000..9f2155d0f7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_coding/codecs/g722/test/testG722.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * testG722.cpp : Defines the entry point for the console application. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* include API */ +#include "modules/audio_coding/codecs/g722/g722_interface.h" + +/* Runtime statistics */ +#include <time.h> +#define CLOCKS_PER_SEC_G722 100000 + +// Forward declaration +typedef struct WebRtcG722EncInst G722EncInst; +typedef struct WebRtcG722DecInst G722DecInst; + +/* function for reading audio data from PCM file */ +bool readframe(int16_t* data, FILE* inp, size_t length) { + size_t rlen = fread(data, sizeof(int16_t), length, inp); + if (rlen >= length) + return false; + memset(data + rlen, 0, (length - rlen) * sizeof(int16_t)); + return true; +} + +int main(int argc, char* argv[]) { + char inname[60], outbit[40], outname[40]; + FILE *inp, *outbitp, *outp; + + int framecnt; + bool endfile; + size_t framelength = 160; + G722EncInst* G722enc_inst; + G722DecInst* G722dec_inst; + + /* Runtime statistics */ + double starttime; + double runtime = 0; + double length_file; + + size_t stream_len = 0; + int16_t shortdata[960]; + int16_t decoded[960]; + uint8_t streamdata[80 * 6]; + int16_t speechType[1]; + + /* handling wrong input arguments in the command line */ + if (argc != 5) { + printf("\n\nWrong number of arguments or flag values.\n\n"); + + printf("\n"); + printf("Usage:\n\n"); + printf("./testG722.exe framelength infile outbitfile outspeechfile \n\n"); + printf("with:\n"); + printf("framelength : Framelength in samples.\n\n"); + printf("infile : Normal speech input file\n\n"); + printf("outbitfile : Bitstream output file\n\n"); + printf("outspeechfile: Speech output file\n\n"); + exit(0); + } + + /* Get frame length */ + int framelength_int = atoi(argv[1]); + if (framelength_int < 0) { + printf(" G.722: Invalid framelength %d.\n", framelength_int); + exit(1); + } + framelength = 
static_cast<size_t>(framelength_int); + + /* Get Input and Output files */ + sscanf(argv[2], "%s", inname); + sscanf(argv[3], "%s", outbit); + sscanf(argv[4], "%s", outname); + + if ((inp = fopen(inname, "rb")) == NULL) { + printf(" G.722: Cannot read file %s.\n", inname); + exit(1); + } + if ((outbitp = fopen(outbit, "wb")) == NULL) { + printf(" G.722: Cannot write file %s.\n", outbit); + exit(1); + } + if ((outp = fopen(outname, "wb")) == NULL) { + printf(" G.722: Cannot write file %s.\n", outname); + exit(1); + } + printf("\nInput:%s\nOutput bitstream:%s\nOutput:%s\n", inname, outbit, + outname); + + /* Create and init */ + WebRtcG722_CreateEncoder((G722EncInst**)&G722enc_inst); + WebRtcG722_CreateDecoder((G722DecInst**)&G722dec_inst); + WebRtcG722_EncoderInit((G722EncInst*)G722enc_inst); + WebRtcG722_DecoderInit((G722DecInst*)G722dec_inst); + + /* Initialize encoder and decoder */ + framecnt = 0; + endfile = false; + while (!endfile) { + framecnt++; + + /* Read speech block */ + endfile = readframe(shortdata, inp, framelength); + + /* Start clock before call to encoder and decoder */ + starttime = clock() / (double)CLOCKS_PER_SEC_G722; + + /* G.722 encoding + decoding */ + stream_len = WebRtcG722_Encode((G722EncInst*)G722enc_inst, shortdata, + framelength, streamdata); + WebRtcG722_Decode(G722dec_inst, streamdata, stream_len, decoded, + speechType); + + /* Stop clock after call to encoder and decoder */ + runtime += (double)((clock() / (double)CLOCKS_PER_SEC_G722) - starttime); + + /* Write coded bits to file */ + if (fwrite(streamdata, sizeof(short), stream_len / 2, outbitp) != + stream_len / 2) { + return -1; + } + /* Write coded speech to file */ + if (fwrite(decoded, sizeof(short), framelength, outp) != framelength) { + return -1; + } + } + + WebRtcG722_FreeEncoder((G722EncInst*)G722enc_inst); + WebRtcG722_FreeDecoder((G722DecInst*)G722dec_inst); + + length_file = ((double)framecnt * (double)framelength / 16000); + printf("\n\nLength of speech file: %.1f 
s\n", length_file); + printf("Time to run G.722: %.2f s (%.2f %% of realtime)\n\n", runtime, + (100 * runtime / length_file)); + printf("---------------------END----------------------\n"); + + fclose(inp); + fclose(outbitp); + fclose(outp); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c new file mode 100644 index 0000000000..77da78ba7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuant.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/abs_quant.h" + +#include "modules/audio_coding/codecs/ilbc/abs_quant_loop.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + + +/*----------------------------------------------------------------* + * predictive noise shaping encoding of scaled start state + * (subrutine for WebRtcIlbcfix_StateSearch) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AbsQuant( + IlbcEncoder *iLBCenc_inst, + /* (i) Encoder instance */ + iLBC_bits *iLBC_encbits, /* (i/o) Encoded bits (outputs idxForMax + and idxVec, uses state_first as + input) */ + int16_t *in, /* (i) vector to encode */ + int16_t *weightDenum /* (i) denominator of synthesis filter */ + ) { + int16_t 
*syntOut; + size_t quantLen[2]; + + /* Stack based */ + int16_t syntOutBuf[LPC_FILTERORDER+STATE_SHORT_LEN_30MS]; + int16_t in_weightedVec[STATE_SHORT_LEN_30MS+LPC_FILTERORDER]; + int16_t *in_weighted = &in_weightedVec[LPC_FILTERORDER]; + + /* Initialize the buffers */ + WebRtcSpl_MemSetW16(syntOutBuf, 0, LPC_FILTERORDER+STATE_SHORT_LEN_30MS); + syntOut = &syntOutBuf[LPC_FILTERORDER]; + /* Start with zero state */ + WebRtcSpl_MemSetW16(in_weightedVec, 0, LPC_FILTERORDER); + + /* Perform the quantization loop in two sections of length quantLen[i], + where the perceptual weighting filter is updated at the subframe + border */ + + if (iLBC_encbits->state_first) { + quantLen[0]=SUBL; + quantLen[1]=iLBCenc_inst->state_short_len-SUBL; + } else { + quantLen[0]=iLBCenc_inst->state_short_len-SUBL; + quantLen[1]=SUBL; + } + + /* Calculate the weighted residual, switch perceptual weighting + filter at the subframe border */ + WebRtcSpl_FilterARFastQ12( + in, in_weighted, + weightDenum, LPC_FILTERORDER+1, quantLen[0]); + WebRtcSpl_FilterARFastQ12( + &in[quantLen[0]], &in_weighted[quantLen[0]], + &weightDenum[LPC_FILTERORDER+1], LPC_FILTERORDER+1, quantLen[1]); + + WebRtcIlbcfix_AbsQuantLoop( + syntOut, + in_weighted, + weightDenum, + quantLen, + iLBC_encbits->idxVec); + +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h new file mode 100644 index 0000000000..c72e29cf29 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuant.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * predictive noise shaping encoding of scaled start state + * (subrutine for WebRtcIlbcfix_StateSearch) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AbsQuant( + IlbcEncoder* iLBCenc_inst, + /* (i) Encoder instance */ + iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits (outputs idxForMax + and idxVec, uses state_first as + input) */ + int16_t* in, /* (i) vector to encode */ + int16_t* weightDenum /* (i) denominator of synthesis filter */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c new file mode 100644 index 0000000000..cf9266299d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuantLoop.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/abs_quant_loop.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/sort_sq.h" + +void WebRtcIlbcfix_AbsQuantLoop(int16_t *syntOutIN, int16_t *in_weightedIN, + int16_t *weightDenumIN, size_t *quantLenIN, + int16_t *idxVecIN ) { + size_t k1, k2; + int16_t index; + int32_t toQW32; + int32_t toQ32; + int16_t tmp16a; + int16_t xq; + + int16_t *syntOut = syntOutIN; + int16_t *in_weighted = in_weightedIN; + int16_t *weightDenum = weightDenumIN; + size_t *quantLen = quantLenIN; + int16_t *idxVec = idxVecIN; + + for(k1=0;k1<2;k1++) { + for(k2=0;k2<quantLen[k1];k2++){ + + /* Filter to get the predicted value */ + WebRtcSpl_FilterARFastQ12( + syntOut, syntOut, + weightDenum, LPC_FILTERORDER+1, 1); + + /* the quantizer */ + toQW32 = (int32_t)(*in_weighted) - (int32_t)(*syntOut); + + toQ32 = (((int32_t)toQW32)<<2); + + if (toQ32 > 32767) { + toQ32 = (int32_t) 32767; + } else if (toQ32 < -32768) { + toQ32 = (int32_t) -32768; + } + + /* Quantize the state */ + if (toQW32<(-7577)) { + /* To prevent negative overflow */ + index=0; + } else if (toQW32>8151) { + /* To prevent positive overflow */ + index=7; + } else { + /* Find the best quantization index + (state_sq3Tbl is in Q13 and toQ is in Q11) + */ + WebRtcIlbcfix_SortSq(&xq, &index, + (int16_t)toQ32, + WebRtcIlbcfix_kStateSq3, 8); + } + + /* Store selected index */ + (*idxVec++) = index; + + /* Compute decoded sample and update of the prediction filter */ + tmp16a = ((WebRtcIlbcfix_kStateSq3[index] + 2 ) >> 2); + + *syntOut = (int16_t) (tmp16a + (int32_t)(*in_weighted) - toQW32); + + syntOut++; in_weighted++; + } + /* Update perceptual weighting filter at 
subframe border */ + weightDenum += 11; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h new file mode 100644 index 0000000000..841d73b9fb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/abs_quant_loop.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AbsQuantLoop.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_LOOP_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ABS_QUANT_LOOP_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * predictive noise shaping encoding of scaled start state + * (subrutine for WebRtcIlbcfix_StateSearch) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AbsQuantLoop(int16_t* syntOutIN, + int16_t* in_weightedIN, + int16_t* weightDenumIN, + size_t* quantLenIN, + int16_t* idxVecIN); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc new file mode 100644 index 0000000000..57b5abbe23 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" + +#include <memory> +#include <utility> + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +AudioDecoderIlbcImpl::AudioDecoderIlbcImpl() { + WebRtcIlbcfix_DecoderCreate(&dec_state_); + WebRtcIlbcfix_Decoderinit30Ms(dec_state_); +} + +AudioDecoderIlbcImpl::~AudioDecoderIlbcImpl() { + WebRtcIlbcfix_DecoderFree(dec_state_); +} + +bool AudioDecoderIlbcImpl::HasDecodePlc() const { + return true; +} + +int AudioDecoderIlbcImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz, 8000); + int16_t temp_type = 1; // Default is speech. 
+ int ret = WebRtcIlbcfix_Decode(dec_state_, encoded, encoded_len, decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +size_t AudioDecoderIlbcImpl::DecodePlc(size_t num_frames, int16_t* decoded) { + return WebRtcIlbcfix_NetEqPlc(dec_state_, decoded, num_frames); +} + +void AudioDecoderIlbcImpl::Reset() { + WebRtcIlbcfix_Decoderinit30Ms(dec_state_); +} + +std::vector<AudioDecoder::ParseResult> AudioDecoderIlbcImpl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + std::vector<ParseResult> results; + size_t bytes_per_frame; + int timestamps_per_frame; + if (payload.size() >= 950) { + RTC_LOG(LS_WARNING) + << "AudioDecoderIlbcImpl::ParsePayload: Payload too large"; + return results; + } + if (payload.size() % 38 == 0) { + // 20 ms frames. + bytes_per_frame = 38; + timestamps_per_frame = 160; + } else if (payload.size() % 50 == 0) { + // 30 ms frames. + bytes_per_frame = 50; + timestamps_per_frame = 240; + } else { + RTC_LOG(LS_WARNING) + << "AudioDecoderIlbcImpl::ParsePayload: Invalid payload"; + return results; + } + + RTC_DCHECK_EQ(0, payload.size() % bytes_per_frame); + if (payload.size() == bytes_per_frame) { + std::unique_ptr<EncodedAudioFrame> frame( + new LegacyEncodedAudioFrame(this, std::move(payload))); + results.emplace_back(timestamp, 0, std::move(frame)); + } else { + size_t byte_offset; + uint32_t timestamp_offset; + for (byte_offset = 0, timestamp_offset = 0; byte_offset < payload.size(); + byte_offset += bytes_per_frame, + timestamp_offset += timestamps_per_frame) { + std::unique_ptr<EncodedAudioFrame> frame(new LegacyEncodedAudioFrame( + this, rtc::Buffer(payload.data() + byte_offset, bytes_per_frame))); + results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame)); + } + } + + return results; +} + +int AudioDecoderIlbcImpl::SampleRateHz() const { + return 8000; +} + +size_t AudioDecoderIlbcImpl::Channels() const { + return 1; +} + +} // namespace webrtc diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h new file mode 100644 index 0000000000..46ba755148 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/buffer.h" + +typedef struct iLBC_decinst_t_ IlbcDecoderInstance; + +namespace webrtc { + +class AudioDecoderIlbcImpl final : public AudioDecoder { + public: + AudioDecoderIlbcImpl(); + ~AudioDecoderIlbcImpl() override; + + AudioDecoderIlbcImpl(const AudioDecoderIlbcImpl&) = delete; + AudioDecoderIlbcImpl& operator=(const AudioDecoderIlbcImpl&) = delete; + + bool HasDecodePlc() const override; + size_t DecodePlc(size_t num_frames, int16_t* decoded) override; + void Reset() override; + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + IlbcDecoderInstance* dec_state_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_DECODER_ILBC_H_ diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc new file mode 100644 index 0000000000..9fbf42ceeb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" + +#include <algorithm> +#include <cstdint> + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +const int kSampleRateHz = 8000; + +int GetIlbcBitrate(int ptime) { + switch (ptime) { + case 20: + case 40: + // 38 bytes per frame of 20 ms => 15200 bits/s. + return 15200; + case 30: + case 60: + // 50 bytes per frame of 30 ms => (approx) 13333 bits/s. 
+ return 13333; + default: + RTC_CHECK_NOTREACHED(); + } +} + +} // namespace + +AudioEncoderIlbcImpl::AudioEncoderIlbcImpl(const AudioEncoderIlbcConfig& config, + int payload_type) + : frame_size_ms_(config.frame_size_ms), + payload_type_(payload_type), + num_10ms_frames_per_packet_( + static_cast<size_t>(config.frame_size_ms / 10)), + encoder_(nullptr) { + RTC_CHECK(config.IsOk()); + Reset(); +} + +AudioEncoderIlbcImpl::~AudioEncoderIlbcImpl() { + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderFree(encoder_)); +} + +int AudioEncoderIlbcImpl::SampleRateHz() const { + return kSampleRateHz; +} + +size_t AudioEncoderIlbcImpl::NumChannels() const { + return 1; +} + +size_t AudioEncoderIlbcImpl::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +size_t AudioEncoderIlbcImpl::Max10MsFramesInAPacket() const { + return num_10ms_frames_per_packet_; +} + +int AudioEncoderIlbcImpl::GetTargetBitrate() const { + return GetIlbcBitrate(rtc::dchecked_cast<int>(num_10ms_frames_per_packet_) * + 10); +} + +AudioEncoder::EncodedInfo AudioEncoderIlbcImpl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + // Save timestamp if starting a new packet. + if (num_10ms_frames_buffered_ == 0) + first_timestamp_in_buffer_ = rtp_timestamp; + + // Buffer input. + std::copy(audio.cbegin(), audio.cend(), + input_buffer_ + kSampleRateHz / 100 * num_10ms_frames_buffered_); + + // If we don't yet have enough buffered input for a whole packet, we're done + // for now. + if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) { + return EncodedInfo(); + } + + // Encode buffered input. 
+ RTC_DCHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_); + num_10ms_frames_buffered_ = 0; + size_t encoded_bytes = encoded->AppendData( + RequiredOutputSizeBytes(), [&](rtc::ArrayView<uint8_t> encoded) { + const int r = WebRtcIlbcfix_Encode( + encoder_, input_buffer_, + kSampleRateHz / 100 * num_10ms_frames_per_packet_, encoded.data()); + RTC_CHECK_GE(r, 0); + + return static_cast<size_t>(r); + }); + + RTC_DCHECK_EQ(encoded_bytes, RequiredOutputSizeBytes()); + + EncodedInfo info; + info.encoded_bytes = encoded_bytes; + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.encoder_type = CodecType::kIlbc; + return info; +} + +void AudioEncoderIlbcImpl::Reset() { + if (encoder_) + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderFree(encoder_)); + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderCreate(&encoder_)); + const int encoder_frame_size_ms = + frame_size_ms_ > 30 ? frame_size_ms_ / 2 : frame_size_ms_; + RTC_CHECK_EQ(0, WebRtcIlbcfix_EncoderInit(encoder_, encoder_frame_size_ms)); + num_10ms_frames_buffered_ = 0; +} + +absl::optional<std::pair<TimeDelta, TimeDelta>> +AudioEncoderIlbcImpl::GetFrameLengthRange() const { + return {{TimeDelta::Millis(num_10ms_frames_per_packet_ * 10), + TimeDelta::Millis(num_10ms_frames_per_packet_ * 10)}}; +} + +size_t AudioEncoderIlbcImpl::RequiredOutputSizeBytes() const { + switch (num_10ms_frames_per_packet_) { + case 2: + return 38; + case 3: + return 50; + case 4: + return 2 * 38; + case 6: + return 2 * 50; + default: + RTC_CHECK_NOTREACHED(); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h new file mode 100644 index 0000000000..c8dfa2ca6d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <utility> + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/ilbc/audio_encoder_ilbc_config.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +namespace webrtc { + +class AudioEncoderIlbcImpl final : public AudioEncoder { + public: + AudioEncoderIlbcImpl(const AudioEncoderIlbcConfig& config, int payload_type); + ~AudioEncoderIlbcImpl() override; + + AudioEncoderIlbcImpl(const AudioEncoderIlbcImpl&) = delete; + AudioEncoderIlbcImpl& operator=(const AudioEncoderIlbcImpl&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) override; + void Reset() override; + absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange() + const override; + + private: + size_t RequiredOutputSizeBytes() const; + + static constexpr size_t kMaxSamplesPerPacket = 480; + const int frame_size_ms_; + const int payload_type_; + const size_t num_10ms_frames_per_packet_; + size_t num_10ms_frames_buffered_; + uint32_t first_timestamp_in_buffer_; + int16_t input_buffer_[kMaxSamplesPerPacket]; + IlbcEncoderInstance* encoder_; +}; + +} // namespace webrtc +#endif // 
MODULES_AUDIO_CODING_CODECS_ILBC_AUDIO_ENCODER_ILBC_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c new file mode 100644 index 0000000000..c915a2f9f0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AugmentedCbCorr.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/augmented_cb_corr.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_AugmentedCbCorr( + int16_t *target, /* (i) Target vector */ + int16_t *buffer, /* (i) Memory buffer */ + int16_t *interpSamples, /* (i) buffer with + interpolated samples */ + int32_t *crossDot, /* (o) The cross correlation between + the target and the Augmented + vector */ + size_t low, /* (i) Lag to start from (typically + 20) */ + size_t high, /* (i) Lag to end at (typically 39) */ + int scale) /* (i) Scale factor to use for + the crossDot */ +{ + size_t lagcount; + size_t ilow; + int16_t *targetPtr; + int32_t *crossDotPtr; + int16_t *iSPtr=interpSamples; + + /* Calculate the correlation between the target and the + interpolated codebook. 
The correlation is calculated in + 3 sections with the interpolated part in the middle */ + crossDotPtr=crossDot; + for (lagcount=low; lagcount<=high; lagcount++) { + + ilow = lagcount - 4; + + /* Compute dot product for the first (lagcount-4) samples */ + (*crossDotPtr) = WebRtcSpl_DotProductWithScale(target, buffer-lagcount, ilow, scale); + + /* Compute dot product on the interpolated samples */ + (*crossDotPtr) += WebRtcSpl_DotProductWithScale(target+ilow, iSPtr, 4, scale); + targetPtr = target + lagcount; + iSPtr += lagcount-ilow; + + /* Compute dot product for the remaining samples */ + (*crossDotPtr) += WebRtcSpl_DotProductWithScale(targetPtr, buffer-lagcount, SUBL-lagcount, scale); + crossDotPtr++; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h new file mode 100644 index 0000000000..2e9612e51a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/augmented_cb_corr.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_AugmentedCbCorr.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_AUGMENTED_CB_CORR_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_AUGMENTED_CB_CORR_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Calculate correlation between target and Augmented codebooks + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_AugmentedCbCorr( + int16_t* target, /* (i) Target vector */ + int16_t* buffer, /* (i) Memory buffer */ + int16_t* interpSamples, /* (i) buffer with + interpolated samples */ + int32_t* crossDot, /* (o) The cross correlation between + the target and the Augmented + vector */ + size_t low, /* (i) Lag to start from (typically + 20) */ + size_t high, /* (i) Lag to end at (typically 39 */ + int scale); /* (i) Scale factor to use for the crossDot */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c new file mode 100644 index 0000000000..1a9b882adf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_BwExpand.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * lpc bandwidth expansion + *---------------------------------------------------------------*/ + +/* The output is in the same domain as the input */ +void WebRtcIlbcfix_BwExpand( + int16_t *out, /* (o) the bandwidth expanded lpc coefficients */ + int16_t *in, /* (i) the lpc coefficients before bandwidth + expansion */ + int16_t *coef, /* (i) the bandwidth expansion factor Q15 */ + int16_t length /* (i) the length of lpc coefficient vectors */ + ) { + int i; + + out[0] = in[0]; + for (i = 1; i < length; i++) { + /* out[i] = coef[i] * in[i] with rounding. + in[] and out[] are in Q12 and coef[] is in Q15 + */ + out[i] = (int16_t)((coef[i] * in[i] + 16384) >> 15); + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h new file mode 100644 index 0000000000..ff9b0b302e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/bw_expand.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_BwExpand.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_BW_EXPAND_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_BW_EXPAND_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * lpc bandwidth expansion + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_BwExpand( + int16_t* out, /* (o) the bandwidth expanded lpc coefficients */ + int16_t* in, /* (i) the lpc coefficients before bandwidth + expansion */ + int16_t* coef, /* (i) the bandwidth expansion factor Q15 */ + int16_t length /* (i) the length of lpc coefficient vectors */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c new file mode 100644 index 0000000000..1e9a7040c7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbConstruct.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_construct.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/gain_dequant.h" +#include "modules/audio_coding/codecs/ilbc/get_cd_vec.h" +#include "rtc_base/sanitizer.h" + +// An arithmetic operation that is allowed to overflow. (It's still undefined +// behavior, so not a good idea; this just makes UBSan ignore the violation, so +// that our old code can continue to do what it's always been doing.) +static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow") + OverflowingAddS32S32ToS32(int32_t a, int32_t b) { + return a + b; +} + +/*----------------------------------------------------------------* + * Construct decoded vector from codebook and gains. + *---------------------------------------------------------------*/ + +bool WebRtcIlbcfix_CbConstruct( + int16_t* decvector, /* (o) Decoded vector */ + const int16_t* index, /* (i) Codebook indices */ + const int16_t* gain_index, /* (i) Gain quantization indices */ + int16_t* mem, /* (i) Buffer for codevector construction */ + size_t lMem, /* (i) Length of buffer */ + size_t veclen) { /* (i) Length of vector */ + size_t j; + int16_t gain[CB_NSTAGES]; + /* Stack based */ + int16_t cbvec0[SUBL]; + int16_t cbvec1[SUBL]; + int16_t cbvec2[SUBL]; + int32_t a32; + int16_t *gainPtr; + + /* gain de-quantization */ + + gain[0] = WebRtcIlbcfix_GainDequant(gain_index[0], 16384, 0); + gain[1] = WebRtcIlbcfix_GainDequant(gain_index[1], gain[0], 1); + gain[2] = WebRtcIlbcfix_GainDequant(gain_index[2], gain[1], 2); + + /* codebook vector construction and construction of total vector */ + + /* Stack based */ + if (!WebRtcIlbcfix_GetCbVec(cbvec0, mem, (size_t)index[0], lMem, veclen)) + return false; // Failure. 
+ if (!WebRtcIlbcfix_GetCbVec(cbvec1, mem, (size_t)index[1], lMem, veclen)) + return false; // Failure. + if (!WebRtcIlbcfix_GetCbVec(cbvec2, mem, (size_t)index[2], lMem, veclen)) + return false; // Failure. + + gainPtr = &gain[0]; + for (j=0;j<veclen;j++) { + a32 = (*gainPtr++) * cbvec0[j]; + a32 += (*gainPtr++) * cbvec1[j]; + a32 = OverflowingAddS32S32ToS32(a32, (*gainPtr) * cbvec2[j]); + gainPtr -= 2; + decvector[j] = (int16_t)((a32 + 8192) >> 14); + } + + return true; // Success. +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h new file mode 100644 index 0000000000..8f7c663164 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_construct.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbConstruct.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_CONSTRUCT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_CONSTRUCT_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Construct decoded vector from codebook and gains. + *---------------------------------------------------------------*/ + +// Returns true on success, false on failure. 
+ABSL_MUST_USE_RESULT +bool WebRtcIlbcfix_CbConstruct( + int16_t* decvector, /* (o) Decoded vector */ + const int16_t* index, /* (i) Codebook indices */ + const int16_t* gain_index, /* (i) Gain quantization indices */ + int16_t* mem, /* (i) Buffer for codevector construction */ + size_t lMem, /* (i) Length of buffer */ + size_t veclen /* (i) Length of vector */ +); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c new file mode 100644 index 0000000000..21e4197607 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergy.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy.h" + +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Function WebRtcIlbcfix_CbMemEnergy computes the energy of all + * the vectors in the codebook memory that will be used in the + * following search for the best match. 
/*----------------------------------------------------------------*
 * Helper for WebRtcIlbcfix_CbMemEnergy: processes one codebook
 * section. The energy of the lTarget samples ending at
 * section[lMem-1] is computed, normalized and stored at index
 * `slot`; the entries that follow are produced by
 * WebRtcIlbcfix_CbMemEnergyCalc, which is handed that start energy
 * together with the two edge-sample pointers.
 *---------------------------------------------------------------*/
static void CbMemEnergySection(
    int16_t *section,      /* (i) Start of this CB memory section */
    size_t lMem,           /* (i) Length of the CB memory */
    size_t lTarget,        /* (i) Length of the target vector */
    size_t range,          /* (i) Forwarded to WebRtcIlbcfix_CbMemEnergyCalc */
    int16_t *energyW16,    /* (o) Energy in the CB vectors */
    int16_t *energyShifts, /* (o) Shift value of the energy */
    int scale,             /* (i) The scaling of all energy values */
    size_t slot            /* (i) Index of the first stored energy value */
                               ) {
  int16_t *newest = section + lMem - lTarget;
  int32_t start_energy =
      WebRtcSpl_DotProductWithScale(newest, newest, lTarget, scale);
  int16_t norm = (int16_t)WebRtcSpl_NormW32(start_energy);

  /* Keep the shift count and the upper 16 bits of the normalized energy */
  energyShifts[slot] = norm;
  energyW16[slot] = (int16_t)((start_energy << norm) >> 16);

  /* Remaining entries: add the sample just before the current window
   * and drop the last sample of the window, one step at a time */
  WebRtcIlbcfix_CbMemEnergyCalc(start_energy, range,
                                section + lMem - lTarget - 1,
                                section + lMem - 1,
                                energyW16, energyShifts, scale, slot);
}

/*----------------------------------------------------------------*
 * Function WebRtcIlbcfix_CbMemEnergy computes the energy of all
 * the vectors in the codebook memory that will be used in the
 * following search for the best match. Values for the unfiltered
 * section are stored from index 0, values for the filtered section
 * from index base_size. The energy values are reused in all three
 * search stages.
 *----------------------------------------------------------------*/

void WebRtcIlbcfix_CbMemEnergy(
    size_t range,
    int16_t *CB,           /* (i) The CB memory (1:st section) */
    int16_t *filteredCB,   /* (i) The filtered CB memory (2:nd section) */
    size_t lMem,           /* (i) Length of the CB memory */
    size_t lTarget,        /* (i) Length of the target vector */
    int16_t *energyW16,    /* (o) Energy in the CB vectors */
    int16_t *energyShifts, /* (o) Shift value of the energy */
    int scale,             /* (i) The scaling of all energy values */
    size_t base_size       /* (i) Index to where energy values should be stored */
                               ) {
  /* First section: the plain codebook memory */
  CbMemEnergySection(CB, lMem, lTarget, range,
                     energyW16, energyShifts, scale, 0);

  /* Second section: the filtered codebook memory */
  CbMemEnergySection(filteredCB, lMem, lTarget, range,
                     energyW16, energyShifts, scale, base_size);
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergy.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_H_ + +#include <stddef.h> +#include <stdint.h> + +void WebRtcIlbcfix_CbMemEnergy( + size_t range, + int16_t* CB, /* (i) The CB memory (1:st section) */ + int16_t* filteredCB, /* (i) The filtered CB memory (2:nd section) */ + size_t lMem, /* (i) Length of the CB memory */ + size_t lTarget, /* (i) Length of the target vector */ + int16_t* energyW16, /* (o) Energy in the CB vectors */ + int16_t* energyShifts, /* (o) Shift value of the energy */ + int scale, /* (i) The scaling of all energy values */ + size_t base_size /* (i) Index to where energy values should be stored */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c new file mode 100644 index 0000000000..0619bbe422 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergyAugmentation.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_CbMemEnergyAugmentation( + int16_t *interpSamples, /* (i) The interpolated samples */ + int16_t *CBmem, /* (i) The CB memory */ + int scale, /* (i) The scaling of all energy values */ + size_t base_size, /* (i) Index to where energy values should be stored */ + int16_t *energyW16, /* (o) Energy in the CB vectors */ + int16_t *energyShifts /* (o) Shift value of the energy */ + ){ + int32_t energy, tmp32; + int16_t *ppe, *pp, *interpSamplesPtr; + int16_t *CBmemPtr; + size_t lagcount; + int16_t *enPtr=&energyW16[base_size-20]; + int16_t *enShPtr=&energyShifts[base_size-20]; + int32_t nrjRecursive; + + CBmemPtr = CBmem+147; + interpSamplesPtr = interpSamples; + + /* Compute the energy for the first (low-5) noninterpolated samples */ + nrjRecursive = WebRtcSpl_DotProductWithScale( CBmemPtr-19, CBmemPtr-19, 15, scale); + ppe = CBmemPtr - 20; + + for (lagcount=20; lagcount<=39; lagcount++) { + + /* Update the energy recursively to save complexity */ + nrjRecursive += (*ppe * *ppe) >> scale; + ppe--; + energy = nrjRecursive; + + /* interpolation */ + energy += WebRtcSpl_DotProductWithScale(interpSamplesPtr, interpSamplesPtr, 4, scale); + interpSamplesPtr += 4; + + /* Compute energy for the remaining samples */ + pp = CBmemPtr - lagcount; + energy += WebRtcSpl_DotProductWithScale(pp, pp, SUBL-lagcount, scale); + + /* Normalize the energy and store the number of shifts */ + (*enShPtr) = (int16_t)WebRtcSpl_NormW32(energy); + tmp32 = energy << *enShPtr; + *enPtr = (int16_t)(tmp32 >> 16); + enShPtr++; + enPtr++; + } +} diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h new file mode 100644 index 0000000000..d7b7a0d97e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergyAugmentation.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_AUGMENTATION_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_AUGMENTATION_H_ + +#include <stddef.h> +#include <stdint.h> + +void WebRtcIlbcfix_CbMemEnergyAugmentation( + int16_t* interpSamples, /* (i) The interpolated samples */ + int16_t* CBmem, /* (i) The CB memory */ + int scale, /* (i) The scaling of all energy values */ + size_t base_size, /* (i) Index to where energy values should be stored */ + int16_t* energyW16, /* (o) Energy in the CB vectors */ + int16_t* energyShifts /* (o) Shift value of the energy */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c new file mode 100644 index 0000000000..58c0c5fe6d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. 
/*----------------------------------------------------------------*
 * Compute the energy of the rest of the cb memory by step wise
 * adding and subtracting the next sample and the last sample
 * respectively.
 *
 * Starting from `energy`, each of the range-1 steps adds (*ppi)^2,
 * subtracts (*ppo)^2 (difference scaled down by `scale`), clamps the
 * result at zero, moves both pointers one sample backwards and
 * stores the normalized energy and its shift count at indices
 * base_size+1, base_size+2, ... Nothing is written when range <= 1.
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_CbMemEnergyCalc(
    int32_t energy,        /* (i) input start energy */
    size_t range,          /* (i) number of iterations */
    int16_t *ppi,          /* (i) input pointer 1 */
    int16_t *ppo,          /* (i) input pointer 2 */
    int16_t *energyW16,    /* (o) Energy in the CB vectors */
    int16_t *energyShifts, /* (o) Shift value of the energy */
    int scale,             /* (i) The scaling of all energy values */
    size_t base_size       /* (i) Index to where energy values should be stored */
                                   )
{
  size_t k;

  /* Entry base_size itself belongs to the start energy, so the
     outputs of this routine begin one position later */
  for (k = 1; k < range; k++) {
    /* +/- operation on the edge samples of the sliding window */
    const int32_t edge_delta = (*ppi) * (*ppi) - (*ppo) * (*ppo);
    int16_t norm;

    --ppi;
    --ppo;

    energy += edge_delta >> scale;
    energy = WEBRTC_SPL_MAX(energy, 0);

    /* Normalize the energy into a int16_t and store
       the number of shifts */
    norm = (int16_t)WebRtcSpl_NormW32(energy);
    energyShifts[base_size + k] = norm;
    energyW16[base_size + k] = (int16_t)((energy << norm) >> 16);
  }
}
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h new file mode 100644 index 0000000000..1d1e8d62b9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_mem_energy_calc.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbMemEnergyCalc.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_CALC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_MEM_ENERGY_CALC_H_ + +#include <stddef.h> +#include <stdint.h> + +void WebRtcIlbcfix_CbMemEnergyCalc( + int32_t energy, /* (i) input start energy */ + size_t range, /* (i) number of iterations */ + int16_t* ppi, /* (i) input pointer 1 */ + int16_t* ppo, /* (i) input pointer 2 */ + int16_t* energyW16, /* (o) Energy in the CB vectors */ + int16_t* energyShifts, /* (o) Shift value of the energy */ + int scale, /* (i) The scaling of all energy values */ + size_t base_size /* (i) Index to where energy values should be stored */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c new file mode 100644 index 0000000000..24b5292354 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search.c @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbSearch.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_search.h" + +#include "modules/audio_coding/codecs/ilbc/augmented_cb_corr.h" +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy.h" +#include "modules/audio_coding/codecs/ilbc/cb_mem_energy_augmentation.h" +#include "modules/audio_coding/codecs/ilbc/cb_search_core.h" +#include "modules/audio_coding/codecs/ilbc/cb_update_best_index.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/energy_inverse.h" +#include "modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h" +#include "modules/audio_coding/codecs/ilbc/gain_quant.h" +#include "modules/audio_coding/codecs/ilbc/interpolate_samples.h" + +/*----------------------------------------------------------------* + * Search routine for codebook encoding and gain quantization. 
/*----------------------------------------------------------------*
 * Search routine for codebook encoding and gain quantization.
 *
 * Outline of what the code below does:
 *  1. Filters decResidual into the local codebook memory `buf` and
 *     intarget into `target`, both with weightDenum.
 *  2. Picks `scale` from the largest magnitude in buf/target and
 *     computes the target energy.
 *  3. Builds the second (filtered) codebook section in `cbvectors`
 *     and computes, then inverts, the energies of all candidates.
 *  4. For each of the CB_NSTAGES stages: full search in the first
 *     section, restricted search (window of CB_RESRANGE+1 indices
 *     around the best hit) in the filtered section, gain
 *     quantization, and subtraction of the scaled best vector from
 *     `target`. index[stage] and gain_index[stage] are written.
 *  5. Finally gain_index[0] may be raised so that the coded energy
 *     better matches the target energy.
 *
 * `block` selects the rows of WebRtcIlbcfix_kFilterRange and
 * WebRtcIlbcfix_kSearchRange that are used.
 *----------------------------------------------------------------*/

void WebRtcIlbcfix_CbSearch(
    IlbcEncoder *iLBCenc_inst,
    /* (i) the encoder state structure */
    int16_t *index,  /* (o) Codebook indices */
    int16_t *gain_index, /* (o) Gain quantization indices */
    int16_t *intarget, /* (i) Target vector for encoding */
    int16_t *decResidual,/* (i) Decoded residual for codebook construction */
    size_t lMem,  /* (i) Length of buffer */
    size_t lTarget,  /* (i) Length of vector */
    int16_t *weightDenum,/* (i) weighting filter coefficients in Q12 */
    size_t block  /* (i) the subblock number */
                            ) {
  size_t i, range;
  int16_t ii, j, stage;
  int16_t *pp;
  int16_t tmp;
  int scale;
  int16_t bits, temp1, temp2;
  size_t base_size;
  int32_t codedEner, targetEner;
  int16_t gains[CB_NSTAGES+1];
  int16_t *cb_vecPtr;
  size_t indexOffset, sInd, eInd;
  int32_t CritMax=0;
  int16_t shTotMax=WEBRTC_SPL_WORD16_MIN;
  size_t bestIndex=0;
  int16_t bestGain=0;
  size_t indexNew;
  int16_t CritNewSh;
  int32_t CritNew;
  int32_t *cDotPtr;
  size_t noOfZeros;
  int16_t *gainPtr;
  int32_t t32, tmpW32;
  int16_t *WebRtcIlbcfix_kGainSq5_ptr;
  /* Stack based */
  int16_t CBbuf[CB_MEML+LPC_FILTERORDER+CB_HALFFILTERLEN];
  int32_t cDot[128];
  int32_t Crit[128];
  int16_t targetVec[SUBL+LPC_FILTERORDER];
  int16_t cbvectors[CB_MEML + 1]; /* Adding one extra position for
                                     Coverity warnings. */
  int16_t codedVec[SUBL];
  int16_t interpSamples[20*4];
  int16_t interpSamplesFilt[20*4];
  int16_t energyW16[CB_EXPAND*128];
  int16_t energyShifts[CB_EXPAND*128];
  int16_t *inverseEnergy=energyW16; /* Reuse memory */
  int16_t *inverseEnergyShifts=energyShifts; /* Reuse memory */
  /* buf and target leave LPC_FILTERORDER entries in front of them;
     those entries are filled in below (zeros / filter state copy) */
  int16_t *buf = &CBbuf[LPC_FILTERORDER];
  int16_t *target = &targetVec[LPC_FILTERORDER];
  int16_t *aug_vec = (int16_t*)cDot; /* length [SUBL], reuse memory */

  /* Determine size of codebook sections. For lTarget==SUBL the value
     lMem-19 is used instead (NOTE(review): this equals
     lMem-lTarget+1 plus the 20 augmented entries if SUBL is 40 -
     confirm against defines.h) */

  base_size=lMem-lTarget+1;
  if (lTarget==SUBL) {
    base_size=lMem-19;
  }

  /* weighting of the CB memory: the first noOfZeros samples (and the
     LPC_FILTERORDER entries in front of buf) are zeroed, only the last
     WebRtcIlbcfix_kFilterRange[block] samples are filtered */
  noOfZeros=lMem-WebRtcIlbcfix_kFilterRange[block];
  WebRtcSpl_MemSetW16(&buf[-LPC_FILTERORDER], 0, noOfZeros+LPC_FILTERORDER);
  WebRtcSpl_FilterARFastQ12(
      decResidual+noOfZeros, buf+noOfZeros,
      weightDenum, LPC_FILTERORDER+1, WebRtcIlbcfix_kFilterRange[block]);

  /* weighting of the target vector; the last LPC_FILTERORDER samples
     of the weighted CB memory are copied in front of target first */
  WEBRTC_SPL_MEMCPY_W16(&target[-LPC_FILTERORDER], buf+noOfZeros+WebRtcIlbcfix_kFilterRange[block]-LPC_FILTERORDER, LPC_FILTERORDER);
  WebRtcSpl_FilterARFastQ12(
      intarget, target,
      weightDenum, LPC_FILTERORDER+1, lTarget);

  /* Store target, towards the end codedVec is calculated as
     the initial target minus the remaining target */
  WEBRTC_SPL_MEMCPY_W16(codedVec, target, lTarget);

  /* Find the highest absolute value to calculate proper
     vector scale factor (so that it uses 12 bits) */
  temp1 = WebRtcSpl_MaxAbsValueW16(buf, lMem);
  temp2 = WebRtcSpl_MaxAbsValueW16(target, lTarget);

  if ((temp1>0)&&(temp2>0)) {
    temp1 = WEBRTC_SPL_MAX(temp1, temp2);
    scale = WebRtcSpl_GetSizeInBits((uint32_t)(temp1 * temp1));
  } else {
    /* temp1 or temp2 is zero or negative (negative if the maximum
       was -32768) */
    scale = 30;
  }

  /* Scale so that a mul-add 40 times does not overflow */
  scale = scale - 25;
  scale = WEBRTC_SPL_MAX(0, scale);

  /* Compute energy of the original target */
  targetEner = WebRtcSpl_DotProductWithScale(target, target, lTarget, scale);

  /* Prepare search over one more codebook section. This section
     is created by filtering the original buffer with a filter. */
  WebRtcIlbcfix_FilteredCbVecs(cbvectors, buf, lMem, WebRtcIlbcfix_kFilterRange[block]);

  range = WebRtcIlbcfix_kSearchRange[block][0];

  if(lTarget == SUBL) {
    /* Create the interpolated samples and store them for use in all stages */

    /* First section, non-filtered half of the cb */
    WebRtcIlbcfix_InterpolateSamples(interpSamples, buf, lMem);

    /* Second section, filtered half of the cb */
    WebRtcIlbcfix_InterpolateSamples(interpSamplesFilt, cbvectors, lMem);

    /* Compute the CB vectors' energies for the first cb section (non-filtered) */
    WebRtcIlbcfix_CbMemEnergyAugmentation(interpSamples, buf,
                                          scale, 20, energyW16, energyShifts);

    /* Compute the CB vectors' energies for the second cb section (filtered cb) */
    WebRtcIlbcfix_CbMemEnergyAugmentation(interpSamplesFilt, cbvectors, scale,
                                          base_size + 20, energyW16,
                                          energyShifts);

    /* Compute the CB vectors' energies and store them in the vector
     * energyW16. Also the corresponding shift values are stored. The
     * energy values are used in all three stages. The +20 offset keeps
     * the 20 augmented entries in front of each section. */
    WebRtcIlbcfix_CbMemEnergy(range, buf, cbvectors, lMem,
                              lTarget, energyW16+20, energyShifts+20, scale, base_size);

  } else {
    /* Compute the CB vectors' energies and store them in the vector
     * energyW16. Also the corresponding shift values are stored. The
     * energy values are used in all three stages. */
    WebRtcIlbcfix_CbMemEnergy(range, buf, cbvectors, lMem,
                              lTarget, energyW16, energyShifts, scale, base_size);

    /* Set the energy positions 58-63 and 122-127 to zero
       (otherwise they are uninitialized) */
    WebRtcSpl_MemSetW16(energyW16+range, 0, (base_size-range));
    WebRtcSpl_MemSetW16(energyW16+range+base_size, 0, (base_size-range));
  }

  /* Calculate Inverse Energy (energyW16 is already normalized
     and will contain the inverse energy in Q29 after this call) */
  WebRtcIlbcfix_EnergyInverse(energyW16, base_size*CB_EXPAND);

  /* The gain value computed in the previous stage is used
   * as an upper limit to what the next stage gain value
   * is allowed to be. In stage 0, 16384 (1.0 in Q14) is used as
   * the upper limit. */
  gains[0] = 16384;

  for (stage=0; stage<CB_NSTAGES; stage++) {

    /* Set up memories */
    range = WebRtcIlbcfix_kSearchRange[block][stage];

    /* initialize search measures; with CritMax = 0 a candidate is only
       accepted by WebRtcIlbcfix_CbUpdateBestIndex if its (shifted)
       criterion is positive */
    CritMax=0;
    shTotMax=-100;
    bestIndex=0;
    bestGain=0;

    /* loop over lags 40+ in the first codebook section, full search */
    cb_vecPtr = buf+lMem-lTarget;

    /* Calculate all the cross correlations (augmented part of CB) */
    if (lTarget==SUBL) {
      WebRtcIlbcfix_AugmentedCbCorr(target, buf+lMem,
                                    interpSamples, cDot,
                                    20, 39, scale);
      cDotPtr=&cDot[20];
    } else {
      cDotPtr=cDot;
    }
    /* Calculate all the cross correlations (main part of CB) */
    WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget, range, scale, -1);

    /* Adjust the search range for the augmented vectors */
    if (lTarget==SUBL) {
      range=WebRtcIlbcfix_kSearchRange[block][stage]+20;
    } else {
      range=WebRtcIlbcfix_kSearchRange[block][stage];
    }

    indexOffset=0;

    /* Search for best index in this part of the vector */
    WebRtcIlbcfix_CbSearchCore(
        cDot, range, stage, inverseEnergy,
        inverseEnergyShifts, Crit,
        &indexNew, &CritNew, &CritNewSh);

    /* Update the global best index and the corresponding gain */
    WebRtcIlbcfix_CbUpdateBestIndex(
        CritNew, CritNewSh, indexNew+indexOffset, cDot[indexNew+indexOffset],
        inverseEnergy[indexNew+indexOffset], inverseEnergyShifts[indexNew+indexOffset],
        &CritMax, &shTotMax, &bestIndex, &bestGain);

    /* Window [sInd, eInd] for the restricted search in the filtered
       section: CB_RESRANGE+1 indices placed around bestIndex, moved
       so that it stays inside 0 .. range-1 */
    sInd = ((CB_RESRANGE >> 1) > bestIndex) ?
        0 : (bestIndex - (CB_RESRANGE >> 1));
    eInd=sInd+CB_RESRANGE;
    if (eInd>=range) {
      eInd=range-1;
      sInd=eInd-CB_RESRANGE;
    }

    range = WebRtcIlbcfix_kSearchRange[block][stage];

    if (lTarget==SUBL) {
      i=sInd;
      if (sInd<20) {
        /* The window starts among the augmented vectors: correlate
           those first, the main part then starts at index 20 */
        WebRtcIlbcfix_AugmentedCbCorr(target, cbvectors + lMem,
                                      interpSamplesFilt, cDot, sInd + 20,
                                      WEBRTC_SPL_MIN(39, (eInd + 20)), scale);
        i=20;
        cDotPtr = &cDot[20 - sInd];
      } else {
        cDotPtr = cDot;
      }

      cb_vecPtr = cbvectors+lMem-20-i;

      /* Calculate the cross correlations (main part of the filtered CB) */
      WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget,
                                 eInd - i + 1, scale, -1);

    } else {
      cDotPtr = cDot;
      cb_vecPtr = cbvectors+lMem-lTarget-sInd;

      /* Calculate the cross correlations (main part of the filtered CB) */
      WebRtcSpl_CrossCorrelation(cDotPtr, target, cb_vecPtr, lTarget,
                                 eInd - sInd + 1, scale, -1);

    }

    /* Adjust the search range for the augmented vectors */
    indexOffset=base_size+sInd;

    /* Search for best index in this part of the vector */
    WebRtcIlbcfix_CbSearchCore(
        cDot, eInd-sInd+1, stage, inverseEnergy+indexOffset,
        inverseEnergyShifts+indexOffset, Crit,
        &indexNew, &CritNew, &CritNewSh);

    /* Update the global best index and the corresponding gain.
       cDot is indexed without the offset here because it only holds
       the window, whereas the energy arrays hold both sections. */
    WebRtcIlbcfix_CbUpdateBestIndex(
        CritNew, CritNewSh, indexNew+indexOffset, cDot[indexNew],
        inverseEnergy[indexNew+indexOffset], inverseEnergyShifts[indexNew+indexOffset],
        &CritMax, &shTotMax, &bestIndex, &bestGain);

    index[stage] = (int16_t)bestIndex;


    bestGain = WebRtcIlbcfix_GainQuant(bestGain,
                                       (int16_t)WEBRTC_SPL_ABS_W16(gains[stage]), stage, &gain_index[stage]);

    /* Extract the best (according to measure) codebook vector
       Also adjust the index, so that the augmented vectors are last.
       Above these vectors were first...
    */

    if(lTarget==(STATE_LEN-iLBCenc_inst->state_short_len)) {

      if((size_t)index[stage]<base_size) {
        pp=buf+lMem-lTarget-index[stage];
      } else {
        pp=cbvectors+lMem-lTarget-
            index[stage]+base_size;
      }

    } else {

      if ((size_t)index[stage]<base_size) {
        if (index[stage]>=20) {
          /* Adjust index and extract vector */
          index[stage]-=20;
          pp=buf+lMem-lTarget-index[stage];
        } else {
          /* Adjust index and extract vector */
          index[stage]+=(int16_t)(base_size-20);

          WebRtcIlbcfix_CreateAugmentedVec(index[stage]-base_size+40,
                                           buf+lMem, aug_vec);
          pp = aug_vec;

        }
      } else {

        if ((index[stage] - base_size) >= 20) {
          /* Adjust index and extract vector */
          index[stage]-=20;
          pp=cbvectors+lMem-lTarget-
              index[stage]+base_size;
        } else {
          /* Adjust index and extract vector */
          index[stage]+=(int16_t)(base_size-20);
          WebRtcIlbcfix_CreateAugmentedVec(index[stage]-2*base_size+40,
                                           cbvectors+lMem, aug_vec);
          pp = aug_vec;
        }
      }
    }

    /* Subtract the best codebook vector, according
       to measure, from the target vector */

    WebRtcSpl_AddAffineVectorToVector(target, pp, (int16_t)(-bestGain),
                                      (int32_t)8192, (int16_t)14, lTarget);

    /* record quantized gain */
    gains[stage+1] = bestGain;

  } /* end of Main Loop. for (stage=0;... */

  /* Calculate the coded vector (original target - what's left) */
  for (i=0;i<lTarget;i++) {
    codedVec[i]-=target[i];
  }

  /* Gain adjustment for energy matching */
  codedEner = WebRtcSpl_DotProductWithScale(codedVec, codedVec, lTarget, scale);

  j=gain_index[0];

  temp1 = (int16_t)WebRtcSpl_NormW32(codedEner);
  temp2 = (int16_t)WebRtcSpl_NormW32(targetEner);

  /* Use the smaller normalization shift of the two energies so that
     both are brought down with the same `bits` below */
  if(temp1 < temp2) {
    bits = 16 - temp1;
  } else {
    bits = 16 - temp2;
  }

  tmp = (int16_t)((gains[1] * gains[1]) >> 14);

  targetEner = (int16_t)WEBRTC_SPL_SHIFT_W32(targetEner, -bits) * tmp;

  tmpW32 = ((int32_t)(gains[1]-1))<<1;

  /* Pointer to the table that contains
     gain_sq5TblFIX * gain_sq5TblFIX in Q14 */
  gainPtr=(int16_t*)WebRtcIlbcfix_kGainSq5Sq+gain_index[0];
  temp1 = (int16_t)WEBRTC_SPL_SHIFT_W32(codedEner, -bits);

  WebRtcIlbcfix_kGainSq5_ptr = (int16_t*)&WebRtcIlbcfix_kGainSq5[j];

  /* targetEner and codedEner are in Q(-2*scale) */
  for (ii=gain_index[0];ii<32;ii++) {

    /* Change the index if
       (codedEnergy*gainTbl[i]*gainTbl[i])<(targetEn*gain[0]*gain[0]) AND
       gainTbl[i] < 2*gain[0]
    */

    t32 = temp1 * *gainPtr;
    t32 = t32 - targetEner;
    if (t32 < 0) {
      if ((*WebRtcIlbcfix_kGainSq5_ptr) < tmpW32) {
        j=ii;
        WebRtcIlbcfix_kGainSq5_ptr = (int16_t*)&WebRtcIlbcfix_kGainSq5[ii];
      }
    }
    gainPtr++;
  }
  gain_index[0]=j;

  return;
}
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbSearch.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_CbSearch( + IlbcEncoder* iLBCenc_inst, + /* (i) the encoder state structure */ + int16_t* index, /* (o) Codebook indices */ + int16_t* gain_index, /* (o) Gain quantization indices */ + int16_t* intarget, /* (i) Target vector for encoding */ + int16_t* decResidual, /* (i) Decoded residual for codebook construction */ + size_t lMem, /* (i) Length of buffer */ + size_t lTarget, /* (i) Length of vector */ + int16_t* weightDenum, /* (i) weighting filter coefficients in Q12 */ + size_t block /* (i) the subblock number */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c new file mode 100644 index 0000000000..a75e5b0ab8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbSearchCore.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/cb_search_core.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_CbSearchCore( + int32_t *cDot, /* (i) Cross Correlation */ + size_t range, /* (i) Search range */ + int16_t stage, /* (i) Stage of this search */ + int16_t *inverseEnergy, /* (i) Inversed energy */ + int16_t *inverseEnergyShift, /* (i) Shifts of inversed energy + with the offset 2*16-29 */ + int32_t *Crit, /* (o) The criteria */ + size_t *bestIndex, /* (o) Index that corresponds to + maximum criteria (in this + vector) */ + int32_t *bestCrit, /* (o) Value of critera for the + chosen index */ + int16_t *bestCritSh) /* (o) The domain of the chosen + criteria */ +{ + int32_t maxW32, tmp32; + int16_t max, sh, tmp16; + size_t i; + int32_t *cDotPtr; + int16_t cDotSqW16; + int16_t *inverseEnergyPtr; + int32_t *critPtr; + int16_t *inverseEnergyShiftPtr; + + /* Don't allow negative values for stage 0 */ + if (stage==0) { + cDotPtr=cDot; + for (i=0;i<range;i++) { + *cDotPtr=WEBRTC_SPL_MAX(0, (*cDotPtr)); + cDotPtr++; + } + } + + /* Normalize cDot to int16_t, calculate the square of cDot and store the upper int16_t */ + maxW32 = WebRtcSpl_MaxAbsValueW32(cDot, range); + + sh = (int16_t)WebRtcSpl_NormW32(maxW32); + cDotPtr = cDot; + inverseEnergyPtr = inverseEnergy; + critPtr = Crit; + inverseEnergyShiftPtr=inverseEnergyShift; + max=WEBRTC_SPL_WORD16_MIN; + + for (i=0;i<range;i++) { + /* Calculate cDot*cDot and put the result in a int16_t */ + tmp32 = *cDotPtr << sh; + tmp16 = (int16_t)(tmp32 >> 16); + cDotSqW16 = (int16_t)(((int32_t)(tmp16)*(tmp16))>>16); + + /* Calculate the criteria (cDot*cDot/energy) */ + *critPtr = cDotSqW16 * *inverseEnergyPtr; + + /* Extract 
the maximum shift value under the constraint + that the criteria is not zero */ + if ((*critPtr)!=0) { + max = WEBRTC_SPL_MAX((*inverseEnergyShiftPtr), max); + } + + inverseEnergyPtr++; + inverseEnergyShiftPtr++; + critPtr++; + cDotPtr++; + } + + /* If no max shifts still at initialization value, set shift to zero */ + if (max==WEBRTC_SPL_WORD16_MIN) { + max = 0; + } + + /* Modify the criterias, so that all of them use the same Q domain */ + critPtr=Crit; + inverseEnergyShiftPtr=inverseEnergyShift; + for (i=0;i<range;i++) { + /* Guarantee that the shift value is less than 16 + in order to simplify for DSP's (and guard against >31) */ + tmp16 = WEBRTC_SPL_MIN(16, max-(*inverseEnergyShiftPtr)); + + (*critPtr)=WEBRTC_SPL_SHIFT_W32((*critPtr),-tmp16); + critPtr++; + inverseEnergyShiftPtr++; + } + + /* Find the index of the best value */ + *bestIndex = WebRtcSpl_MaxIndexW32(Crit, range); + *bestCrit = Crit[*bestIndex]; + + /* Calculate total shifts of this criteria */ + *bestCritSh = 32 - 2*sh + max; + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h new file mode 100644 index 0000000000..5da70e0988 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_search_core.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbSearchCore.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_CORE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_SEARCH_CORE_H_ + +#include <stddef.h> +#include <stdint.h> + +void WebRtcIlbcfix_CbSearchCore( + int32_t* cDot, /* (i) Cross Correlation */ + size_t range, /* (i) Search range */ + int16_t stage, /* (i) Stage of this search */ + int16_t* inverseEnergy, /* (i) Inversed energy */ + int16_t* inverseEnergyShift, /* (i) Shifts of inversed energy + with the offset 2*16-29 */ + int32_t* Crit, /* (o) The criteria */ + size_t* bestIndex, /* (o) Index that corresponds to + maximum criteria (in this + vector) */ + int32_t* bestCrit, /* (o) Value of critera for the + chosen index */ + int16_t* bestCritSh); /* (o) The domain of the chosen + criteria */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c new file mode 100644 index 0000000000..d6fa4d93d4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/cb_update_best_index.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*------------------------------------------------------------------*
 * Compare a candidate codebook criterion with the best criterion found
 * so far. When the candidate is strictly larger, compute its gain,
 * clamp it, and store the candidate as the new best entry.
 *
 * Each criterion comes with its own shift value, so before comparing,
 * the criterion with the smaller shift value is right-shifted by the
 * difference (limited to 31 bits). The outputs are left untouched when
 * the candidate does not win.
 *------------------------------------------------------------------*/
void WebRtcIlbcfix_CbUpdateBestIndex(
    int32_t CritNew,        /* (i) New potentially best criterion */
    int16_t CritNewSh,      /* (i) Shift value of the above criterion */
    size_t IndexNew,        /* (i) Index of the new criterion */
    int32_t cDotNew,        /* (i) Cross dot of the new index */
    int16_t invEnergyNew,   /* (i) Inverse energy of the new index */
    int16_t energyShiftNew, /* (i) Energy shifts of the new index */
    int32_t *CritMax,       /* (i/o) Maximum criterion (so far) */
    int16_t *shTotMax,      /* (i/o) Shifts of the maximum criterion */
    size_t *bestIndex,      /* (i/o) Index that corresponds to the
                                     maximum criterion */
    int16_t *bestGain)      /* (i/o) Gain in Q14 that corresponds to
                                     the maximum criterion */
{
  int16_t shiftOld = 0;
  int16_t shiftNew = 0;
  int16_t normShift;
  int16_t gainShift;
  int16_t cDotNorm;
  int32_t gain;

  /* Bring the new and the old criterion into the same domain: only one
     of the two is shifted, the other keeps a shift of zero. */
  if (CritNewSh > (*shTotMax)) {
    shiftOld = WEBRTC_SPL_MIN(31, CritNewSh - (*shTotMax));
  } else {
    shiftNew = WEBRTC_SPL_MIN(31, (*shTotMax) - CritNewSh);
  }

  /* Nothing to update unless the candidate is strictly better. */
  if ((CritNew >> shiftNew) <= (*CritMax >> shiftOld)) {
    return;
  }

  /* Shift that reduces cDotNew to a 16-bit value. */
  normShift = (int16_t)WebRtcSpl_NormW32(cDotNew);
  normShift = 16 - normShift;

  /* Gain in Q14: compensate for the inverse energy shift in Q29 and for
     the energy having been stored in an int16_t (shifted down 16 steps),
     i.e. 29 - 14 + 16 = 31. The shift is capped at 31. */
  gainShift = 31 - energyShiftNew - normShift;
  gainShift = WEBRTC_SPL_MIN(31, gainShift);

  cDotNorm = (int16_t)WEBRTC_SPL_SHIFT_W32(cDotNew, -normShift);
  gain = (cDotNorm * invEnergyNew) >> gainShift;

  /* Limit the gain magnitude to 1.3 (21299 in Q14); this differs
     slightly from the floating point version. */
  if (gain > 21299) {
    *bestGain = 21299;
  } else if (gain < -21299) {
    *bestGain = -21299;
  } else {
    *bestGain = (int16_t)gain;
  }

  /* Record the candidate as the new best entry. */
  *CritMax = CritNew;
  *shTotMax = CritNewSh;
  *bestIndex = IndexNew;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CbUpdateBestIndex.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_UPDATE_BEST_INDEX_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CB_UPDATE_BEST_INDEX_H_ + +#include <stddef.h> +#include <stdint.h> + +void WebRtcIlbcfix_CbUpdateBestIndex( + int32_t CritNew, /* (i) New Potentially best Criteria */ + int16_t CritNewSh, /* (i) Shift value of above Criteria */ + size_t IndexNew, /* (i) Index of new Criteria */ + int32_t cDotNew, /* (i) Cross dot of new index */ + int16_t invEnergyNew, /* (i) Inversed energy new index */ + int16_t energyShiftNew, /* (i) Energy shifts of new index */ + int32_t* CritMax, /* (i/o) Maximum Criteria (so far) */ + int16_t* shTotMax, /* (i/o) Shifts of maximum criteria */ + size_t* bestIndex, /* (i/o) Index that corresponds to + maximum criteria */ + int16_t* bestGain); /* (i/o) Gain in Q14 that corresponds + to maximum criteria */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c new file mode 100644 index 0000000000..b4eee66219 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/chebyshev.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*------------------------------------------------------------------*
 * Calculate the Chebyshev polynomial series
 *    F(w) = 2*exp(-j5w)*C(x)
 * The recurrence implemented below evaluates
 *    C(x) = T_5(x) + f[1]*T_4(x) + f[2]*T_3(x) + f[3]*T_2(x)
 *           + f[4]*T_1(x) + f[5]/2
 * where T_i(x) is the i:th order Chebyshev polynomial.
 *
 * x : (i) value at which the series is evaluated
 * f : (i) coefficients; elements f[1]..f[5] are read, f[0] is not
 *
 * Returns C(x) as the 32-bit accumulator shifted down 10 bits,
 * saturated to the int16_t range.
 *
 * All scaling is done with multiplications instead of left shifts,
 * because left-shifting a negative signed value is undefined behaviour
 * in C; the numerical result is unchanged.
 *------------------------------------------------------------------*/
int16_t WebRtcIlbcfix_Chebyshev(
    /* (o) Result of C(x) */
    int16_t x,  /* (i) Value to the Chebyshev polynomial */
    int16_t *f  /* (i) The coefficients in the polynomial */
    ) {
  int16_t b1_high, b1_low; /* High/low split of b1 to increase the accuracy */
  int32_t b1;
  int32_t b2;
  int32_t b1_prev;
  int i;

  /* b2 = 1.0, in the same fixed-point domain as b1.
     NOTE(review): the original comment labelled 0x1000000 as Q23; the
     surrounding scaling looks like Q24 - confirm. */
  b2 = (int32_t)0x1000000;

  /* Calculate b1 = 2*x + f[1] */
  b1 = (x * 1024) + (f[1] * 16384);

  for (i = 2; i < 5; i++) {
    b1_prev = b1;

    /* Split b1 into a high and low part */
    b1_high = (int16_t)(b1 >> 16);
    b1_low = (int16_t)((b1 - ((int32_t)b1_high * 65536)) >> 1);

    /* Calculate 2*x*b1 - b2 + f[i] */
    b1 = ((b1_high * x + ((b1_low * x) >> 15)) * 4) - b2 + (f[i] * 16384);

    /* Update b2 for next round */
    b2 = b1_prev;
  }

  /* Split b1 into a high and low part */
  b1_high = (int16_t)(b1 >> 16);
  b1_low = (int16_t)((b1 - ((int32_t)b1_high * 65536)) >> 1);

  /* Final step: x*b1 - b2 + f[5]/2 */
  b1 = ((b1_high * x) * 2) + (((b1_low * x) >> 15) * 2) - b2 + (f[5] * 8192);

  /* Handle overflows and set to maximum or minimum int16_t instead.
     The limits are INT16_MAX * 1024 and INT16_MIN * 1024. */
  if (b1 > ((int32_t)33553408)) {
    return INT16_MAX;
  } else if (b1 < ((int32_t)-33554432)) {
    return INT16_MIN;
  } else {
    return (int16_t)(b1 >> 10);
  }
}
+ f(4)T_1(x) + f(5)/2) + * T_i(x) is the i:th order Chebyshev polynomial + *------------------------------------------------------------------*/ + +int16_t WebRtcIlbcfix_Chebyshev( + /* (o) Result of C(x) */ + int16_t x, /* (i) Value to the Chevyshev polynomial */ + int16_t* f /* (i) The coefficients in the polynomial */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c new file mode 100644 index 0000000000..452bc78e3b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/comp_corr.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CompCorr.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/comp_corr.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Compute cross correlation and pitch gain for pitch prediction + * of last subframe at given lag. 
/*----------------------------------------------------------------*
 * Compute the cross correlation and the energy used for pitch
 * prediction of the last subframe at a given lag.
 *
 * The last sRange samples of `buffer` are correlated with the sRange
 * samples that start `lag` samples earlier; the energy is the dot
 * product of that lagged segment with itself. Both dot products are
 * computed by WebRtcSpl_DotProductWithScale using `scale`.
 *
 * When the energy comes out as zero, the correlation is forced to 0
 * and the energy to 1 so that later divisions by the energy are safe.
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_CompCorr(
    int32_t *corr,   /* (o) cross correlation */
    int32_t *ener,   /* (o) energy */
    int16_t *buffer, /* (i) signal buffer */
    size_t lag,      /* (i) pitch lag */
    size_t bLen,     /* (i) length of buffer */
    size_t sRange,   /* (i) correlation search length */
    int16_t scale    /* (i) number of rightshifts to use */
    ) {
  int16_t *target = &buffer[bLen - sRange];
  int16_t *lagged = &buffer[bLen - sRange - lag];
  int32_t energy;

  /* Correlation between the target segment and the lagged segment,
     followed by the energy of the lagged segment. */
  *corr = WebRtcSpl_DotProductWithScale(target, lagged, sRange, scale);
  energy = WebRtcSpl_DotProductWithScale(lagged, lagged, sRange, scale);

  /* Zero energy: report no correlation and a unit energy instead. */
  if (energy == 0) {
    *corr = 0;
    energy = 1;
  }
  *ener = energy;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CompCorr.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_COMP_CORR_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_COMP_CORR_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Compute cross correlation and pitch gain for pitch prediction + * of last subframe at given lag. + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_CompCorr(int32_t* corr, /* (o) cross correlation */ + int32_t* ener, /* (o) energy */ + int16_t* buffer, /* (i) signal buffer */ + size_t lag, /* (i) pitch lag */ + size_t bLen, /* (i) length of buffer */ + size_t sRange, /* (i) correlation search length */ + int16_t scale /* (i) number of rightshifts to use */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m new file mode 100644 index 0000000000..4bda83622f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/complexityMeasures.m @@ -0,0 +1,57 @@ +% % Copyright(c) 2011 The WebRTC project authors.All Rights Reserved.% + % Use of this source code is governed by a BSD + - + style license % that can be found in the LICENSE file in the root of the source + % tree.An additional intellectual property rights grant can be found + % in the file PATENTS.All contributing project authors may + % be found in the AUTHORS file in the root of the source tree.% + + clear; +pack; +% +% Enter the path to YOUR executable and remember to define the perprocessor +% variable PRINT_MIPS te get the instructions printed to the screen. 
+% +command = '!iLBCtest.exe 30 speechAndBGnoise.pcm out1.bit out1.pcm tlm10_30ms.dat'; +cout=' > st.txt'; %saves to matlab variable 'st' +eval(strcat(command,cout)); +if(length(cout)>3) + load st.txt +else + disp('No cout file to load') +end + +% initialize vector to zero +index = find(st(1:end,1)==-1); +indexnonzero = find(st(1:end,1)>0); +frames = length(index)-indexnonzero(1)+1; +start = indexnonzero(1) - 1; +functionOrder=max(st(:,2)); +new=zeros(frames,functionOrder); + +for i = 1:frames, + for j = index(start-1+i)+1:(index(start+i)-1), + new(i,st(j,2)) = new(i,st(j,2)) + st(j,1); + end +end + +result=zeros(functionOrder,3); +for i=1:functionOrder + nonzeroelements = find(new(1:end,i)>0); + result(i,1)=i; + + % Compute each function's mean complexity + % result(i,2)=(sum(new(nonzeroelements,i))/(length(nonzeroelements)*0.03))/1000000; + + % Compute each function's maximum complexity in encoding + % and decoding respectively and then add it together: + % result(i,3)=(max(new(1:end,i))/0.03)/1000000; + result(i,3)=(max(new(1:size(new,1)/2,i))/0.03)/1000000 + (max(new(size(new,1)/2+1:end,i))/0.03)/1000000; +end + +result + +% Compute maximum complexity for a single frame (enc/dec separately and together) +maxEncComplexityInAFrame = (max(sum(new(1:size(new,1)/2,:),2))/0.03)/1000000 +maxDecComplexityInAFrame = (max(sum(new(size(new,1)/2+1:end,:),2))/0.03)/1000000 +totalComplexity = maxEncComplexityInAFrame + maxDecComplexityInAFrame diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c new file mode 100644 index 0000000000..22f2acb330 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.c @@ -0,0 +1,667 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + constants.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/constants.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/* HP Filters {b[0] b[1] b[2] -a[1] -a[2]} */ + +const int16_t WebRtcIlbcfix_kHpInCoefs[5] = {3798, -7596, 3798, 7807, -3733}; +const int16_t WebRtcIlbcfix_kHpOutCoefs[5] = {3849, -7699, 3849, 7918, -3833}; + +/* Window in Q11 to window the energies of the 5 choises (3 for 20ms) in the choise for + the 80 sample start state +*/ +const int16_t WebRtcIlbcfix_kStartSequenceEnrgWin[NSUB_MAX-1]= { + 1638, 1843, 2048, 1843, 1638 +}; + +/* LP Filter coeffs used for downsampling */ +const int16_t WebRtcIlbcfix_kLpFiltCoefs[FILTERORDER_DS_PLUS1]= { + -273, 512, 1297, 1696, 1297, 512, -273 +}; + +/* Constants used in the LPC calculations */ + +/* Hanning LPC window (in Q15) */ +const int16_t WebRtcIlbcfix_kLpcWin[BLOCKL_MAX] = { + 6, 22, 50, 89, 139, 200, 272, 355, 449, 554, 669, 795, + 932, 1079, 1237, 1405, 1583, 1771, 1969, 2177, 2395, 2622, 2858, 3104, + 3359, 3622, 3894, 4175, 4464, 4761, 5066, 5379, 5699, 6026, 6361, 6702, + 7050, 7404, 7764, 8130, 8502, 8879, 9262, 9649, 10040, 10436, 10836, 11240, + 11647, 12058, 12471, 12887, 13306, 13726, 14148, 14572, 14997, 15423, 15850, 16277, + 16704, 17131, 17558, 17983, 18408, 18831, 19252, 19672, 20089, 20504, 20916, 21325, + 21730, 22132, 22530, 22924, 23314, 23698, 24078, 24452, 24821, 25185, 25542, 25893, + 26238, 26575, 26906, 27230, 27547, 27855, 28156, 28450, 28734, 29011, 29279, 29538, + 29788, 30029, 30261, 30483, 30696, 30899, 31092, 31275, 31448, 31611, 31764, 31906, + 32037, 32158, 32268, 32367, 32456, 32533, 
32600, 32655, 32700, 32733, 32755, 32767, + 32767, 32755, 32733, 32700, 32655, 32600, 32533, 32456, 32367, 32268, 32158, 32037, + 31906, 31764, 31611, 31448, 31275, 31092, 30899, 30696, 30483, 30261, 30029, 29788, + 29538, 29279, 29011, 28734, 28450, 28156, 27855, 27547, 27230, 26906, 26575, 26238, + 25893, 25542, 25185, 24821, 24452, 24078, 23698, 23314, 22924, 22530, 22132, 21730, + 21325, 20916, 20504, 20089, 19672, 19252, 18831, 18408, 17983, 17558, 17131, 16704, + 16277, 15850, 15423, 14997, 14572, 14148, 13726, 13306, 12887, 12471, 12058, 11647, + 11240, 10836, 10436, 10040, 9649, 9262, 8879, 8502, 8130, 7764, 7404, 7050, + 6702, 6361, 6026, 5699, 5379, 5066, 4761, 4464, 4175, 3894, 3622, 3359, + 3104, 2858, 2622, 2395, 2177, 1969, 1771, 1583, 1405, 1237, 1079, 932, + 795, 669, 554, 449, 355, 272, 200, 139, 89, 50, 22, 6 +}; + +/* Asymmetric LPC window (in Q15)*/ +const int16_t WebRtcIlbcfix_kLpcAsymWin[BLOCKL_MAX] = { + 2, 7, 15, 27, 42, 60, 81, 106, 135, 166, 201, 239, + 280, 325, 373, 424, 478, 536, 597, 661, 728, 798, 872, 949, + 1028, 1111, 1197, 1287, 1379, 1474, 1572, 1674, 1778, 1885, 1995, 2108, + 2224, 2343, 2465, 2589, 2717, 2847, 2980, 3115, 3254, 3395, 3538, 3684, + 3833, 3984, 4138, 4295, 4453, 4615, 4778, 4944, 5112, 5283, 5456, 5631, + 5808, 5987, 6169, 6352, 6538, 6725, 6915, 7106, 7300, 7495, 7692, 7891, + 8091, 8293, 8497, 8702, 8909, 9118, 9328, 9539, 9752, 9966, 10182, 10398, + 10616, 10835, 11055, 11277, 11499, 11722, 11947, 12172, 12398, 12625, 12852, 13080, + 13309, 13539, 13769, 14000, 14231, 14463, 14695, 14927, 15160, 15393, 15626, 15859, + 16092, 16326, 16559, 16792, 17026, 17259, 17492, 17725, 17957, 18189, 18421, 18653, + 18884, 19114, 19344, 19573, 19802, 20030, 20257, 20483, 20709, 20934, 21157, 21380, + 21602, 21823, 22042, 22261, 22478, 22694, 22909, 23123, 23335, 23545, 23755, 23962, + 24168, 24373, 24576, 24777, 24977, 25175, 25371, 25565, 25758, 25948, 26137, 26323, + 26508, 26690, 26871, 27049, 27225, 27399, 27571, 27740, 
27907, 28072, 28234, 28394, + 28552, 28707, 28860, 29010, 29157, 29302, 29444, 29584, 29721, 29855, 29987, 30115, + 30241, 30364, 30485, 30602, 30717, 30828, 30937, 31043, 31145, 31245, 31342, 31436, + 31526, 31614, 31699, 31780, 31858, 31933, 32005, 32074, 32140, 32202, 32261, 32317, + 32370, 32420, 32466, 32509, 32549, 32585, 32618, 32648, 32675, 32698, 32718, 32734, + 32748, 32758, 32764, 32767, 32767, 32667, 32365, 31863, 31164, 30274, 29197, 27939, + 26510, 24917, 23170, 21281, 19261, 17121, 14876, 12540, 10126, 7650, 5126, 2571 +}; + +/* Lag window for LPC (Q31) */ +const int32_t WebRtcIlbcfix_kLpcLagWin[LPC_FILTERORDER + 1]={ + 2147483647, 2144885453, 2137754373, 2125918626, 2109459810, + 2088483140, 2063130336, 2033564590, 1999977009, 1962580174, + 1921610283}; + +/* WebRtcIlbcfix_kLpcChirpSyntDenum vector in Q15 corresponding + * floating point vector {1 0.9025 0.9025^2 0.9025^3 ...} + */ +const int16_t WebRtcIlbcfix_kLpcChirpSyntDenum[LPC_FILTERORDER + 1] = { + 32767, 29573, 26690, 24087, + 21739, 19619, 17707, 15980, + 14422, 13016, 11747}; + +/* WebRtcIlbcfix_kLpcChirpWeightDenum in Q15 corresponding to + * floating point vector {1 0.4222 0.4222^2... 
} + */ +const int16_t WebRtcIlbcfix_kLpcChirpWeightDenum[LPC_FILTERORDER + 1] = { + 32767, 13835, 5841, 2466, 1041, 440, + 186, 78, 33, 14, 6}; + +/* LSF quantization Q13 domain */ +const int16_t WebRtcIlbcfix_kLsfCb[64 * 3 + 128 * 3 + 128 * 4] = { + 1273, 2238, 3696, + 3199, 5309, 8209, + 3606, 5671, 7829, + 2815, 5262, 8778, + 2608, 4027, 5493, + 1582, 3076, 5945, + 2983, 4181, 5396, + 2437, 4322, 6902, + 1861, 2998, 4613, + 2007, 3250, 5214, + 1388, 2459, 4262, + 2563, 3805, 5269, + 2036, 3522, 5129, + 1935, 4025, 6694, + 2744, 5121, 7338, + 2810, 4248, 5723, + 3054, 5405, 7745, + 1449, 2593, 4763, + 3411, 5128, 6596, + 2484, 4659, 7496, + 1668, 2879, 4818, + 1812, 3072, 5036, + 1638, 2649, 3900, + 2464, 3550, 4644, + 1853, 2900, 4158, + 2458, 4163, 5830, + 2556, 4036, 6254, + 2703, 4432, 6519, + 3062, 4953, 7609, + 1725, 3703, 6187, + 2221, 3877, 5427, + 2339, 3579, 5197, + 2021, 4633, 7037, + 2216, 3328, 4535, + 2961, 4739, 6667, + 2807, 3955, 5099, + 2788, 4501, 6088, + 1642, 2755, 4431, + 3341, 5282, 7333, + 2414, 3726, 5727, + 1582, 2822, 5269, + 2259, 3447, 4905, + 3117, 4986, 7054, + 1825, 3491, 5542, + 3338, 5736, 8627, + 1789, 3090, 5488, + 2566, 3720, 4923, + 2846, 4682, 7161, + 1950, 3321, 5976, + 1834, 3383, 6734, + 3238, 4769, 6094, + 2031, 3978, 5903, + 1877, 4068, 7436, + 2131, 4644, 8296, + 2764, 5010, 8013, + 2194, 3667, 6302, + 2053, 3127, 4342, + 3523, 6595, 10010, + 3134, 4457, 5748, + 3142, 5819, 9414, + 2223, 4334, 6353, + 2022, 3224, 4822, + 2186, 3458, 5544, + 2552, 4757, 6870, + 10905, 12917, 14578, + 9503, 11485, 14485, + 9518, 12494, 14052, + 6222, 7487, 9174, + 7759, 9186, 10506, + 8315, 12755, 14786, + 9609, 11486, 13866, + 8909, 12077, 13643, + 7369, 9054, 11520, + 9408, 12163, 14715, + 6436, 9911, 12843, + 7109, 9556, 11884, + 7557, 10075, 11640, + 6482, 9202, 11547, + 6463, 7914, 10980, + 8611, 10427, 12752, + 7101, 9676, 12606, + 7428, 11252, 13172, + 10197, 12955, 15842, + 7487, 10955, 12613, + 5575, 7858, 13621, + 7268, 11719, 
14752, + 7476, 11744, 13795, + 7049, 8686, 11922, + 8234, 11314, 13983, + 6560, 11173, 14984, + 6405, 9211, 12337, + 8222, 12054, 13801, + 8039, 10728, 13255, + 10066, 12733, 14389, + 6016, 7338, 10040, + 6896, 8648, 10234, + 7538, 9170, 12175, + 7327, 12608, 14983, + 10516, 12643, 15223, + 5538, 7644, 12213, + 6728, 12221, 14253, + 7563, 9377, 12948, + 8661, 11023, 13401, + 7280, 8806, 11085, + 7723, 9793, 12333, + 12225, 14648, 16709, + 8768, 13389, 15245, + 10267, 12197, 13812, + 5301, 7078, 11484, + 7100, 10280, 11906, + 8716, 12555, 14183, + 9567, 12464, 15434, + 7832, 12305, 14300, + 7608, 10556, 12121, + 8913, 11311, 12868, + 7414, 9722, 11239, + 8666, 11641, 13250, + 9079, 10752, 12300, + 8024, 11608, 13306, + 10453, 13607, 16449, + 8135, 9573, 10909, + 6375, 7741, 10125, + 10025, 12217, 14874, + 6985, 11063, 14109, + 9296, 13051, 14642, + 8613, 10975, 12542, + 6583, 10414, 13534, + 6191, 9368, 13430, + 5742, 6859, 9260, + 7723, 9813, 13679, + 8137, 11291, 12833, + 6562, 8973, 10641, + 6062, 8462, 11335, + 6928, 8784, 12647, + 7501, 8784, 10031, + 8372, 10045, 12135, + 8191, 9864, 12746, + 5917, 7487, 10979, + 5516, 6848, 10318, + 6819, 9899, 11421, + 7882, 12912, 15670, + 9558, 11230, 12753, + 7752, 9327, 11472, + 8479, 9980, 11358, + 11418, 14072, 16386, + 7968, 10330, 14423, + 8423, 10555, 12162, + 6337, 10306, 14391, + 8850, 10879, 14276, + 6750, 11885, 15710, + 7037, 8328, 9764, + 6914, 9266, 13476, + 9746, 13949, 15519, + 11032, 14444, 16925, + 8032, 10271, 11810, + 10962, 13451, 15833, + 10021, 11667, 13324, + 6273, 8226, 12936, + 8543, 10397, 13496, + 7936, 10302, 12745, + 6769, 8138, 10446, + 6081, 7786, 11719, + 8637, 11795, 14975, + 8790, 10336, 11812, + 7040, 8490, 10771, + 7338, 10381, 13153, + 6598, 7888, 9358, + 6518, 8237, 12030, + 9055, 10763, 12983, + 6490, 10009, 12007, + 9589, 12023, 13632, + 6867, 9447, 10995, + 7930, 9816, 11397, + 10241, 13300, 14939, + 5830, 8670, 12387, + 9870, 11915, 14247, + 9318, 11647, 13272, + 6721, 10836, 
12929, + 6543, 8233, 9944, + 8034, 10854, 12394, + 9112, 11787, 14218, + 9302, 11114, 13400, + 9022, 11366, 13816, + 6962, 10461, 12480, + 11288, 13333, 15222, + 7249, 8974, 10547, + 10566, 12336, 14390, + 6697, 11339, 13521, + 11851, 13944, 15826, + 6847, 8381, 11349, + 7509, 9331, 10939, + 8029, 9618, 11909, + 13973, 17644, 19647, 22474, + 14722, 16522, 20035, 22134, + 16305, 18179, 21106, 23048, + 15150, 17948, 21394, 23225, + 13582, 15191, 17687, 22333, + 11778, 15546, 18458, 21753, + 16619, 18410, 20827, 23559, + 14229, 15746, 17907, 22474, + 12465, 15327, 20700, 22831, + 15085, 16799, 20182, 23410, + 13026, 16935, 19890, 22892, + 14310, 16854, 19007, 22944, + 14210, 15897, 18891, 23154, + 14633, 18059, 20132, 22899, + 15246, 17781, 19780, 22640, + 16396, 18904, 20912, 23035, + 14618, 17401, 19510, 21672, + 15473, 17497, 19813, 23439, + 18851, 20736, 22323, 23864, + 15055, 16804, 18530, 20916, + 16490, 18196, 19990, 21939, + 11711, 15223, 21154, 23312, + 13294, 15546, 19393, 21472, + 12956, 16060, 20610, 22417, + 11628, 15843, 19617, 22501, + 14106, 16872, 19839, 22689, + 15655, 18192, 20161, 22452, + 12953, 15244, 20619, 23549, + 15322, 17193, 19926, 21762, + 16873, 18676, 20444, 22359, + 14874, 17871, 20083, 21959, + 11534, 14486, 19194, 21857, + 17766, 19617, 21338, 23178, + 13404, 15284, 19080, 23136, + 15392, 17527, 19470, 21953, + 14462, 16153, 17985, 21192, + 17734, 19750, 21903, 23783, + 16973, 19096, 21675, 23815, + 16597, 18936, 21257, 23461, + 15966, 17865, 20602, 22920, + 15416, 17456, 20301, 22972, + 18335, 20093, 21732, 23497, + 15548, 17217, 20679, 23594, + 15208, 16995, 20816, 22870, + 13890, 18015, 20531, 22468, + 13211, 15377, 19951, 22388, + 12852, 14635, 17978, 22680, + 16002, 17732, 20373, 23544, + 11373, 14134, 19534, 22707, + 17329, 19151, 21241, 23462, + 15612, 17296, 19362, 22850, + 15422, 19104, 21285, 23164, + 13792, 17111, 19349, 21370, + 15352, 17876, 20776, 22667, + 15253, 16961, 18921, 22123, + 14108, 17264, 20294, 23246, + 
15785, 17897, 20010, 21822, + 17399, 19147, 20915, 22753, + 13010, 15659, 18127, 20840, + 16826, 19422, 22218, 24084, + 18108, 20641, 22695, 24237, + 18018, 20273, 22268, 23920, + 16057, 17821, 21365, 23665, + 16005, 17901, 19892, 23016, + 13232, 16683, 21107, 23221, + 13280, 16615, 19915, 21829, + 14950, 18575, 20599, 22511, + 16337, 18261, 20277, 23216, + 14306, 16477, 21203, 23158, + 12803, 17498, 20248, 22014, + 14327, 17068, 20160, 22006, + 14402, 17461, 21599, 23688, + 16968, 18834, 20896, 23055, + 15070, 17157, 20451, 22315, + 15419, 17107, 21601, 23946, + 16039, 17639, 19533, 21424, + 16326, 19261, 21745, 23673, + 16489, 18534, 21658, 23782, + 16594, 18471, 20549, 22807, + 18973, 21212, 22890, 24278, + 14264, 18674, 21123, 23071, + 15117, 16841, 19239, 23118, + 13762, 15782, 20478, 23230, + 14111, 15949, 20058, 22354, + 14990, 16738, 21139, 23492, + 13735, 16971, 19026, 22158, + 14676, 17314, 20232, 22807, + 16196, 18146, 20459, 22339, + 14747, 17258, 19315, 22437, + 14973, 17778, 20692, 23367, + 15715, 17472, 20385, 22349, + 15702, 18228, 20829, 23410, + 14428, 16188, 20541, 23630, + 16824, 19394, 21365, 23246, + 13069, 16392, 18900, 21121, + 12047, 16640, 19463, 21689, + 14757, 17433, 19659, 23125, + 15185, 16930, 19900, 22540, + 16026, 17725, 19618, 22399, + 16086, 18643, 21179, 23472, + 15462, 17248, 19102, 21196, + 17368, 20016, 22396, 24096, + 12340, 14475, 19665, 23362, + 13636, 16229, 19462, 22728, + 14096, 16211, 19591, 21635, + 12152, 14867, 19943, 22301, + 14492, 17503, 21002, 22728, + 14834, 16788, 19447, 21411, + 14650, 16433, 19326, 22308, + 14624, 16328, 19659, 23204, + 13888, 16572, 20665, 22488, + 12977, 16102, 18841, 22246, + 15523, 18431, 21757, 23738, + 14095, 16349, 18837, 20947, + 13266, 17809, 21088, 22839, + 15427, 18190, 20270, 23143, + 11859, 16753, 20935, 22486, + 12310, 17667, 21736, 23319, + 14021, 15926, 18702, 22002, + 12286, 15299, 19178, 21126, + 15703, 17491, 21039, 23151, + 12272, 14018, 18213, 22570, + 14817, 16364, 
18485, 22598, + 17109, 19683, 21851, 23677, + 12657, 14903, 19039, 22061, + 14713, 16487, 20527, 22814, + 14635, 16726, 18763, 21715, + 15878, 18550, 20718, 22906 +}; + +const int16_t WebRtcIlbcfix_kLsfDimCb[LSF_NSPLIT] = {3, 3, 4}; +const int16_t WebRtcIlbcfix_kLsfSizeCb[LSF_NSPLIT] = {64,128,128}; + +const int16_t WebRtcIlbcfix_kLsfMean[LPC_FILTERORDER] = { + 2308, 3652, 5434, 7885, + 10255, 12559, 15160, 17513, + 20328, 22752}; + +const int16_t WebRtcIlbcfix_kLspMean[LPC_FILTERORDER] = { + 31476, 29565, 25819, 18725, 10276, + 1236, -9049, -17600, -25884, -30618 +}; + +/* Q14 */ +const int16_t WebRtcIlbcfix_kLsfWeight20ms[4] = {12288, 8192, 4096, 0}; +const int16_t WebRtcIlbcfix_kLsfWeight30ms[6] = {8192, 16384, 10923, 5461, 0, 0}; + +/* + cos(x) in Q15 + WebRtcIlbcfix_kCos[i] = cos(pi*i/64.0) + used in WebRtcIlbcfix_Lsp2Lsf() +*/ + +const int16_t WebRtcIlbcfix_kCos[64] = { + 32767, 32729, 32610, 32413, 32138, 31786, 31357, 30853, + 30274, 29622, 28899, 28106, 27246, 26320, 25330, 24279, + 23170, 22006, 20788, 19520, 18205, 16846, 15447, 14010, + 12540, 11039, 9512, 7962, 6393, 4808, 3212, 1608, + 0, -1608, -3212, -4808, -6393, -7962, -9512, -11039, + -12540, -14010, -15447, -16846, -18205, -19520, -20788, -22006, + -23170, -24279, -25330, -26320, -27246, -28106, -28899, -29622, + -30274, -30853, -31357, -31786, -32138, -32413, -32610, -32729 +}; + +/* + Derivative in Q19, used to interpolate between the + WebRtcIlbcfix_kCos[] values to get a more exact y = cos(x) +*/ +const int16_t WebRtcIlbcfix_kCosDerivative[64] = { + -632, -1893, -3150, -4399, -5638, -6863, -8072, -9261, + -10428, -11570, -12684, -13767, -14817, -15832, -16808, -17744, + -18637, -19486, -20287, -21039, -21741, -22390, -22986, -23526, + -24009, -24435, -24801, -25108, -25354, -25540, -25664, -25726, + -25726, -25664, -25540, -25354, -25108, -24801, -24435, -24009, + -23526, -22986, -22390, -21741, -21039, -20287, -19486, -18637, + -17744, -16808, -15832, -14817, -13767, -12684, -11570, -10428, 
+ -9261, -8072, -6863, -5638, -4399, -3150, -1893, -632}; + +/* + Table in Q15, used for a2lsf conversion + WebRtcIlbcfix_kCosGrid[i] = cos((2*pi*i)/(float)(2*COS_GRID_POINTS)); for i = 0..COS_GRID_POINTS +*/ + +const int16_t WebRtcIlbcfix_kCosGrid[COS_GRID_POINTS + 1] = { + 32760, 32723, 32588, 32364, 32051, 31651, 31164, 30591, + 29935, 29196, 28377, 27481, 26509, 25465, 24351, 23170, + 21926, 20621, 19260, 17846, 16384, 14876, 13327, 11743, + 10125, 8480, 6812, 5126, 3425, 1714, 0, -1714, -3425, + -5126, -6812, -8480, -10125, -11743, -13327, -14876, + -16384, -17846, -19260, -20621, -21926, -23170, -24351, + -25465, -26509, -27481, -28377, -29196, -29935, -30591, + -31164, -31651, -32051, -32364, -32588, -32723, -32760 +}; + +/* + Derivative of y = acos(x) in Q12 + used in WebRtcIlbcfix_Lsp2Lsf() +*/ + +const int16_t WebRtcIlbcfix_kAcosDerivative[64] = { + -26887, -8812, -5323, -3813, -2979, -2444, -2081, -1811, + -1608, -1450, -1322, -1219, -1132, -1059, -998, -946, + -901, -861, -827, -797, -772, -750, -730, -713, + -699, -687, -677, -668, -662, -657, -654, -652, + -652, -654, -657, -662, -668, -677, -687, -699, + -713, -730, -750, -772, -797, -827, -861, -901, + -946, -998, -1059, -1132, -1219, -1322, -1450, -1608, + -1811, -2081, -2444, -2979, -3813, -5323, -8812, -26887 +}; + + +/* Tables for quantization of start state */ + +/* State quantization tables */ +const int16_t WebRtcIlbcfix_kStateSq3[8] = { /* Values in Q13 */ + -30473, -17838, -9257, -2537, + 3639, 10893, 19958, 32636 +}; + +/* This table defines the limits for the selection of the frgq index: + less than or equal to value 0 => index = 0 + less than or equal to value k => index = k +*/ +const int32_t WebRtcIlbcfix_kChooseFrgQuant[64] = { + 118, 163, 222, 305, 425, 604, + 851, 1174, 1617, 2222, 3080, 4191, + 5525, 7215, 9193, 11540, 14397, 17604, + 21204, 25209, 29863, 35720, 42531, 50375, + 59162, 68845, 80108, 93754, 110326, 129488, + 150654, 174328, 201962, 233195, 267843, 308239, + 354503, 405988, 464251, 531550, 608652, 697516,
+ 802526, 928793, 1080145, 1258120, 1481106, 1760881, + 2111111, 2546619, 3078825, 3748642, 4563142, 5573115, + 6887601, 8582108, 10797296, 14014513, 18625760, 25529599, + 37302935, 58819185, 109782723, WEBRTC_SPL_WORD32_MAX +}; + +const int16_t WebRtcIlbcfix_kScale[64] = { + /* First 27 values in Q16 */ + 29485, 25003, 21345, 18316, 15578, 13128, 10973, 9310, 7955, + 6762, 5789, 4877, 4255, 3699, 3258, 2904, 2595, 2328, + 2123, 1932, 1785, 1631, 1493, 1370, 1260, 1167, 1083, + /* Remaining 37 values in Q21 */ + 32081, 29611, 27262, 25229, 23432, 21803, 20226, 18883, 17609, + 16408, 15311, 14327, 13390, 12513, 11693, 10919, 10163, 9435, + 8739, 8100, 7424, 6813, 6192, 5648, 5122, 4639, 4207, 3798, + 3404, 3048, 2706, 2348, 2036, 1713, 1393, 1087, 747 +}; + +/* frgq in fixed point, but already computed like this: + for(i=0; i<64; i++){ + a = (pow(10,frgq[i])/4.5); + WebRtcIlbcfix_kFrgQuantMod[i] = round(a); + } + + Values 0:36 in Q8 + 37:58 in Q5 + 59:63 in Q3 +*/ +const int16_t WebRtcIlbcfix_kFrgQuantMod[64] = { + /* First 37 values in Q8 */ + 569, 671, 786, 916, 1077, 1278, + 1529, 1802, 2109, 2481, 2898, 3440, + 3943, 4535, 5149, 5778, 6464, 7208, + 7904, 8682, 9397, 10285, 11240, 12246, + 13313, 14382, 15492, 16735, 18131, 19693, + 21280, 22912, 24624, 26544, 28432, 30488, + 32720, + /* 22 values in Q5 */ + 4383, 4684, 5012, 5363, 5739, 6146, + 6603, 7113, 7679, 8285, 9040, 9850, + 10838, 11882, 13103, 14467, 15950, 17669, + 19712, 22016, 24800, 28576, + /* 5 values in Q3 */ + 8240, 9792, 12040, 15440, 22472 +}; + +/* Constants for codebook search and creation */ + +/* Expansion filter to get additional cb section. + * Q12 and reversed compared to flp + */ +const int16_t WebRtcIlbcfix_kCbFiltersRev[CB_FILTERLEN]={ + -140, 446, -755, 3302, 2922, -590, 343, -138}; + +/* Weighting coefficients for short lags.
+ * [0.2 0.4 0.6 0.8] in Q15 */ +const int16_t WebRtcIlbcfix_kAlpha[4]={ + 6554, 13107, 19661, 26214}; + +/* Ranges for search and filters at different subframes */ + +const size_t WebRtcIlbcfix_kSearchRange[5][CB_NSTAGES]={ + {58,58,58}, {108,44,44}, {108,108,108}, {108,108,108}, {108,108,108}}; + +const size_t WebRtcIlbcfix_kFilterRange[5]={63, 85, 125, 147, 147}; + +/* Gain Quantization for the codebook gains of the 3 stages */ + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq3[9]={ + -16384, -10813, -5407, 0, 4096, 8192, + 12288, 16384, 32767}; + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq4[17]={ + -17203, -14746, -12288, -9830, -7373, -4915, + -2458, 0, 2458, 4915, 7373, 9830, + 12288, 14746, 17203, 19661, 32767}; + +/* Q14 (one extra value (max int16_t) to simplify for the search) */ +const int16_t WebRtcIlbcfix_kGainSq5[33]={ + 614, 1229, 1843, 2458, 3072, 3686, + 4301, 4915, 5530, 6144, 6758, 7373, + 7987, 8602, 9216, 9830, 10445, 11059, + 11674, 12288, 12902, 13517, 14131, 14746, + 15360, 15974, 16589, 17203, 17818, 18432, + 19046, 19661, 32767}; + +/* Squares of the first 32 entries of WebRtcIlbcfix_kGainSq5 (gain_sq5Tbl), in Q14 */ +const int16_t WebRtcIlbcfix_kGainSq5Sq[32] = { + 23, 92, 207, 368, 576, 829, + 1129, 1474, 1866, 2304, 2787, 3317, + 3893, 4516, 5184, 5897, 6658, 7464, + 8318, 9216, 10160, 11151, 12187, 13271, + 14400, 15574, 16796, 18062, 19377, 20736, + 22140, 23593 +}; + +const int16_t* const WebRtcIlbcfix_kGain[3] = +{WebRtcIlbcfix_kGainSq5, WebRtcIlbcfix_kGainSq4, WebRtcIlbcfix_kGainSq3}; + + +/* Tables for the Enhancer, using upsampling factor 4 (ENH_UPS0 = 4) */ + +const int16_t WebRtcIlbcfix_kEnhPolyPhaser[ENH_UPS0][ENH_FLO_MULT2_PLUS1]={ + {0, 0, 0, 4096, 0, 0, 0}, + {64, -315, 1181, 3531, -436, 77, -64}, + {97, -509, 2464, 2464, -509, 97, -97}, + {77, -436, 3531, 1181, -315, 64, -77} +}; + +const int16_t WebRtcIlbcfix_kEnhWt[3] = { + 4800, 16384, 27968 /* Q16
*/ +}; + +const size_t WebRtcIlbcfix_kEnhPlocs[ENH_NBLOCKS_TOT] = { + 160, 480, 800, 1120, 1440, 1760, 2080, 2400 /* Q(-2) */ +}; + +/* PLC tables */ + +const int16_t WebRtcIlbcfix_kPlcPerSqr[6] = { /* Grid points for square of periodicity in Q15 */ + 839, 1343, 2048, 2998, 4247, 5849 +}; + +const int16_t WebRtcIlbcfix_kPlcPitchFact[6] = { /* Value of y=(x^4-0.4)/(0.7-0.4) in grid points in Q15 */ + 0, 5462, 10922, 16384, 21846, 27306 +}; + +const int16_t WebRtcIlbcfix_kPlcPfSlope[6] = { /* Slope of y=(x^4-0.4)/(0.7-0.4) in Q11 */ + 26667, 18729, 13653, 10258, 7901, 6214 +}; diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h new file mode 100644 index 0000000000..a8645c00db --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/constants.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + constants.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CONSTANTS_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CONSTANTS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/* high pass filters */ + +extern const int16_t WebRtcIlbcfix_kHpInCoefs[]; +extern const int16_t WebRtcIlbcfix_kHpOutCoefs[]; + +/* Window for start state decision */ +extern const int16_t WebRtcIlbcfix_kStartSequenceEnrgWin[]; + +/* low pass filter used for downsampling */ +extern const int16_t WebRtcIlbcfix_kLpFiltCoefs[]; + +/* LPC analysis and quantization (kCosGrid: COS_GRID_POINTS + 1 entries; kAcosDerivative: 64 entries) */ + +extern const int16_t WebRtcIlbcfix_kLpcWin[]; +extern const int16_t WebRtcIlbcfix_kLpcAsymWin[]; +extern const int32_t WebRtcIlbcfix_kLpcLagWin[]; +extern const int16_t WebRtcIlbcfix_kLpcChirpSyntDenum[]; +extern const int16_t WebRtcIlbcfix_kLpcChirpWeightDenum[]; +extern const int16_t WebRtcIlbcfix_kLsfDimCb[]; +extern const int16_t WebRtcIlbcfix_kLsfSizeCb[]; +extern const int16_t WebRtcIlbcfix_kLsfCb[]; +extern const int16_t WebRtcIlbcfix_kLsfWeight20ms[]; +extern const int16_t WebRtcIlbcfix_kLsfWeight30ms[]; +extern const int16_t WebRtcIlbcfix_kLsfMean[]; +extern const int16_t WebRtcIlbcfix_kLspMean[]; +extern const int16_t WebRtcIlbcfix_kCos[]; +extern const int16_t WebRtcIlbcfix_kCosDerivative[]; +extern const int16_t WebRtcIlbcfix_kCosGrid[]; +extern const int16_t WebRtcIlbcfix_kAcosDerivative[]; + +/* state quantization tables (kStateSq3: 8 entries; kChooseFrgQuant, kScale and kFrgQuantMod: 64 entries each) */ + +extern const int16_t WebRtcIlbcfix_kStateSq3[]; +extern const int32_t WebRtcIlbcfix_kChooseFrgQuant[]; +extern const int16_t WebRtcIlbcfix_kScale[]; +extern const int16_t WebRtcIlbcfix_kFrgQuantMod[]; + +/* Ranges for search and filters at different subframes (kFilterRange: 5 entries) */ + +extern const size_t WebRtcIlbcfix_kSearchRange[5][CB_NSTAGES];
+extern const size_t WebRtcIlbcfix_kFilterRange[]; + +/* gain quantization tables (kGainSq3: 9, kGainSq4: 17, kGainSq5: 33, kGainSq5Sq: 32 entries; kGain: 3 table pointers in the order Sq5, Sq4, Sq3) */ + +extern const int16_t WebRtcIlbcfix_kGainSq3[]; +extern const int16_t WebRtcIlbcfix_kGainSq4[]; +extern const int16_t WebRtcIlbcfix_kGainSq5[]; +extern const int16_t WebRtcIlbcfix_kGainSq5Sq[]; +extern const int16_t* const WebRtcIlbcfix_kGain[]; + +/* adaptive codebook definitions (kCbFiltersRev: CB_FILTERLEN entries; kAlpha: 4 entries) */ + +extern const int16_t WebRtcIlbcfix_kCbFiltersRev[]; +extern const int16_t WebRtcIlbcfix_kAlpha[]; + +/* enhancer definitions (kEnhWt: 3 entries; kEnhPlocs: ENH_NBLOCKS_TOT entries) */ + +extern const int16_t WebRtcIlbcfix_kEnhPolyPhaser[ENH_UPS0] + [ENH_FLO_MULT2_PLUS1]; +extern const int16_t WebRtcIlbcfix_kEnhWt[]; +extern const size_t WebRtcIlbcfix_kEnhPlocs[]; + +/* PLC tables (6 entries each) */ + +extern const int16_t WebRtcIlbcfix_kPlcPerSqr[]; +extern const int16_t WebRtcIlbcfix_kPlcPitchFact[]; +extern const int16_t WebRtcIlbcfix_kPlcPfSlope[]; + +#endif /* MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CONSTANTS_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c new file mode 100644 index 0000000000..7e21faee6c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CreateAugmentedVec.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "rtc_base/sanitizer.h" + +/*----------------------------------------------------------------* + * Recreate a specific codebook vector from the augmented part. + * cbVec first receives `index` samples copied from buffer-index; its last min(index, 4) samples are then replaced by a cross-fade, weighted by WebRtcIlbcfix_kAlpha (Q15), between the samples just before buffer-index and the samples just before buffer; finally min(SUBL-index, index) more samples from buffer-index are written at cbVec+index. + *----------------------------------------------------------------*/ + +void WebRtcIlbcfix_CreateAugmentedVec( + size_t index, /* (i) Index for the augmented vector to be + created */ + const int16_t* buffer, /* (i) Pointer to the end of the codebook memory + that is used for creation of the augmented + codebook */ + int16_t* cbVec) { /* (o) The constructed codebook vector */ + size_t ilow; + const int16_t *ppo, *ppi; + int16_t cbVecTmp[4]; + /* Interpolation starts 4 elements before cbVec+index, but must not start + outside `cbVec`; clamping interp_len to stay within `cbVec`. NOTE(review): with index == 0, interp_len is 0 and the expression interp_len - 1 used below wraps around; presumably callers never pass index 0 - confirm.
+ */ + size_t interp_len = WEBRTC_SPL_MIN(index, 4); + + rtc_MsanCheckInitialized(buffer - index - interp_len, sizeof(buffer[0]), + index + interp_len); + + ilow = index - interp_len; + + /* copy the first noninterpolated part (the last interp_len of these samples are overwritten by the interpolation below) */ + ppo = buffer-index; + WEBRTC_SPL_MEMCPY_W16(cbVec, ppo, index); + + /* interpolation: ppo = the interp_len samples just before `buffer`, ppi = the interp_len samples just before buffer-index */ + ppo = buffer - interp_len; + ppi = buffer - index - interp_len; + + /* perform cbVec[ilow+k] = ((ppi[k]*alphaTbl[k])>>15) + + ((ppo[k]*alphaTbl[interp_len-1-k])>>15); + for k = 0..interp_len-1, where alphaTbl is WebRtcIlbcfix_kAlpha + */ + WebRtcSpl_ElementwiseVectorMult(&cbVec[ilow], ppi, WebRtcIlbcfix_kAlpha, + interp_len, 15); + WebRtcSpl_ReverseOrderMultArrayElements( + cbVecTmp, ppo, &WebRtcIlbcfix_kAlpha[interp_len - 1], interp_len, 15); + WebRtcSpl_AddVectorsAndShift(&cbVec[ilow], &cbVec[ilow], cbVecTmp, interp_len, + 0); + + /* copy the second noninterpolated part */ + ppo = buffer - index; + /* `tempbuff2` is declared in WebRtcIlbcfix_GetCbVec and is SUBL+5 elements + long. `buffer` points one element past the end of that vector, i.e., at + tempbuff2+SUBL+5. Since ppo=buffer-index, we cannot read any more than + `index` elements from `ppo`. + + `cbVec` is declared to be SUBL elements long in WebRtcIlbcfix_CbConstruct. + Therefore, we can only write SUBL-index elements to cbVec+index. + + These two conditions limit the number of elements to copy. + */ + WEBRTC_SPL_MEMCPY_W16(cbVec+index, ppo, WEBRTC_SPL_MIN(SUBL-index, index)); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h new file mode 100644 index 0000000000..d7e5be1c2f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/create_augmented_vec.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_CreateAugmentedVec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CREATE_AUGMENTED_VEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CREATE_AUGMENTED_VEC_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Recreate a specific codebook vector from the augmented part. + * + *----------------------------------------------------------------*/ + +void WebRtcIlbcfix_CreateAugmentedVec( + size_t index, /* (i) Index for the augmented vector to be + created */ + const int16_t* buffer, /* (i) Pointer to the end of the codebook memory + that is used for creation of the augmented + codebook */ + int16_t* cbVec); /* (o) The constructed codebook vector */ + +#endif /* MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_CREATE_AUGMENTED_VEC_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c new file mode 100644 index 0000000000..d7621d5b65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Decode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/decode_residual.h" +#include "modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/do_plc.h" +#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h" +#include "modules/audio_coding/codecs/ilbc/hp_output.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" +#include "modules/audio_coding/codecs/ilbc/init_decode.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h" +#include "modules/audio_coding/codecs/ilbc/unpack_bits.h" +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" +#include "rtc_base/system/arch.h" + +#ifndef WEBRTC_ARCH_BIG_ENDIAN +#include "modules/audio_coding/codecs/ilbc/swap_bytes.h" +#endif + +/*----------------------------------------------------------------* + * main decoder function: decodes one frame from `bytes` into `decblock` (iLBCdec_inst->blockl samples), running packet loss concealment instead when `mode` is 0 or a bit error is detected. Returns 0 on success; if residual decoding fails, the decoder is re-initialized with its previous mode and enhancer setting and -1 is returned. + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_DecodeImpl( + int16_t *decblock, /* (o) decoded signal block */ + const uint16_t *bytes, /* (i) encoded signal bits */ + IlbcDecoder *iLBCdec_inst, /* (i/o) the decoder state + structure */ + int16_t mode /* (i) 0: bad packet, PLC, + 1: normal */ + ) { + const int old_mode = iLBCdec_inst->mode; + const int old_use_enhancer = iLBCdec_inst->use_enhancer; + + size_t i; + int16_t order_plus_one; + + int16_t last_bit; + int16_t *data; + /* Stack based */ + int16_t decresidual[BLOCKL_MAX]; + int16_t PLCresidual[BLOCKL_MAX + LPC_FILTERORDER]; + int16_t syntdenum[NSUB_MAX*(LPC_FILTERORDER+1)]; + int16_t
PLClpc[LPC_FILTERORDER + 1]; +#ifndef WEBRTC_ARCH_BIG_ENDIAN + uint16_t swapped[NO_OF_WORDS_30MS]; +#endif + iLBC_bits *iLBCbits_inst = (iLBC_bits*)PLCresidual; + + /* Reuse some buffers that are non overlapping in order to save stack memory: iLBCbits_inst (above) and data both point into PLCresidual; data starts LPC_FILTERORDER samples in, leaving room in front of it for the synthesis filter state */ + data = &PLCresidual[LPC_FILTERORDER]; + + if (mode) { /* the data are good */ + + /* decode data */ + + /* Unpacketize bits into parameters */ + +#ifndef WEBRTC_ARCH_BIG_ENDIAN + WebRtcIlbcfix_SwapBytes(bytes, iLBCdec_inst->no_of_words, swapped); + last_bit = WebRtcIlbcfix_UnpackBits(swapped, iLBCbits_inst, iLBCdec_inst->mode); +#else + last_bit = WebRtcIlbcfix_UnpackBits(bytes, iLBCbits_inst, iLBCdec_inst->mode); +#endif + + /* Check for bit errors: startIdx must be 1..3 when mode is 20 and 1..5 when mode is 30, and last_bit must not be 1 */ + if (iLBCbits_inst->startIdx<1) + mode = 0; + if ((iLBCdec_inst->mode==20) && (iLBCbits_inst->startIdx>3)) + mode = 0; + if ((iLBCdec_inst->mode==30) && (iLBCbits_inst->startIdx>5)) + mode = 0; + if (last_bit==1) + mode = 0; + + if (mode) { /* No bit errors were detected, continue decoding */ + /* Stack based */ + int16_t lsfdeq[LPC_FILTERORDER*LPC_N_MAX]; + int16_t weightdenum[(LPC_FILTERORDER + 1)*NSUB_MAX]; + + /* adjust index */ + WebRtcIlbcfix_IndexConvDec(iLBCbits_inst->cb_index); + + /* decode the lsf */ + WebRtcIlbcfix_SimpleLsfDeQ(lsfdeq, (int16_t*)(iLBCbits_inst->lsf), iLBCdec_inst->lpc_n); + WebRtcIlbcfix_LsfCheck(lsfdeq, LPC_FILTERORDER, iLBCdec_inst->lpc_n); + WebRtcIlbcfix_DecoderInterpolateLsp(syntdenum, weightdenum, + lsfdeq, LPC_FILTERORDER, iLBCdec_inst); + + /* Decode the residual using the cb and gain indexes */ + if (!WebRtcIlbcfix_DecodeResidual(iLBCdec_inst, iLBCbits_inst, + decresidual, syntdenum)) + goto error; + + /* preparing the plc for a future loss!
*/ + WebRtcIlbcfix_DoThePlc( + PLCresidual, PLClpc, 0, decresidual, + syntdenum + (LPC_FILTERORDER + 1) * (iLBCdec_inst->nsub - 1), + iLBCdec_inst->last_lag, iLBCdec_inst); + + /* Use the output from doThePLC */ + WEBRTC_SPL_MEMCPY_W16(decresidual, PLCresidual, iLBCdec_inst->blockl); + } + + } + + if (mode == 0) { + /* the data is bad (either a PLC call + * was made or a bit error was detected) + */ + + /* packet loss conceal */ + + WebRtcIlbcfix_DoThePlc(PLCresidual, PLClpc, 1, decresidual, syntdenum, + iLBCdec_inst->last_lag, iLBCdec_inst); + + WEBRTC_SPL_MEMCPY_W16(decresidual, PLCresidual, iLBCdec_inst->blockl); + + /* use the PLC filter coefficients (PLClpc) for every subframe */ order_plus_one = LPC_FILTERORDER + 1; + + for (i = 0; i < iLBCdec_inst->nsub; i++) { + WEBRTC_SPL_MEMCPY_W16(syntdenum+(i*order_plus_one), + PLClpc, order_plus_one); + } + } + + if ((*iLBCdec_inst).use_enhancer == 1) { /* Enhancer activated */ + + /* Update the filter and filter coefficients if there was a packet loss */ + if (iLBCdec_inst->prev_enh_pl==2) { + for (i=0;i<iLBCdec_inst->nsub;i++) { + WEBRTC_SPL_MEMCPY_W16(&(iLBCdec_inst->old_syntdenum[i*(LPC_FILTERORDER+1)]), + syntdenum, (LPC_FILTERORDER+1)); + } + } + + /* post filtering */ + (*iLBCdec_inst).last_lag = + WebRtcIlbcfix_EnhancerInterface(data, decresidual, iLBCdec_inst); + + /* synthesis filtering */ + + /* Set up the filter state */ + WEBRTC_SPL_MEMCPY_W16(&data[-LPC_FILTERORDER], iLBCdec_inst->syntMem, LPC_FILTERORDER); + + if (iLBCdec_inst->mode==20) { + /* Enhancer has 40 samples delay: the first subframe is filtered with the last coefficient set of old_syntdenum, subframe i with set i-1 of syntdenum */ + i=0; + WebRtcSpl_FilterARFastQ12( + data, data, + iLBCdec_inst->old_syntdenum + (i+iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + + for (i=1; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum+(i-1)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + + } else if (iLBCdec_inst->mode==30) { + /* Enhancer has 80 samples delay: subframes 0 and 1 are filtered with sets 4 and 5 of old_syntdenum, subframe i with set i-2 of syntdenum */ + for (i=0; i < 2; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, +
iLBCdec_inst->old_syntdenum + (i+4)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + for (i=2; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum+(i-2)*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + } + + /* Save the filter state */ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &data[iLBCdec_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER); + + } else { /* Enhancer not activated */ + size_t lag; + + /* Find last lag (since the enhancer is not called to give this info) */ + lag = 20; + if (iLBCdec_inst->mode==20) { + lag = WebRtcIlbcfix_XcorrCoef( + &decresidual[iLBCdec_inst->blockl-60], + &decresidual[iLBCdec_inst->blockl-60-lag], + 60, + 80, lag, -1); + } else { + lag = WebRtcIlbcfix_XcorrCoef( + &decresidual[iLBCdec_inst->blockl-ENH_BLOCKL], + &decresidual[iLBCdec_inst->blockl-ENH_BLOCKL-lag], + ENH_BLOCKL, + 100, lag, -1); + } + + /* Store lag (it is needed if next packet is lost) */ + (*iLBCdec_inst).last_lag = lag; + + /* copy data and run synthesis filter */ + WEBRTC_SPL_MEMCPY_W16(data, decresidual, iLBCdec_inst->blockl); + + /* Set up the filter state */ + WEBRTC_SPL_MEMCPY_W16(&data[-LPC_FILTERORDER], iLBCdec_inst->syntMem, LPC_FILTERORDER); + + for (i=0; i < iLBCdec_inst->nsub; i++) { + WebRtcSpl_FilterARFastQ12( + data+i*SUBL, data+i*SUBL, + syntdenum + i*(LPC_FILTERORDER+1), + LPC_FILTERORDER+1, SUBL); + } + + /* Save the filter state */ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &data[iLBCdec_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER); + } + + WEBRTC_SPL_MEMCPY_W16(decblock,data,iLBCdec_inst->blockl); + + /* High pass filter the signal (with upscaling a factor 2 and saturation) */ + WebRtcIlbcfix_HpOutput(decblock, (int16_t*)WebRtcIlbcfix_kHpOutCoefs, + iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx, + iLBCdec_inst->blockl); + + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->old_syntdenum, + syntdenum, iLBCdec_inst->nsub*(LPC_FILTERORDER+1)); + + iLBCdec_inst->prev_enh_pl=0; + + if (mode==0) {
/* PLC was used */ + iLBCdec_inst->prev_enh_pl=1; + } + + return 0; // Success. + +error: + // The decoder got sick from eating that data. Reset it and return. + WebRtcIlbcfix_InitDecode(iLBCdec_inst, old_mode, old_use_enhancer); + return -1; // Error +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h new file mode 100644 index 0000000000..a7d2910115 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Decode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_H_ + +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * main decoder function + *---------------------------------------------------------------*/ + +// Returns 0 on success, -1 on error.
+ABSL_MUST_USE_RESULT +int WebRtcIlbcfix_DecodeImpl( + int16_t* decblock, /* (o) decoded signal block */ + const uint16_t* bytes, /* (i) encoded signal bits */ + IlbcDecoder* iLBCdec_inst, /* (i/o) the decoder state + structure */ + int16_t mode /* (i) 0: bad packet, PLC, + 1: normal (the definition treats any non-zero value as normal) */ +); + +#endif /* MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c new file mode 100644 index 0000000000..a9668e2889 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecodeResidual.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decode_residual.h" + +#include <string.h> + +#include "modules/audio_coding/codecs/ilbc/cb_construct.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/do_plc.h" +#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/state_construct.h" +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" + +/*----------------------------------------------------------------* + * frame residual decoder function (subroutine to iLBC_decode); returns true on success and false as soon as a WebRtcIlbcfix_CbConstruct call fails + *---------------------------------------------------------------*/ + +bool WebRtcIlbcfix_DecodeResidual( + IlbcDecoder *iLBCdec_inst, + /* (i/o) the decoder state structure */ + iLBC_bits *iLBC_encbits, /* (i/o) Encoded bits, which are used + for the decoding */ + int16_t *decresidual, /* (o) decoded residual frame */ + int16_t *syntdenum /* (i) the decoded synthesis filter + coefficients */ + ) { + size_t meml_gotten, diff, start_pos; + size_t subcount, subframe; + int16_t *reverseDecresidual = iLBCdec_inst->enh_buf; /* Reversed decoded data, used for decoding backwards in time (reuse memory in state) */ + int16_t *memVec = iLBCdec_inst->prevResidual; /* Memory for codebook and filter state (reuse memory in state) */ + int16_t *mem = &memVec[CB_HALFFILTERLEN]; /* Memory for codebook */ + + diff = STATE_LEN - iLBCdec_inst->state_short_len; + + if (iLBC_encbits->state_first == 1) { + start_pos = (iLBC_encbits->startIdx-1)*SUBL; + } else { + start_pos = (iLBC_encbits->startIdx-1)*SUBL + diff; + } + + /*
decode scalar part of start state */ + + WebRtcIlbcfix_StateConstruct(iLBC_encbits->idxForMax, + iLBC_encbits->idxVec, &syntdenum[(iLBC_encbits->startIdx-1)*(LPC_FILTERORDER+1)], + &decresidual[start_pos], iLBCdec_inst->state_short_len + ); + + if (iLBC_encbits->state_first) { /* put adaptive part in the end */ + + /* setup memory */ + + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCdec_inst->state_short_len); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-iLBCdec_inst->state_short_len, decresidual+start_pos, + iLBCdec_inst->state_short_len); + + /* construct decoded vector */ + + if (!WebRtcIlbcfix_CbConstruct( + &decresidual[start_pos + iLBCdec_inst->state_short_len], + iLBC_encbits->cb_index, iLBC_encbits->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff)) + return false; // Error. + + } + else {/* put adaptive part in the beginning */ + + /* setup memory */ + + meml_gotten = iLBCdec_inst->state_short_len; + WebRtcSpl_MemCpyReversedOrder(mem+CB_MEML-1, + decresidual+start_pos, meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten); + + /* construct decoded vector */ + + if (!WebRtcIlbcfix_CbConstruct(reverseDecresidual, iLBC_encbits->cb_index, + iLBC_encbits->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, + diff)) + return false; // Error.
+ + /* get decoded residual from reversed vector */ + + WebRtcSpl_MemCpyReversedOrder(&decresidual[start_pos-1], + reverseDecresidual, diff); + } + + /* counter for predicted subframes */ + + subcount=1; + + /* forward prediction of subframes */ + + if (iLBCdec_inst->nsub > iLBC_encbits->startIdx + 1) { + + /* setup memory */ + WebRtcSpl_MemSetW16(mem, 0, CB_MEML-STATE_LEN); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-STATE_LEN, + decresidual+(iLBC_encbits->startIdx-1)*SUBL, STATE_LEN); + + /* loop over subframes to decode */ + + size_t Nfor = iLBCdec_inst->nsub - iLBC_encbits->startIdx - 1; + for (subframe=0; subframe<Nfor; subframe++) { + + /* construct decoded vector */ + if (!WebRtcIlbcfix_CbConstruct( + &decresidual[(iLBC_encbits->startIdx + 1 + subframe) * SUBL], + iLBC_encbits->cb_index + subcount * CB_NSTAGES, + iLBC_encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)) + return false; // Error. + + /* update memory */ + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &decresidual[(iLBC_encbits->startIdx+1+subframe)*SUBL], SUBL); + + subcount++; + } + + } + + /* backward prediction of subframes */ + + if (iLBC_encbits->startIdx > 1) { + + /* setup memory */ + + meml_gotten = SUBL*(iLBCdec_inst->nsub+1-iLBC_encbits->startIdx); + if( meml_gotten > CB_MEML ) { + meml_gotten=CB_MEML; + } + + WebRtcSpl_MemCpyReversedOrder(mem+CB_MEML-1, + decresidual+(iLBC_encbits->startIdx-1)*SUBL, meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten); + + /* loop over subframes to decode */ + + size_t Nback = iLBC_encbits->startIdx - 1; + for (subframe=0; subframe<Nback; subframe++) { + + /* construct decoded vector */ + if (!WebRtcIlbcfix_CbConstruct( + &reverseDecresidual[subframe * SUBL], + iLBC_encbits->cb_index + subcount * CB_NSTAGES, + iLBC_encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)) + return false; // Error.
+ + /* update memory */ + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &reverseDecresidual[subframe*SUBL], SUBL); + + subcount++; + } + + /* get decoded residual from reversed vector */ + WebRtcSpl_MemCpyReversedOrder(decresidual+SUBL*Nback-1, + reverseDecresidual, SUBL*Nback); + } + + return true; // Success. +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h new file mode 100644 index 0000000000..d079577661 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decode_residual.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecodeResidual.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_RESIDUAL_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_RESIDUAL_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * frame residual decoder function (subroutine to iLBC_decode) + *---------------------------------------------------------------*/ + +// Returns true on success, false on failure. In case of failure, the decoder +// state may be corrupted and needs resetting.
+ABSL_MUST_USE_RESULT +bool WebRtcIlbcfix_DecodeResidual( + IlbcDecoder* iLBCdec_inst, /* (i/o) the decoder state structure */ + iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits, which are used + for the decoding */ + int16_t* decresidual, /* (o) decoded residual frame */ + int16_t* syntdenum /* (i) the decoded synthesis filter + coefficients */ +); + +#endif /* MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DECODE_RESIDUAL_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c new file mode 100644 index 0000000000..d96bb9b2e9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_DecoderInterpolateLsp.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h" + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h" + +/*----------------------------------------------------------------* + * obtain synthesis and weighting filters form lsf coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_DecoderInterpolateLsp( + int16_t *syntdenum, /* (o) synthesis filter coefficients */ + int16_t *weightdenum, /* (o) weighting denumerator + coefficients */ + int16_t *lsfdeq, /* (i) dequantized lsf coefficients */ + int16_t length, /* (i) length of lsf coefficient vector */ + IlbcDecoder *iLBCdec_inst + /* (i) the decoder state structure */ + ){ + size_t i; + int pos, lp_length; + int16_t lp[LPC_FILTERORDER + 1], *lsfdeq2; + + lsfdeq2 = lsfdeq + length; + lp_length = length + 1; + + if (iLBCdec_inst->mode==30) { + /* subframe 1: Interpolation between old and first LSF */ + + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, (*iLBCdec_inst).lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight30ms[0], length); + WEBRTC_SPL_MEMCPY_W16(syntdenum,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum, lp, (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + + /* subframes 2 to 6: interpolation between first and last LSF */ + + pos = lp_length; + for (i = 1; i < 6; i++) { + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, lsfdeq, lsfdeq2, + WebRtcIlbcfix_kLsfWeight30ms[i], length); + WEBRTC_SPL_MEMCPY_W16(syntdenum + pos,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum + pos, lp, + 
(int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + pos += lp_length; + } + } else { /* iLBCdec_inst->mode=20 */ + /* subframes 1 to 4: interpolation between old and new LSF */ + pos = 0; + for (i = 0; i < iLBCdec_inst->nsub; i++) { + WebRtcIlbcfix_LspInterpolate2PolyDec(lp, iLBCdec_inst->lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight20ms[i], length); + WEBRTC_SPL_MEMCPY_W16(syntdenum+pos,lp,lp_length); + WebRtcIlbcfix_BwExpand(weightdenum+pos, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, (int16_t)lp_length); + pos += lp_length; + } + } + + /* update memory */ + + if (iLBCdec_inst->mode==30) { + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, lsfdeq2, length); + } else { + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, lsfdeq, length); + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h new file mode 100644 index 0000000000..8b08114467 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/decoder_interpolate_lsf.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  obtain synthesis and weighting filters from lsf coefficients
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_DecoderInterpolateLsp(
    int16_t* syntdenum,   /* (o) synthesis filter coefficients */
    int16_t* weightdenum, /* (o) weighting denominator
                                 coefficients */
    int16_t* lsfdeq,      /* (i) dequantized lsf coefficients */
    int16_t length,       /* (i) length of lsf coefficient vector */
    IlbcDecoder* iLBCdec_inst
    /* (i/o) the decoder state structure (its stored old LSF vector
             is used for the interpolation and then replaced) */
);
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + define.h + +******************************************************************/ +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DEFINES_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_DEFINES_H_ + +#include <stdint.h> +#include <string.h> + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* general codec settings */ + +#define FS 8000 +#define BLOCKL_20MS 160 +#define BLOCKL_30MS 240 +#define BLOCKL_MAX 240 +#define NSUB_20MS 4 +#define NSUB_30MS 6 +#define NSUB_MAX 6 +#define NASUB_20MS 2 +#define NASUB_30MS 4 +#define NASUB_MAX 4 +#define SUBL 40 +#define STATE_LEN 80 +#define STATE_SHORT_LEN_30MS 58 +#define STATE_SHORT_LEN_20MS 57 + +/* LPC settings */ + +#define LPC_FILTERORDER 10 +#define LPC_LOOKBACK 60 +#define LPC_N_20MS 1 +#define LPC_N_30MS 2 +#define LPC_N_MAX 2 +#define LPC_ASYMDIFF 20 +#define LSF_NSPLIT 3 +#define LSF_NUMBER_OF_STEPS 4 +#define LPC_HALFORDER 5 +#define COS_GRID_POINTS 60 + +/* cb settings */ + +#define CB_NSTAGES 3 +#define CB_EXPAND 2 +#define CB_MEML 147 +#define CB_FILTERLEN (2 * 4) +#define CB_HALFFILTERLEN 4 +#define CB_RESRANGE 34 +#define CB_MAXGAIN_FIXQ6 83 /* error = -0.24% */ +#define CB_MAXGAIN_FIXQ14 21299 + +/* enhancer */ + +#define ENH_BLOCKL 80 /* block length */ +#define ENH_BLOCKL_HALF (ENH_BLOCKL / 2) +#define ENH_HL \ + 3 /* 2*ENH_HL+1 is number blocks \ + in said second \ + sequence */ +#define ENH_SLOP \ + 2 /* max difference estimated and \ + correct pitch period */ +#define ENH_PLOCSL \ + 8 /* pitch-estimates and \ + pitch-locations buffer \ + length */ +#define ENH_OVERHANG 2 +#define ENH_UPS0 4 /* upsampling rate */ +#define ENH_FL0 3 /* 2*FLO+1 is the length of each filter */ +#define ENH_FLO_MULT2_PLUS1 7 +#define ENH_VECTL (ENH_BLOCKL + 2 * ENH_FL0) +#define ENH_CORRDIM (2 * ENH_SLOP + 1) +#define ENH_NBLOCKS (BLOCKL / ENH_BLOCKL) 
+#define ENH_NBLOCKS_EXTRA 5 +#define ENH_NBLOCKS_TOT 8 /* ENH_NBLOCKS+ENH_NBLOCKS_EXTRA */ +#define ENH_BUFL (ENH_NBLOCKS_TOT) * ENH_BLOCKL +#define ENH_BUFL_FILTEROVERHEAD 3 +#define ENH_A0 819 /* Q14 */ +#define ENH_A0_MINUS_A0A0DIV4 848256041 /* Q34 */ +#define ENH_A0DIV2 26843546 /* Q30 */ + +/* PLC */ + +/* Down sampling */ + +#define FILTERORDER_DS_PLUS1 7 +#define DELAY_DS 3 +#define FACTOR_DS 2 + +/* bit stream defs */ + +#define NO_OF_BYTES_20MS 38 +#define NO_OF_BYTES_30MS 50 +#define NO_OF_WORDS_20MS 19 +#define NO_OF_WORDS_30MS 25 +#define STATE_BITS 3 +#define BYTE_LEN 8 +#define ULP_CLASSES 3 + +/* help parameters */ + +#define TWO_PI_FIX 25736 /* Q12 */ + +/* Constants for codebook search and creation */ + +#define ST_MEM_L_TBL 85 +#define MEM_LF_TBL 147 + +/* Struct for the bits */ +typedef struct iLBC_bits_t_ { + int16_t lsf[LSF_NSPLIT * LPC_N_MAX]; + int16_t cb_index[CB_NSTAGES * (NASUB_MAX + 1)]; /* First CB_NSTAGES values + contains extra CB index */ + int16_t gain_index[CB_NSTAGES * (NASUB_MAX + 1)]; /* First CB_NSTAGES values + contains extra CB gain */ + size_t idxForMax; + int16_t state_first; + int16_t idxVec[STATE_SHORT_LEN_30MS]; + int16_t firstbits; + size_t startIdx; +} iLBC_bits; + +/* type definition encoder instance */ +typedef struct IlbcEncoder_ { + /* flag for frame size mode */ + int16_t mode; + + /* basic parameters for different frame sizes */ + size_t blockl; + size_t nsub; + int16_t nasub; + size_t no_of_bytes, no_of_words; + int16_t lpc_n; + size_t state_short_len; + + /* analysis filter state */ + int16_t anaMem[LPC_FILTERORDER]; + + /* Fix-point old lsf parameters for interpolation */ + int16_t lsfold[LPC_FILTERORDER]; + int16_t lsfdeqold[LPC_FILTERORDER]; + + /* signal buffer for LP analysis */ + int16_t lpc_buffer[LPC_LOOKBACK + BLOCKL_MAX]; + + /* state of input HP filter */ + int16_t hpimemx[2]; + int16_t hpimemy[4]; + +#ifdef SPLIT_10MS + int16_t weightdenumbuf[66]; + int16_t past_samples[160]; + uint16_t bytes[25]; 
+ int16_t section; + int16_t Nfor_flag; + int16_t Nback_flag; + int16_t start_pos; + size_t diff; +#endif + +} IlbcEncoder; + +/* type definition decoder instance */ +typedef struct IlbcDecoder_ { + /* flag for frame size mode */ + int16_t mode; + + /* basic parameters for different frame sizes */ + size_t blockl; + size_t nsub; + int16_t nasub; + size_t no_of_bytes, no_of_words; + int16_t lpc_n; + size_t state_short_len; + + /* synthesis filter state */ + int16_t syntMem[LPC_FILTERORDER]; + + /* old LSF for interpolation */ + int16_t lsfdeqold[LPC_FILTERORDER]; + + /* pitch lag estimated in enhancer and used in PLC */ + size_t last_lag; + + /* PLC state information */ + int consPLICount, prev_enh_pl; + int16_t perSquare; + + int16_t prevScale, prevPLI; + size_t prevLag; + int16_t prevLpc[LPC_FILTERORDER + 1]; + int16_t prevResidual[NSUB_MAX * SUBL]; + int16_t seed; + + /* previous synthesis filter parameters */ + + int16_t old_syntdenum[(LPC_FILTERORDER + 1) * NSUB_MAX]; + + /* state of output HP filter */ + int16_t hpimemx[2]; + int16_t hpimemy[4]; + + /* enhancer state information */ + int use_enhancer; + int16_t enh_buf[ENH_BUFL + ENH_BUFL_FILTEROVERHEAD]; + size_t enh_period[ENH_NBLOCKS_TOT]; + +} IlbcDecoder; + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c new file mode 100644 index 0000000000..9ca6ca48e9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/do_plc.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  Packet loss concealment routine. Conceals a residual signal
 *  and LP parameters. If no packet loss, update state.
 *
 *  PLI == 1 (frame lost):
 *    - consPLICount is incremented;
 *    - if the previous frame was not lost, a pitch lag is searched in
 *      prevResidual around inlag (inlag-3 .. inlag+3) and a squared
 *      periodicity measure is computed for it; otherwise the lag and
 *      measure stored in the state (prevLag, perSquare) are reused;
 *    - PLCresidual is built sample by sample as a mix of a pitch
 *      repetition and a pseudo-randomly positioned excerpt of
 *      prevResidual, PLClpc is a copy of prevLpc;
 *    - prevLag and perSquare are stored for a following lost frame.
 *  Otherwise (no loss): decresidual and lpc are copied to the outputs
 *    and consPLICount is reset.
 *  In both cases PLI, PLClpc and PLCresidual are saved in the state as
 *  prevPLI, prevLpc and prevResidual.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_DoThePlc(
    int16_t *PLCresidual,  /* (o) concealed residual */
    int16_t *PLClpc,    /* (o) concealed LP parameters */
    int16_t PLI,      /* (i) packet loss indicator
                                                           0 - no PL, 1 = PL */
    int16_t *decresidual,  /* (i) decoded residual */
    int16_t *lpc,      /* (i) decoded LPC (only used for
                                                           no PL) */
    size_t inlag,      /* (i) pitch lag */
    IlbcDecoder *iLBCdec_inst
    /* (i/o) decoder instance */
                            ){
  size_t i;
  int32_t cross, ener, cross_comp, ener_comp = 0;
  int32_t measure, maxMeasure, energy;
  int32_t noise_energy_threshold_30dB;
  int16_t max, crossSquareMax, crossSquare;
  size_t j, lag, randlag;
  int16_t tmp1, tmp2;
  int16_t shift1, shift2, shift3, shiftMax;
  int16_t scale3;
  size_t corrLen;
  int32_t tmpW32, tmp2W32;
  int16_t use_gain;
  int16_t tot_gain;
  int16_t max_perSquare;
  int16_t scale1, scale2;
  int16_t totscale;
  int32_t nom;
  int16_t denom;
  int16_t pitchfact;
  size_t use_lag;
  int ind;
  int16_t randvec[BLOCKL_MAX];

  /* Packet Loss */
  if (PLI == 1) {

    (*iLBCdec_inst).consPLICount += 1;

    /* if previous frame not lost,
       determine pitch pred. gain */

    if (iLBCdec_inst->prevPLI != 1) {

      /* Maximum 60 samples are correlated, preserve as high accuracy
         as possible without getting overflow */
      max = WebRtcSpl_MaxAbsValueW16((*iLBCdec_inst).prevResidual,
                                     iLBCdec_inst->blockl);
      scale3 = (WebRtcSpl_GetSizeInBits(max)<<1) - 25;
      if (scale3 < 0) {
        scale3 = 0;
      }

      /* Store scale for use when interpolating between the
       * concealment and the received packet */
      iLBCdec_inst->prevScale = scale3;

      /* Search around the previous lag +/-3 to find the
         best pitch period */
      /* inlag-3 is the initial candidate; the loop below tries
         inlag-2 .. inlag+3 against it.
         NOTE(review): lag and corrLen are size_t, so inlag < 3 or
         inlag+3 > blockl would wrap around - presumably the caller
         guarantees the range; confirm. */
      lag = inlag - 3;

      /* Guard against getting outside the frame */
      corrLen = (size_t)WEBRTC_SPL_MIN(60, iLBCdec_inst->blockl-(inlag+3));

      WebRtcIlbcfix_CompCorr( &cross, &ener,
                              iLBCdec_inst->prevResidual, lag, iLBCdec_inst->blockl, corrLen, scale3);

      /* Normalize and store cross^2 and the number of shifts */
      shiftMax = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(cross))-15;
      crossSquareMax = (int16_t)((
          (int16_t)WEBRTC_SPL_SHIFT_W32(cross, -shiftMax) *
          (int16_t)WEBRTC_SPL_SHIFT_W32(cross, -shiftMax)) >> 15);

      for (j=inlag-2;j<=inlag+3;j++) {
        WebRtcIlbcfix_CompCorr( &cross_comp, &ener_comp,
                                iLBCdec_inst->prevResidual, j, iLBCdec_inst->blockl, corrLen, scale3);

        /* Use the criteria (corr*corr)/energy to compare if
           this lag is better or not. To avoid the division,
           do a cross multiplication */
        shift1 = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(cross_comp))-15;
        crossSquare = (int16_t)((
            (int16_t)WEBRTC_SPL_SHIFT_W32(cross_comp, -shift1) *
            (int16_t)WEBRTC_SPL_SHIFT_W32(cross_comp, -shift1)) >> 15);

        /* candidate cross^2 times best-so-far energy ... */
        shift2 = WebRtcSpl_GetSizeInBits(ener)-15;
        measure = (int16_t)WEBRTC_SPL_SHIFT_W32(ener, -shift2) * crossSquare;

        /* ... versus best-so-far cross^2 times candidate energy */
        shift3 = WebRtcSpl_GetSizeInBits(ener_comp)-15;
        maxMeasure = (int16_t)WEBRTC_SPL_SHIFT_W32(ener_comp, -shift3) *
            crossSquareMax;

        /* Calculate shift value, so that the two measures can
           be put in the same Q domain */
        if(2 * shiftMax + shift3 > 2 * shift1 + shift2) {
          tmp1 =
              WEBRTC_SPL_MIN(31, 2 * shiftMax + shift3 - 2 * shift1 - shift2);
          tmp2 = 0;
        } else {
          tmp1 = 0;
          tmp2 =
              WEBRTC_SPL_MIN(31, 2 * shift1 + shift2 - 2 * shiftMax - shift3);
        }

        if ((measure>>tmp1) > (maxMeasure>>tmp2)) {
          /* New lag is better => record lag, measure and domain */
          lag = j;
          crossSquareMax = crossSquare;
          cross = cross_comp;
          shiftMax = shift1;
          ener = ener_comp;
        }
      }

      /* Calculate the periodicity for the lag with the maximum correlation.

         Definition of the periodicity:
         abs(corr(vec1, vec2))/(sqrt(energy(vec1))*sqrt(energy(vec2)))

         Work in the Square domain to simplify the calculations
         max_perSquare is less than 1 (in Q15)
      */
      /* energy of the last corrLen samples of the previous residual */
      tmp2W32=WebRtcSpl_DotProductWithScale(&iLBCdec_inst->prevResidual[iLBCdec_inst->blockl-corrLen],
                                            &iLBCdec_inst->prevResidual[iLBCdec_inst->blockl-corrLen],
                                            corrLen, scale3);

      /* NOTE(review): the guard tests ener_comp (the energy of the last
         lag tried in the loop above, j = inlag+3) while the computation
         below normalizes with ener (the energy of the selected lag) -
         confirm this is intended. */
      if ((tmp2W32>0)&&(ener_comp>0)) {
        /* norm energies to int16_t, compute the product of the energies and
           use the upper int16_t as the denominator */

        scale1=(int16_t)WebRtcSpl_NormW32(tmp2W32)-16;
        tmp1=(int16_t)WEBRTC_SPL_SHIFT_W32(tmp2W32, scale1);

        scale2=(int16_t)WebRtcSpl_NormW32(ener)-16;
        tmp2=(int16_t)WEBRTC_SPL_SHIFT_W32(ener, scale2);
        denom = (int16_t)((tmp1 * tmp2) >> 16); /* in Q(scale1+scale2-16) */

        /* Square the cross correlation and norm it such that max_perSquare
           will be in Q15 after the division */

        totscale = scale1+scale2-1;
        tmp1 = (int16_t)WEBRTC_SPL_SHIFT_W32(cross, (totscale>>1));
        tmp2 = (int16_t)WEBRTC_SPL_SHIFT_W32(cross, totscale-(totscale>>1));

        nom = tmp1 * tmp2;
        max_perSquare = (int16_t)WebRtcSpl_DivW32W16(nom, denom);

      } else {
        max_perSquare = 0;
      }
    }

    /* previous frame lost, use recorded lag and gain */

    else {
      lag = iLBCdec_inst->prevLag;
      max_perSquare = iLBCdec_inst->perSquare;
    }

    /* Attenuate signal and scale down pitch pred gain if
       several frames lost consecutively */

    use_gain = 32767; /* 1.0 in Q15 */

    /* NOTE(review): the thresholds are tested in ascending order, so any
       product above 640, 960 or 1280 has already matched the first test
       (> 320). As written only 1.0 or 0.9 can be selected; the 0.7, 0.5
       and 0.0 branches are unreachable. Left untouched here because
       changing it alters the concealed output - confirm the intended
       behaviour before reordering. */
    if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>320) {
      use_gain = 29491;  /* 0.9 in Q15 */
    } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>640) {
      use_gain = 22938;  /* 0.7 in Q15 */
    } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>960) {
      use_gain = 16384;  /* 0.5 in Q15 */
    } else if (iLBCdec_inst->consPLICount*iLBCdec_inst->blockl>1280) {
      use_gain = 0;   /* 0.0 in Q15 */
    }

    /* Compute mixing factor of pitch repetition and noise:
       for max_per>0.7 set periodicity to 1.0
       0.4<max_per<0.7 set periodicity to (max_per-0.4)/(0.7-0.4)
       max_per<0.4 set periodicity to 0.0
    */

    if (max_perSquare>7868) { /* periodicity > 0.7  (0.7^4=0.2401 in Q15) */
      pitchfact = 32767;
    } else if (max_perSquare>839) { /* 0.4 < periodicity < 0.7 (0.4^4=0.0256 in Q15) */
      /* find best index and interpolate from that */
      ind = 5;
      while ((max_perSquare<WebRtcIlbcfix_kPlcPerSqr[ind])&&(ind>0)) {
        ind--;
      }
      /* pitch fact is approximated by first order */
      tmpW32 = (int32_t)WebRtcIlbcfix_kPlcPitchFact[ind] +
          ((WebRtcIlbcfix_kPlcPfSlope[ind] *
            (max_perSquare - WebRtcIlbcfix_kPlcPerSqr[ind])) >> 11);

      pitchfact = (int16_t)WEBRTC_SPL_MIN(tmpW32, 32767); /* guard against overflow */

    } else { /* periodicity < 0.4 */
      pitchfact = 0;
    }

    /* avoid repetition of same pitch cycle (buzziness) */
    use_lag = lag;
    if (lag<80) {
      use_lag = 2*lag;
    }

    /* compute concealed residual */
    /* threshold on the summed squared samples: 900 per sample
       (presumably 30^2, matching the "30 dB" wording below) */
    noise_energy_threshold_30dB = (int32_t)iLBCdec_inst->blockl * 900;
    energy = 0;
    for (i=0; i<iLBCdec_inst->blockl; i++) {

      /* noise component -  52 < randlagFIX < 117 */
      iLBCdec_inst->seed = (int16_t)(iLBCdec_inst->seed * 31821 + 13849);
      randlag = 53 + (iLBCdec_inst->seed & 63);
      if (randlag > i) {
        /* lag reaches before this frame: read from the end of the
           previous residual */
        randvec[i] =
            iLBCdec_inst->prevResidual[iLBCdec_inst->blockl + i - randlag];
      } else {
        randvec[i] = iLBCdec_inst->prevResidual[i - randlag];
      }

      /* pitch repetition component */
      if (use_lag > i) {
        PLCresidual[i] =
            iLBCdec_inst->prevResidual[iLBCdec_inst->blockl + i - use_lag];
      } else {
        /* repeat a sample already produced in this concealed frame */
        PLCresidual[i] = PLCresidual[i - use_lag];
      }

      /* Attenuate total gain for each 10 ms */
      if (i<80) {
        tot_gain=use_gain;
      } else if (i<160) {
        tot_gain = (int16_t)((31130 * use_gain) >> 15);  /* 0.95*use_gain */
      } else {
        tot_gain = (int16_t)((29491 * use_gain) >> 15);  /* 0.9*use_gain */
      }


      /* mix noise and pitch repetition */
      PLCresidual[i] = (int16_t)((tot_gain *
          ((pitchfact * PLCresidual[i] + (32767 - pitchfact) * randvec[i] +
            16384) >> 15)) >> 15);

      /* Compute energy until threshold for noise energy is reached */
      if (energy < noise_energy_threshold_30dB) {
        energy += PLCresidual[i] * PLCresidual[i];
      }
    }

    /* less than 30 dB, use only noise */
    /* the mixed signal is discarded and replaced by randvec as is
       (no gain applied) */
    if (energy < noise_energy_threshold_30dB) {
      for (i=0; i<iLBCdec_inst->blockl; i++) {
        PLCresidual[i] = randvec[i];
      }
    }

    /* use the old LPC */
    WEBRTC_SPL_MEMCPY_W16(PLClpc, (*iLBCdec_inst).prevLpc, LPC_FILTERORDER+1);

    /* Update state in case there are multiple frame losses */
    iLBCdec_inst->prevLag = lag;
    iLBCdec_inst->perSquare = max_perSquare;
  }

  /* no packet loss, copy input */

  else {
    WEBRTC_SPL_MEMCPY_W16(PLCresidual, decresidual, iLBCdec_inst->blockl);
    WEBRTC_SPL_MEMCPY_W16(PLClpc, lpc, (LPC_FILTERORDER+1));
    iLBCdec_inst->consPLICount = 0;
  }

  /* update state */
  iLBCdec_inst->prevPLI = PLI;
  WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->prevLpc, PLClpc, (LPC_FILTERORDER+1));
  WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->prevResidual, PLCresidual, iLBCdec_inst->blockl);

  return;
}
/*----------------------------------------------------------------*
 *  Packet loss concealment routine. Conceals a residual signal
 *  and LP parameters. If no packet loss, update state.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_DoThePlc(
    int16_t* PLCresidual, /* (o) concealed residual */
    int16_t* PLClpc,      /* (o) concealed LP parameters */
    int16_t PLI,          /* (i) packet loss indicator
                                         0 - no PL, 1 = PL */
    int16_t* decresidual, /* (i) decoded residual */
    int16_t* lpc,         /* (i) decoded LPC (only used for no PL) */
    size_t inlag,         /* (i) pitch lag */
    IlbcDecoder* iLBCdec_inst
    /* (i/o) decoder instance */
);
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Encode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/encode.h" + +#include <string.h> + +#include "modules/audio_coding/codecs/ilbc/cb_construct.h" +#include "modules/audio_coding/codecs/ilbc/cb_search.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/frame_classify.h" +#include "modules/audio_coding/codecs/ilbc/hp_input.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_enc.h" +#include "modules/audio_coding/codecs/ilbc/lpc_encode.h" +#include "modules/audio_coding/codecs/ilbc/pack_bits.h" +#include "modules/audio_coding/codecs/ilbc/state_construct.h" +#include "modules/audio_coding/codecs/ilbc/state_search.h" +#include "rtc_base/checks.h" +#include "rtc_base/system/arch.h" + +#ifdef SPLIT_10MS +#include "modules/audio_coding/codecs/ilbc/unpack_bits.h" +#include "modules/audio_coding/codecs/ilbc/index_conv_dec.h" +#endif + +#ifndef WEBRTC_ARCH_BIG_ENDIAN +#include "modules/audio_coding/codecs/ilbc/swap_bytes.h" +#endif + +/*----------------------------------------------------------------* + * main encoder function + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_EncodeImpl( + uint16_t *bytes, /* (o) encoded data bits iLBC */ + const int16_t *block, /* (i) speech vector to encode */ + IlbcEncoder *iLBCenc_inst /* (i/o) the general encoder + state */ + ){ + size_t n, meml_gotten, Nfor; + size_t diff, start_pos; + size_t index; + size_t subcount, subframe; + size_t start_count, end_count; + int16_t *residual; + int32_t en1, en2; + int16_t scale, max; + int16_t *syntdenum; + int16_t *decresidual; + int16_t *reverseResidual; + int16_t *reverseDecresidual; + /* Stack based */ + int16_t 
weightdenum[(LPC_FILTERORDER + 1)*NSUB_MAX]; + int16_t dataVec[BLOCKL_MAX + LPC_FILTERORDER]; + int16_t memVec[CB_MEML+CB_FILTERLEN]; + int16_t bitsMemory[sizeof(iLBC_bits)/sizeof(int16_t)]; + iLBC_bits *iLBCbits_inst = (iLBC_bits*)bitsMemory; + + +#ifdef SPLIT_10MS + int16_t *weightdenumbuf = iLBCenc_inst->weightdenumbuf; + int16_t last_bit; +#endif + + int16_t *data = &dataVec[LPC_FILTERORDER]; + int16_t *mem = &memVec[CB_HALFFILTERLEN]; + + /* Reuse som buffers to save stack memory */ + residual = &iLBCenc_inst->lpc_buffer[LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl]; + syntdenum = mem; /* syntdenum[(LPC_FILTERORDER + 1)*NSUB_MAX] and mem are used non overlapping in the code */ + decresidual = residual; /* Already encoded residual is overwritten by the decoded version */ + reverseResidual = data; /* data and reverseResidual are used non overlapping in the code */ + reverseDecresidual = reverseResidual; /* Already encoded residual is overwritten by the decoded version */ + +#ifdef SPLIT_10MS + + WebRtcSpl_MemSetW16 ( (int16_t *) iLBCbits_inst, 0, + sizeof(iLBC_bits) / sizeof(int16_t) ); + + start_pos = iLBCenc_inst->start_pos; + diff = iLBCenc_inst->diff; + + if (iLBCenc_inst->section != 0){ + WEBRTC_SPL_MEMCPY_W16 (weightdenum, weightdenumbuf, + SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM); + /* Un-Packetize the frame into parameters */ + last_bit = WebRtcIlbcfix_UnpackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode); + if (last_bit) + return; + /* adjust index */ + WebRtcIlbcfix_IndexConvDec (iLBCbits_inst->cb_index); + + if (iLBCenc_inst->section == 1){ + /* Save first 80 samples of a 160/240 sample frame for 20/30msec */ + WEBRTC_SPL_MEMCPY_W16 (iLBCenc_inst->past_samples, block, 80); + } + else{ // iLBCenc_inst->section == 2 AND mode = 30ms + /* Save second 80 samples of a 240 sample frame for 30msec */ + WEBRTC_SPL_MEMCPY_W16 (iLBCenc_inst->past_samples + 80, block, 80); + } + } + else{ // iLBCenc_inst->section == 0 + /* form a 
complete frame of 160/240 for 20msec/30msec mode */ + WEBRTC_SPL_MEMCPY_W16 (data + (iLBCenc_inst->mode * 8) - 80, block, 80); + WEBRTC_SPL_MEMCPY_W16 (data, iLBCenc_inst->past_samples, + (iLBCenc_inst->mode * 8) - 80); + iLBCenc_inst->Nfor_flag = 0; + iLBCenc_inst->Nback_flag = 0; +#else + /* copy input block to data*/ + WEBRTC_SPL_MEMCPY_W16(data,block,iLBCenc_inst->blockl); +#endif + + /* high pass filtering of input signal and scale down the residual (*0.5) */ + WebRtcIlbcfix_HpInput(data, (int16_t*)WebRtcIlbcfix_kHpInCoefs, + iLBCenc_inst->hpimemy, iLBCenc_inst->hpimemx, + iLBCenc_inst->blockl); + + /* LPC of hp filtered input data */ + WebRtcIlbcfix_LpcEncode(syntdenum, weightdenum, iLBCbits_inst->lsf, data, + iLBCenc_inst); + + /* Set up state */ + WEBRTC_SPL_MEMCPY_W16(dataVec, iLBCenc_inst->anaMem, LPC_FILTERORDER); + + /* inverse filter to get residual */ + for (n=0; n<iLBCenc_inst->nsub; n++ ) { + WebRtcSpl_FilterMAFastQ12( + &data[n*SUBL], &residual[n*SUBL], + &syntdenum[n*(LPC_FILTERORDER+1)], + LPC_FILTERORDER+1, SUBL); + } + + /* Copy the state for next frame */ + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->anaMem, &data[iLBCenc_inst->blockl-LPC_FILTERORDER], LPC_FILTERORDER); + + /* find state location */ + + iLBCbits_inst->startIdx = WebRtcIlbcfix_FrameClassify(iLBCenc_inst,residual); + + /* check if state should be in first or last part of the + two subframes */ + + index = (iLBCbits_inst->startIdx-1)*SUBL; + max=WebRtcSpl_MaxAbsValueW16(&residual[index], 2*SUBL); + scale = WebRtcSpl_GetSizeInBits((uint32_t)(max * max)); + + /* Scale to maximum 25 bits so that the MAC won't cause overflow */ + scale = scale - 25; + if(scale < 0) { + scale = 0; + } + + diff = STATE_LEN - iLBCenc_inst->state_short_len; + en1=WebRtcSpl_DotProductWithScale(&residual[index], &residual[index], + iLBCenc_inst->state_short_len, scale); + index += diff; + en2=WebRtcSpl_DotProductWithScale(&residual[index], &residual[index], + iLBCenc_inst->state_short_len, scale); + if (en1 > en2) 
{ + iLBCbits_inst->state_first = 1; + start_pos = (iLBCbits_inst->startIdx-1)*SUBL; + } else { + iLBCbits_inst->state_first = 0; + start_pos = (iLBCbits_inst->startIdx-1)*SUBL + diff; + } + + /* scalar quantization of state */ + + WebRtcIlbcfix_StateSearch(iLBCenc_inst, iLBCbits_inst, &residual[start_pos], + &syntdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)], + &weightdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)]); + + WebRtcIlbcfix_StateConstruct(iLBCbits_inst->idxForMax, iLBCbits_inst->idxVec, + &syntdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)], + &decresidual[start_pos], iLBCenc_inst->state_short_len + ); + + /* predictive quantization in state */ + + if (iLBCbits_inst->state_first) { /* put adaptive part in the end */ + + /* setup memory */ + + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCenc_inst->state_short_len); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-iLBCenc_inst->state_short_len, + decresidual+start_pos, iLBCenc_inst->state_short_len); + + /* encode subframes */ + + WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index, iLBCbits_inst->gain_index, + &residual[start_pos+iLBCenc_inst->state_short_len], + mem+CB_MEML-ST_MEM_L_TBL, ST_MEM_L_TBL, diff, + &weightdenum[iLBCbits_inst->startIdx*(LPC_FILTERORDER+1)], 0); + + /* construct decoded vector */ + + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + &decresidual[start_pos + iLBCenc_inst->state_short_len], + iLBCbits_inst->cb_index, iLBCbits_inst->gain_index, + mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff)); + + } + else { /* put adaptive part in the beginning */ + + /* create reversed vectors for prediction */ + + WebRtcSpl_MemCpyReversedOrder(&reverseResidual[diff-1], + &residual[(iLBCbits_inst->startIdx+1)*SUBL-STATE_LEN], diff); + + /* setup memory */ + + meml_gotten = iLBCenc_inst->state_short_len; + WebRtcSpl_MemCpyReversedOrder(&mem[CB_MEML-1], &decresidual[start_pos], meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - iLBCenc_inst->state_short_len); + + /* encode subframes */ 
+ WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index, iLBCbits_inst->gain_index, + reverseResidual, mem+CB_MEML-ST_MEM_L_TBL, ST_MEM_L_TBL, diff, + &weightdenum[(iLBCbits_inst->startIdx-1)*(LPC_FILTERORDER+1)], + 0); + + /* construct decoded vector */ + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + reverseDecresidual, iLBCbits_inst->cb_index, + iLBCbits_inst->gain_index, mem + CB_MEML - ST_MEM_L_TBL, + ST_MEM_L_TBL, diff)); + + /* get decoded residual from reversed vector */ + + WebRtcSpl_MemCpyReversedOrder(&decresidual[start_pos-1], reverseDecresidual, diff); + } + +#ifdef SPLIT_10MS + iLBCenc_inst->start_pos = start_pos; + iLBCenc_inst->diff = diff; + iLBCenc_inst->section++; + /* adjust index */ + WebRtcIlbcfix_IndexConvEnc (iLBCbits_inst->cb_index); + /* Packetize the parameters into the frame */ + WebRtcIlbcfix_PackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode); + WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum, + SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM); + return; + } +#endif + + /* forward prediction of subframes */ + + Nfor = iLBCenc_inst->nsub-iLBCbits_inst->startIdx-1; + + /* counter for predicted subframes */ +#ifdef SPLIT_10MS + if (iLBCenc_inst->mode == 20) + { + subcount = 1; + } + if (iLBCenc_inst->mode == 30) + { + if (iLBCenc_inst->section == 1) + { + subcount = 1; + } + if (iLBCenc_inst->section == 2) + { + subcount = 3; + } + } +#else + subcount=1; +#endif + + if( Nfor > 0 ){ + + /* setup memory */ + + WebRtcSpl_MemSetW16(mem, 0, CB_MEML-STATE_LEN); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-STATE_LEN, + decresidual+(iLBCbits_inst->startIdx-1)*SUBL, STATE_LEN); + +#ifdef SPLIT_10MS + if (iLBCenc_inst->Nfor_flag > 0) + { + for (subframe = 0; subframe < WEBRTC_SPL_MIN (Nfor, 2); subframe++) + { + /* update memory */ + WEBRTC_SPL_MEMCPY_W16 (mem, mem + SUBL, (CB_MEML - SUBL)); + WEBRTC_SPL_MEMCPY_W16 (mem + CB_MEML - SUBL, + &decresidual[(iLBCbits_inst->startIdx + 1 + + subframe) * SUBL], SUBL); + } + } + + 
iLBCenc_inst->Nfor_flag++; + + if (iLBCenc_inst->mode == 20) + { + start_count = 0; + end_count = Nfor; + } + if (iLBCenc_inst->mode == 30) + { + if (iLBCenc_inst->section == 1) + { + start_count = 0; + end_count = WEBRTC_SPL_MIN (Nfor, (size_t)2); + } + if (iLBCenc_inst->section == 2) + { + start_count = WEBRTC_SPL_MIN (Nfor, (size_t)2); + end_count = Nfor; + } + } +#else + start_count = 0; + end_count = Nfor; +#endif + + /* loop over subframes to encode */ + + for (subframe = start_count; subframe < end_count; subframe++){ + + /* encode subframe */ + + WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index+subcount*CB_NSTAGES, + iLBCbits_inst->gain_index+subcount*CB_NSTAGES, + &residual[(iLBCbits_inst->startIdx+1+subframe)*SUBL], + mem, MEM_LF_TBL, SUBL, + &weightdenum[(iLBCbits_inst->startIdx+1+subframe)*(LPC_FILTERORDER+1)], + subcount); + + /* construct decoded vector */ + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + &decresidual[(iLBCbits_inst->startIdx + 1 + subframe) * SUBL], + iLBCbits_inst->cb_index + subcount * CB_NSTAGES, + iLBCbits_inst->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)); + + /* update memory */ + + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &decresidual[(iLBCbits_inst->startIdx+1+subframe)*SUBL], SUBL); + + subcount++; + } + } + +#ifdef SPLIT_10MS + if ((iLBCenc_inst->section == 1) && + (iLBCenc_inst->mode == 30) && (Nfor > 0) && (end_count == 2)) + { + iLBCenc_inst->section++; + /* adjust index */ + WebRtcIlbcfix_IndexConvEnc (iLBCbits_inst->cb_index); + /* Packetize the parameters into the frame */ + WebRtcIlbcfix_PackBits (iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode); + WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum, + SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM); + return; + } +#endif + + /* backward prediction of subframes */ + + if (iLBCbits_inst->startIdx > 1) { + + /* create reverse order vectors + (The decresidual does not need to be 
copied since it is + contained in the same vector as the residual) + */ + + size_t Nback = iLBCbits_inst->startIdx - 1; + WebRtcSpl_MemCpyReversedOrder(&reverseResidual[Nback*SUBL-1], residual, Nback*SUBL); + + /* setup memory */ + + meml_gotten = SUBL*(iLBCenc_inst->nsub+1-iLBCbits_inst->startIdx); + if( meml_gotten > CB_MEML ) { + meml_gotten=CB_MEML; + } + + WebRtcSpl_MemCpyReversedOrder(&mem[CB_MEML-1], &decresidual[Nback*SUBL], meml_gotten); + WebRtcSpl_MemSetW16(mem, 0, CB_MEML - meml_gotten); + +#ifdef SPLIT_10MS + if (iLBCenc_inst->Nback_flag > 0) + { + for (subframe = 0; subframe < WEBRTC_SPL_MAX (2 - Nfor, 0); subframe++) + { + /* update memory */ + WEBRTC_SPL_MEMCPY_W16 (mem, mem + SUBL, (CB_MEML - SUBL)); + WEBRTC_SPL_MEMCPY_W16 (mem + CB_MEML - SUBL, + &reverseDecresidual[subframe * SUBL], SUBL); + } + } + + iLBCenc_inst->Nback_flag++; + + + if (iLBCenc_inst->mode == 20) + { + start_count = 0; + end_count = Nback; + } + if (iLBCenc_inst->mode == 30) + { + if (iLBCenc_inst->section == 1) + { + start_count = 0; + end_count = (Nfor >= 2) ? 0 : (2 - NFor); + } + if (iLBCenc_inst->section == 2) + { + start_count = (Nfor >= 2) ? 
0 : (2 - NFor); + end_count = Nback; + } + } +#else + start_count = 0; + end_count = Nback; +#endif + + /* loop over subframes to encode */ + + for (subframe = start_count; subframe < end_count; subframe++){ + + /* encode subframe */ + + WebRtcIlbcfix_CbSearch(iLBCenc_inst, iLBCbits_inst->cb_index+subcount*CB_NSTAGES, + iLBCbits_inst->gain_index+subcount*CB_NSTAGES, &reverseResidual[subframe*SUBL], + mem, MEM_LF_TBL, SUBL, + &weightdenum[(iLBCbits_inst->startIdx-2-subframe)*(LPC_FILTERORDER+1)], + subcount); + + /* construct decoded vector */ + RTC_CHECK(WebRtcIlbcfix_CbConstruct( + &reverseDecresidual[subframe * SUBL], + iLBCbits_inst->cb_index + subcount * CB_NSTAGES, + iLBCbits_inst->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, + SUBL)); + + /* update memory */ + memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem)); + WEBRTC_SPL_MEMCPY_W16(mem+CB_MEML-SUBL, + &reverseDecresidual[subframe*SUBL], SUBL); + + subcount++; + + } + + /* get decoded residual from reversed vector */ + + WebRtcSpl_MemCpyReversedOrder(&decresidual[SUBL*Nback-1], reverseDecresidual, SUBL*Nback); + } + /* end encoding part */ + + /* adjust index */ + + WebRtcIlbcfix_IndexConvEnc(iLBCbits_inst->cb_index); + + /* Packetize the parameters into the frame */ + +#ifdef SPLIT_10MS + if( (iLBCenc_inst->mode==30) && (iLBCenc_inst->section==1) ){ + WebRtcIlbcfix_PackBits(iLBCenc_inst->bytes, iLBCbits_inst, iLBCenc_inst->mode); + } + else{ + WebRtcIlbcfix_PackBits(bytes, iLBCbits_inst, iLBCenc_inst->mode); + } +#else + WebRtcIlbcfix_PackBits(bytes, iLBCbits_inst, iLBCenc_inst->mode); +#endif + +#ifndef WEBRTC_ARCH_BIG_ENDIAN + /* Swap bytes for LITTLE ENDIAN since the packbits() + function assumes BIG_ENDIAN machine */ +#ifdef SPLIT_10MS + if (( (iLBCenc_inst->section == 1) && (iLBCenc_inst->mode == 20) ) || + ( (iLBCenc_inst->section == 2) && (iLBCenc_inst->mode == 30) )){ + WebRtcIlbcfix_SwapBytes(bytes, iLBCenc_inst->no_of_words, bytes); + } +#else + WebRtcIlbcfix_SwapBytes(bytes, 
iLBCenc_inst->no_of_words, bytes); +#endif +#endif + +#ifdef SPLIT_10MS + if (subcount == (iLBCenc_inst->nsub - 1)) + { + iLBCenc_inst->section = 0; + } + else + { + iLBCenc_inst->section++; + WEBRTC_SPL_MEMCPY_W16 (weightdenumbuf, weightdenum, + SCRATCH_ENCODE_DATAVEC - SCRATCH_ENCODE_WEIGHTDENUM); + } +#endif + +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h new file mode 100644 index 0000000000..bc3e187d92 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/encode.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Encode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENCODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENCODE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * main encoder function + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_EncodeImpl( + uint16_t* bytes, /* (o) encoded data bits iLBC */ + const int16_t* block, /* (i) speech vector to encode */ + IlbcEncoder* iLBCenc_inst /* (i/o) the general encoder + state */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c new file mode 100644 index 0000000000..7f00254aea --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/energy_inverse.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  Invert every element of the energy vector in place.
 *
 *  Each element is first raised to at least 16384 (to avoid
 *  overflow in the quotient) and is then replaced by
 *  0x1FFFFFFF / element, i.e. the inverse energy in Q29.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_EnergyInverse(
    int16_t *energy,     /* (i/o) Energy and inverse
                                  energy (in Q29) */
    size_t noOfEnergies) /* (i)   The length of the energy
                                  vector */
{
  const int32_t numeratorQ29 = (int32_t)0x1FFFFFFF;
  size_t idx;

  for (idx = 0; idx < noOfEnergies; idx++) {
    /* Lower-bound the energy at 16384 before dividing */
    const int16_t floored = WEBRTC_SPL_MAX(energy[idx], 16384);

    /* Inverse energy in Q29 */
    energy[idx] = (int16_t)WebRtcSpl_DivW32W16(numeratorQ29, floored);
  }
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_EnergyInverse.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENERGY_INVERSE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENERGY_INVERSE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/* Inverses the in vector in into Q29 domain */ + +void WebRtcIlbcfix_EnergyInverse( + int16_t* + energy, /* (i/o) Energy and inverse + energy (in Q29) */ + size_t noOfEnergies); /* (i) The length of the energy + vector */ + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c new file mode 100644 index 0000000000..cd3d0a4db1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_EnhUpsample.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/enh_upsample.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * upsample finite array assuming zeros outside bounds + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_EnhUpsample( + int32_t *useq1, /* (o) upsampled output sequence */ + int16_t *seq1 /* (i) unupsampled sequence */ + ){ + int j; + int32_t *pu1, *pu11; + int16_t *ps, *w16tmp; + const int16_t *pp; + + /* filtering: filter overhangs left side of sequence */ + pu1=useq1; + for (j=0;j<ENH_UPS0; j++) { + pu11=pu1; + /* i = 2 */ + pp=WebRtcIlbcfix_kEnhPolyPhaser[j]+1; + ps=seq1+2; + *pu11 = (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + pu11+=ENH_UPS0; + /* i = 3 */ + pp=WebRtcIlbcfix_kEnhPolyPhaser[j]+1; + ps=seq1+3; + *pu11 = (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + pu11+=ENH_UPS0; + /* i = 4 */ + pp=WebRtcIlbcfix_kEnhPolyPhaser[j]+1; + ps=seq1+4; + *pu11 = (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + pu1++; + } + + /* filtering: simple convolution=inner products + (not needed since the sequence is so short) + */ + + /* filtering: filter overhangs right side of sequence */ + + /* Code with loops, which is equivivalent to the expanded version below + + filterlength = 5; + hf1 = 2; + for(j=0;j<ENH_UPS0; j++){ + pu = useq1 + (filterlength-hfl)*ENH_UPS0 + j; + for(i=1; i<=hfl; i++){ + *pu=0; + pp = polyp[j]+i; + ps = seq1+dim1-1; + for(k=0;k<filterlength-i;k++) { + *pu += (*ps--) * *pp++; + } + 
pu+=ENH_UPS0; + } + } + */ + pu1 = useq1 + 12; + w16tmp = seq1+4; + for (j=0;j<ENH_UPS0; j++) { + pu11 = pu1; + /* i = 1 */ + pp = WebRtcIlbcfix_kEnhPolyPhaser[j]+2; + ps = w16tmp; + *pu11 = (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + pu11+=ENH_UPS0; + /* i = 2 */ + pp = WebRtcIlbcfix_kEnhPolyPhaser[j]+3; + ps = w16tmp; + *pu11 = (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + *pu11 += (*ps--) * *pp++; + pu11+=ENH_UPS0; + + pu1++; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.h new file mode 100644 index 0000000000..b427eca50a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enh_upsample.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_EnhUpsample.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENH_UPSAMPLE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENH_UPSAMPLE_H_ + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * upsample finite array assuming zeros outside bounds + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_EnhUpsample( + int32_t* useq1, /* (o) upsampled output sequence */ + int16_t* seq1 /* (i) unupsampled sequence */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.c new file mode 100644 index 0000000000..bd4e60015c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Enhancer.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/enhancer.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/get_sync_seq.h" +#include "modules/audio_coding/codecs/ilbc/smooth.h" + +/*----------------------------------------------------------------* + * perform enhancement on idata+centerStartPos through + * idata+centerStartPos+ENH_BLOCKL-1 + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Enhancer( + int16_t *odata, /* (o) smoothed block, dimension blockl */ + int16_t *idata, /* (i) data buffer used for enhancing */ + size_t idatal, /* (i) dimension idata */ + size_t centerStartPos, /* (i) first sample current block within idata */ + size_t *period, /* (i) pitch period array (pitch bward-in time) */ + const size_t *plocs, /* (i) locations where period array values valid */ + size_t periodl /* (i) dimension of period and plocs */ + ){ + /* Stack based */ + int16_t surround[ENH_BLOCKL]; + + WebRtcSpl_MemSetW16(surround, 0, ENH_BLOCKL); + + /* get said second sequence of segments */ + + WebRtcIlbcfix_GetSyncSeq(idata, idatal, centerStartPos, period, plocs, + periodl, ENH_HL, surround); + + /* compute the smoothed output from said second sequence */ + + WebRtcIlbcfix_Smooth(odata, idata + centerStartPos, surround); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.h new file mode 100644 index 0000000000..386949347a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Enhancer.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * perform enhancement on idata+centerStartPos through + * idata+centerStartPos+ENH_BLOCKL-1 + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Enhancer( + int16_t* odata, /* (o) smoothed block, dimension blockl */ + int16_t* idata, /* (i) data buffer used for enhancing */ + size_t idatal, /* (i) dimension idata */ + size_t centerStartPos, /* (i) first sample current block within idata */ + size_t* period, /* (i) pitch period array (pitch bward-in time) */ + const size_t* plocs, /* (i) locations where period array values valid */ + size_t periodl /* (i) dimension of period and plocs */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c new file mode 100644 index 0000000000..ca23e19ae3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.c @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_EnhancerInterface.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/enhancer_interface.h" + +#include <stdlib.h> +#include <string.h> + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/enhancer.h" +#include "modules/audio_coding/codecs/ilbc/hp_output.h" +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" + + + +/*----------------------------------------------------------------* + * interface for enhancer + *---------------------------------------------------------------*/ + +size_t // (o) Estimated lag in end of in[] + WebRtcIlbcfix_EnhancerInterface( + int16_t* out, // (o) enhanced signal + const int16_t* in, // (i) unenhanced signal + IlbcDecoder* iLBCdec_inst) { // (i) buffers etc + size_t iblock; + size_t lag=20, tlag=20; + size_t inLen=iLBCdec_inst->blockl+120; + int16_t scale, scale1; + size_t plc_blockl; + int16_t *enh_buf; + size_t *enh_period; + int32_t tmp1, tmp2, max; + size_t new_blocks; + int16_t *enh_bufPtr1; + size_t i; + size_t k; + int16_t EnChange; + int16_t SqrtEnChange; + int16_t inc; + int16_t win; + int16_t *tmpW16ptr; + size_t startPos; + int16_t *plc_pred; + const int16_t *target, *regressor; + int16_t max16; + int shifts; + int32_t ener; + int16_t enerSh; + int16_t corrSh; + size_t ind; + int16_t sh; + size_t start, stop; + /* Stack based */ + int16_t totsh[3]; + int16_t 
downsampled[(BLOCKL_MAX+120)>>1]; /* length 180 */ + int32_t corr32[50]; + int32_t corrmax[3]; + int16_t corr16[3]; + int16_t en16[3]; + size_t lagmax[3]; + + plc_pred = downsampled; /* Reuse memory since plc_pred[ENH_BLOCKL] and + downsampled are non overlapping */ + enh_buf=iLBCdec_inst->enh_buf; + enh_period=iLBCdec_inst->enh_period; + + /* Copy in the new data into the enhancer buffer */ + memmove(enh_buf, &enh_buf[iLBCdec_inst->blockl], + (ENH_BUFL - iLBCdec_inst->blockl) * sizeof(*enh_buf)); + + WEBRTC_SPL_MEMCPY_W16(&enh_buf[ENH_BUFL-iLBCdec_inst->blockl], in, + iLBCdec_inst->blockl); + + /* Set variables that are dependent on frame size */ + if (iLBCdec_inst->mode==30) { + plc_blockl=ENH_BLOCKL; + new_blocks=3; + startPos=320; /* Start position for enhancement + (640-new_blocks*ENH_BLOCKL-80) */ + } else { + plc_blockl=40; + new_blocks=2; + startPos=440; /* Start position for enhancement + (640-new_blocks*ENH_BLOCKL-40) */ + } + + /* Update the pitch prediction for each enhancer block, move the old ones */ + memmove(enh_period, &enh_period[new_blocks], + (ENH_NBLOCKS_TOT - new_blocks) * sizeof(*enh_period)); + + WebRtcSpl_DownsampleFast( + enh_buf+ENH_BUFL-inLen, /* Input samples */ + inLen + ENH_BUFL_FILTEROVERHEAD, + downsampled, + inLen / 2, + (int16_t*)WebRtcIlbcfix_kLpFiltCoefs, /* Coefficients in Q12 */ + FILTERORDER_DS_PLUS1, /* Length of filter (order-1) */ + FACTOR_DS, + DELAY_DS); + + /* Estimate the pitch in the down sampled domain. 
*/ + for(iblock = 0; iblock<new_blocks; iblock++){ + + /* references */ + target = downsampled + 60 + iblock * ENH_BLOCKL_HALF; + regressor = target - 10; + + /* scaling */ + max16 = WebRtcSpl_MaxAbsValueW16(®ressor[-50], ENH_BLOCKL_HALF + 50 - 1); + shifts = WebRtcSpl_GetSizeInBits((uint32_t)(max16 * max16)) - 25; + shifts = WEBRTC_SPL_MAX(0, shifts); + + /* compute cross correlation */ + WebRtcSpl_CrossCorrelation(corr32, target, regressor, ENH_BLOCKL_HALF, 50, + shifts, -1); + + /* Find 3 highest correlations that should be compared for the + highest (corr*corr)/ener */ + + for (i=0;i<2;i++) { + lagmax[i] = WebRtcSpl_MaxIndexW32(corr32, 50); + corrmax[i] = corr32[lagmax[i]]; + start = WEBRTC_SPL_MAX(2, lagmax[i]) - 2; + stop = WEBRTC_SPL_MIN(47, lagmax[i]) + 2; + for (k = start; k <= stop; k++) { + corr32[k] = 0; + } + } + lagmax[2] = WebRtcSpl_MaxIndexW32(corr32, 50); + corrmax[2] = corr32[lagmax[2]]; + + /* Calculate normalized corr^2 and ener */ + for (i=0;i<3;i++) { + corrSh = 15-WebRtcSpl_GetSizeInBits(corrmax[i]); + ener = WebRtcSpl_DotProductWithScale(regressor - lagmax[i], + regressor - lagmax[i], + ENH_BLOCKL_HALF, shifts); + enerSh = 15-WebRtcSpl_GetSizeInBits(ener); + corr16[i] = (int16_t)WEBRTC_SPL_SHIFT_W32(corrmax[i], corrSh); + corr16[i] = (int16_t)((corr16[i] * corr16[i]) >> 16); + en16[i] = (int16_t)WEBRTC_SPL_SHIFT_W32(ener, enerSh); + totsh[i] = enerSh - 2 * corrSh; + } + + /* Compare lagmax[0..3] for the (corr^2)/ener criteria */ + ind = 0; + for (i=1; i<3; i++) { + if (totsh[ind] > totsh[i]) { + sh = WEBRTC_SPL_MIN(31, totsh[ind]-totsh[i]); + if (corr16[ind] * en16[i] < (corr16[i] * en16[ind]) >> sh) { + ind = i; + } + } else { + sh = WEBRTC_SPL_MIN(31, totsh[i]-totsh[ind]); + if ((corr16[ind] * en16[i]) >> sh < corr16[i] * en16[ind]) { + ind = i; + } + } + } + + lag = lagmax[ind] + 10; + + /* Store the estimated lag in the non-downsampled domain */ + enh_period[ENH_NBLOCKS_TOT - new_blocks + iblock] = lag * 8; + + /* Store the estimated lag 
for backward PLC */ + if (iLBCdec_inst->prev_enh_pl==1) { + if (!iblock) { + tlag = lag * 2; + } + } else { + if (iblock==1) { + tlag = lag * 2; + } + } + + lag *= 2; + } + + if ((iLBCdec_inst->prev_enh_pl==1)||(iLBCdec_inst->prev_enh_pl==2)) { + + /* Calculate the best lag of the new frame + This is used to interpolate backwards and mix with the PLC'd data + */ + + /* references */ + target=in; + regressor=in+tlag-1; + + /* scaling */ + // Note that this is not abs-max, so we will take the absolute value below. + max16 = WebRtcSpl_MaxAbsElementW16(regressor, plc_blockl + 3 - 1); + const int16_t max_target = + WebRtcSpl_MaxAbsElementW16(target, plc_blockl + 3 - 1); + const int64_t max_val = plc_blockl * abs(max16 * max_target); + const int32_t factor = max_val >> 31; + shifts = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); + + /* compute cross correlation */ + WebRtcSpl_CrossCorrelation(corr32, target, regressor, plc_blockl, 3, shifts, + 1); + + /* find lag */ + lag=WebRtcSpl_MaxIndexW32(corr32, 3); + lag+=tlag-1; + + /* Copy the backward PLC to plc_pred */ + + if (iLBCdec_inst->prev_enh_pl==1) { + if (lag>plc_blockl) { + WEBRTC_SPL_MEMCPY_W16(plc_pred, &in[lag-plc_blockl], plc_blockl); + } else { + WEBRTC_SPL_MEMCPY_W16(&plc_pred[plc_blockl-lag], in, lag); + WEBRTC_SPL_MEMCPY_W16( + plc_pred, &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl+lag], + (plc_blockl-lag)); + } + } else { + size_t pos; + + pos = plc_blockl; + + while (lag<pos) { + WEBRTC_SPL_MEMCPY_W16(&plc_pred[pos-lag], in, lag); + pos = pos - lag; + } + WEBRTC_SPL_MEMCPY_W16(plc_pred, &in[lag-pos], pos); + + } + + if (iLBCdec_inst->prev_enh_pl==1) { + /* limit energy change + if energy in backward PLC is more than 4 times higher than the forward + PLC, then reduce the energy in the backward PLC vector: + sample 1...len-16 set energy of the to 4 times forward PLC + sample len-15..len interpolate between 4 times fw PLC and bw PLC energy + + Note: Compared to floating point code there is a slight 
change, + the window is 16 samples long instead of 10 samples to simplify the + calculations + */ + + max=WebRtcSpl_MaxAbsValueW16( + &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl], plc_blockl); + max16=WebRtcSpl_MaxAbsValueW16(plc_pred, plc_blockl); + max = WEBRTC_SPL_MAX(max, max16); + scale=22-(int16_t)WebRtcSpl_NormW32(max); + scale=WEBRTC_SPL_MAX(scale,0); + + tmp2 = WebRtcSpl_DotProductWithScale( + &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl], + &enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl], + plc_blockl, scale); + tmp1 = WebRtcSpl_DotProductWithScale(plc_pred, plc_pred, + plc_blockl, scale); + + /* Check the energy difference */ + if ((tmp1>0)&&((tmp1>>2)>tmp2)) { + /* EnChange is now guaranteed to be <0.5 + Calculate EnChange=tmp2/tmp1 in Q16 + */ + + scale1=(int16_t)WebRtcSpl_NormW32(tmp1); + tmp1=WEBRTC_SPL_SHIFT_W32(tmp1, (scale1-16)); /* using 15 bits */ + + tmp2=WEBRTC_SPL_SHIFT_W32(tmp2, (scale1)); + EnChange = (int16_t)WebRtcSpl_DivW32W16(tmp2, + (int16_t)tmp1); + + /* Calculate the Sqrt of the energy in Q15 ((14+16)/2) */ + SqrtEnChange = (int16_t)WebRtcSpl_SqrtFloor(EnChange << 14); + + + /* Multiply first part of vector with 2*SqrtEnChange */ + WebRtcSpl_ScaleVector(plc_pred, plc_pred, SqrtEnChange, plc_blockl-16, + 14); + + /* Calculate increase parameter for window part (16 last samples) */ + /* (1-2*SqrtEnChange)/16 in Q15 */ + inc = 2048 - (SqrtEnChange >> 3); + + win=0; + tmpW16ptr=&plc_pred[plc_blockl-16]; + + for (i=16;i>0;i--) { + *tmpW16ptr = (int16_t)( + (*tmpW16ptr * (SqrtEnChange + (win >> 1))) >> 14); + /* multiply by (2.0*SqrtEnChange+win) */ + + win += inc; + tmpW16ptr++; + } + } + + /* Make the linear interpolation between the forward PLC'd data + and the backward PLC'd data (from the new frame) + */ + + if (plc_blockl==40) { + inc=400; /* 1/41 in Q14 */ + } else { /* plc_blockl==80 */ + inc=202; /* 1/81 in Q14 */ + } + win=0; + enh_bufPtr1=&enh_buf[ENH_BUFL-1-iLBCdec_inst->blockl]; + for (i=0; i<plc_blockl; i++) { + 
win+=inc; + *enh_bufPtr1 = (int16_t)((*enh_bufPtr1 * win) >> 14); + *enh_bufPtr1 += (int16_t)( + ((16384 - win) * plc_pred[plc_blockl - 1 - i]) >> 14); + enh_bufPtr1--; + } + } else { + int16_t *synt = &downsampled[LPC_FILTERORDER]; + + enh_bufPtr1=&enh_buf[ENH_BUFL-iLBCdec_inst->blockl-plc_blockl]; + WEBRTC_SPL_MEMCPY_W16(enh_bufPtr1, plc_pred, plc_blockl); + + /* Clear fileter memory */ + WebRtcSpl_MemSetW16(iLBCdec_inst->syntMem, 0, LPC_FILTERORDER); + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemy, 0, 4); + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemx, 0, 2); + + /* Initialize filter memory by filtering through 2 lags */ + WEBRTC_SPL_MEMCPY_W16(&synt[-LPC_FILTERORDER], iLBCdec_inst->syntMem, + LPC_FILTERORDER); + WebRtcSpl_FilterARFastQ12( + enh_bufPtr1, + synt, + &iLBCdec_inst->old_syntdenum[ + (iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1)], + LPC_FILTERORDER+1, lag); + + WEBRTC_SPL_MEMCPY_W16(&synt[-LPC_FILTERORDER], &synt[lag-LPC_FILTERORDER], + LPC_FILTERORDER); + WebRtcIlbcfix_HpOutput(synt, (int16_t*)WebRtcIlbcfix_kHpOutCoefs, + iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx, + lag); + WebRtcSpl_FilterARFastQ12( + enh_bufPtr1, synt, + &iLBCdec_inst->old_syntdenum[ + (iLBCdec_inst->nsub-1)*(LPC_FILTERORDER+1)], + LPC_FILTERORDER+1, lag); + + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->syntMem, &synt[lag-LPC_FILTERORDER], + LPC_FILTERORDER); + WebRtcIlbcfix_HpOutput(synt, (int16_t*)WebRtcIlbcfix_kHpOutCoefs, + iLBCdec_inst->hpimemy, iLBCdec_inst->hpimemx, + lag); + } + } + + + /* Perform enhancement block by block */ + + for (iblock = 0; iblock<new_blocks; iblock++) { + WebRtcIlbcfix_Enhancer(out + iblock * ENH_BLOCKL, + enh_buf, + ENH_BUFL, + iblock * ENH_BLOCKL + startPos, + enh_period, + WebRtcIlbcfix_kEnhPlocs, ENH_NBLOCKS_TOT); + } + + return (lag); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h new file mode 100644 index 0000000000..5022a47c3a --- 
/dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/enhancer_interface.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_EnhancerInterface.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_INTERFACE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_ENHANCER_INTERFACE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * interface for enhancer + *---------------------------------------------------------------*/ + +size_t // (o) Estimated lag in end of in[] +WebRtcIlbcfix_EnhancerInterface(int16_t* out, // (o) enhanced signal + const int16_t* in, // (i) unenhanced signal + IlbcDecoder* iLBCdec_inst); // (i) buffers etc + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c new file mode 100644 index 0000000000..6b4f30c96b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_FilteredCbVecs.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Construct an additional codebook vector by filtering the + * initial codebook buffer. This vector is then used to expand + * the codebook with an additional section. + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_FilteredCbVecs( + int16_t *cbvectors, /* (o) Codebook vector for the higher section */ + int16_t *CBmem, /* (i) Codebook memory that is filtered to create a + second CB section */ + size_t lMem, /* (i) Length of codebook memory */ + size_t samples /* (i) Number of samples to filter */ + ) { + + /* Set up the memory, start with zero state */ + WebRtcSpl_MemSetW16(CBmem+lMem, 0, CB_HALFFILTERLEN); + WebRtcSpl_MemSetW16(CBmem-CB_HALFFILTERLEN, 0, CB_HALFFILTERLEN); + WebRtcSpl_MemSetW16(cbvectors, 0, lMem-samples); + + /* Filter to obtain the filtered CB memory */ + + WebRtcSpl_FilterMAFastQ12( + CBmem+CB_HALFFILTERLEN+lMem-samples, cbvectors+lMem-samples, + (int16_t*)WebRtcIlbcfix_kCbFiltersRev, CB_FILTERLEN, samples); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h new file mode 100644 index 0000000000..661262e42e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/filtered_cb_vecs.h @@ -0,0 +1,39 @@ +/* + * 
/*----------------------------------------------------------------*
 * Construct an additional codebook vector by filtering the
 * initial codebook buffer. This vector is then used to expand
 * the codebook with an additional section.
 *
 *  cbvectors (o) codebook vector for the higher section
 *  CBmem     (i) codebook memory that is filtered to create a
 *                second CB section
 *  lMem      (i) length of codebook memory
 *  samples   (i) number of samples to filter
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_FilteredCbVecs(int16_t* cbvectors,
                                  int16_t* CBmem,
                                  size_t lMem,
                                  size_t samples);
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_FrameClassify.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/frame_classify.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Classification of subframes to localize start state + *---------------------------------------------------------------*/ + +size_t WebRtcIlbcfix_FrameClassify( + /* (o) Index to the max-energy sub frame */ + IlbcEncoder *iLBCenc_inst, + /* (i/o) the encoder state structure */ + int16_t *residualFIX /* (i) lpc residual signal */ + ){ + int16_t max, scale; + int32_t ssqEn[NSUB_MAX-1]; + int16_t *ssqPtr; + int32_t *seqEnPtr; + int32_t maxW32; + int16_t scale1; + size_t pos; + size_t n; + + /* + Calculate the energy of each of the 80 sample blocks + in the draft the 4 first and last samples are windowed with 1/5...4/5 + and 4/5...1/5 respectively. 
To simplify for the fixpoint we have changed + this to 0 0 1 1 and 1 1 0 0 + */ + + max = WebRtcSpl_MaxAbsValueW16(residualFIX, iLBCenc_inst->blockl); + scale = WebRtcSpl_GetSizeInBits((uint32_t)(max * max)); + + /* Scale to maximum 24 bits so that it won't overflow for 76 samples */ + scale = scale-24; + scale1 = WEBRTC_SPL_MAX(0, scale); + + /* Calculate energies */ + ssqPtr=residualFIX + 2; + seqEnPtr=ssqEn; + for (n=(iLBCenc_inst->nsub-1); n>0; n--) { + (*seqEnPtr) = WebRtcSpl_DotProductWithScale(ssqPtr, ssqPtr, 76, scale1); + ssqPtr += 40; + seqEnPtr++; + } + + /* Scale to maximum 20 bits in order to allow for the 11 bit window */ + maxW32 = WebRtcSpl_MaxValueW32(ssqEn, iLBCenc_inst->nsub - 1); + scale = WebRtcSpl_GetSizeInBits(maxW32) - 20; + scale1 = WEBRTC_SPL_MAX(0, scale); + + /* Window each 80 block with the ssqEn_winTbl window to give higher probability for + the blocks in the middle + */ + seqEnPtr=ssqEn; + if (iLBCenc_inst->mode==20) { + ssqPtr=(int16_t*)WebRtcIlbcfix_kStartSequenceEnrgWin+1; + } else { + ssqPtr=(int16_t*)WebRtcIlbcfix_kStartSequenceEnrgWin; + } + for (n=(iLBCenc_inst->nsub-1); n>0; n--) { + (*seqEnPtr)=WEBRTC_SPL_MUL(((*seqEnPtr)>>scale1), (*ssqPtr)); + seqEnPtr++; + ssqPtr++; + } + + /* Extract the best choise of start state */ + pos = WebRtcSpl_MaxIndexW32(ssqEn, iLBCenc_inst->nsub - 1) + 1; + + return(pos); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h new file mode 100644 index 0000000000..7615106d70 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/frame_classify.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_FrameClassify.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FRAME_CLASSIFY_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_FRAME_CLASSIFY_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +size_t WebRtcIlbcfix_FrameClassify( + /* (o) Index to the max-energy sub frame */ + IlbcEncoder* iLBCenc_inst, + /* (i/o) the encoder state structure */ + int16_t* residualFIX /* (i) lpc residual signal */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c new file mode 100644 index 0000000000..1357dece33 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GainDequant.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/gain_dequant.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * decoder for quantized gains in the gain-shape coding of + * residual + *---------------------------------------------------------------*/ + +int16_t WebRtcIlbcfix_GainDequant( + /* (o) quantized gain value (Q14) */ + int16_t index, /* (i) quantization index */ + int16_t maxIn, /* (i) maximum of unquantized gain (Q14) */ + int16_t stage /* (i) The stage of the search */ + ){ + int16_t scale; + const int16_t *gain; + + /* obtain correct scale factor */ + + scale=WEBRTC_SPL_ABS_W16(maxIn); + scale = WEBRTC_SPL_MAX(1638, scale); /* if lower than 0.1, set it to 0.1 */ + + /* select the quantization table and return the decoded value */ + gain = WebRtcIlbcfix_kGain[stage]; + + return (int16_t)((scale * gain[index] + 8192) >> 14); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h new file mode 100644 index 0000000000..2b97550b6c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_dequant.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * decoder for quantized gains in the gain-shape coding of
 * residual
 *
 *  index (i) quantization index
 *  maxIn (i) maximum of unquantized gain (Q14)
 *  stage (i) the stage of the search
 *
 * Returns the quantized gain value (Q14).
 *---------------------------------------------------------------*/
int16_t WebRtcIlbcfix_GainDequant(int16_t index,
                                  int16_t maxIn,
                                  int16_t stage);
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GainQuant.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/gain_quant.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * quantizer for the gain in the gain-shape coding of residual + *---------------------------------------------------------------*/ + +int16_t WebRtcIlbcfix_GainQuant( /* (o) quantized gain value */ + int16_t gain, /* (i) gain value Q14 */ + int16_t maxIn, /* (i) maximum of gain value Q14 */ + int16_t stage, /* (i) The stage of the search */ + int16_t *index /* (o) quantization index */ + ) { + + int16_t scale, cblen; + int32_t gainW32, measure1, measure2; + const int16_t *cbPtr, *cb; + int loc, noMoves, noChecks, i; + + /* ensure a lower bound (0.1) on the scaling factor */ + + scale = WEBRTC_SPL_MAX(1638, maxIn); + + /* select the quantization table and calculate + the length of the table and the number of + steps in the binary search that are needed */ + cb = WebRtcIlbcfix_kGain[stage]; + cblen = 32>>stage; + noChecks = 4-stage; + + /* Multiply the gain with 2^14 to make the comparison + easier and with higher precision */ + gainW32 = gain << 14; + + /* Do a binary search, starting in the middle of the CB + loc - defines the current position in the table + noMoves - defines the number of steps to move in the CB in order + to get next CB location + */ + + loc = cblen>>1; + noMoves = loc; + cbPtr = cb + loc; /* Centre of CB */ + + for (i=noChecks;i>0;i--) { + noMoves>>=1; + measure1 = scale * *cbPtr; + + /* Move up if gain is larger, otherwise move down in table */ + measure1 = measure1 - gainW32; + + if (0>measure1) { + cbPtr+=noMoves; + loc+=noMoves; + } else { + cbPtr-=noMoves; + loc-=noMoves; + } + } + + 
/* Check which value is the closest one: loc-1, loc or loc+1 */ + + measure1 = scale * *cbPtr; + if (gainW32>measure1) { + /* Check against value above loc */ + measure2 = scale * cbPtr[1]; + if ((measure2-gainW32)<(gainW32-measure1)) { + loc+=1; + } + } else { + /* Check against value below loc */ + measure2 = scale * cbPtr[-1]; + if ((gainW32-measure2)<=(measure1-gainW32)) { + loc-=1; + } + } + + /* Guard against getting outside the table. The calculation above can give a location + which is one above the maximum value (in very rare cases) */ + loc=WEBRTC_SPL_MIN(loc, (cblen-1)); + *index=loc; + + /* Calculate and return the quantized gain value (in Q14) */ + return (int16_t)((scale * cb[loc] + 8192) >> 14); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h new file mode 100644 index 0000000000..761f7d2f79 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/gain_quant.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * quantizer for the gain in the gain-shape coding of residual
 *
 *  gain  (i) gain value Q14
 *  maxIn (i) maximum of gain value Q14
 *  stage (i) the stage of the search
 *  index (o) quantization index
 *
 * Returns the quantized gain value.
 *---------------------------------------------------------------*/
int16_t WebRtcIlbcfix_GainQuant(int16_t gain,
                                int16_t maxIn,
                                int16_t stage,
                                int16_t* index);
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GetCbVec.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/get_cd_vec.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/create_augmented_vec.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Construct codebook vector for given index. + *---------------------------------------------------------------*/ + +bool WebRtcIlbcfix_GetCbVec( + int16_t *cbvec, /* (o) Constructed codebook vector */ + int16_t *mem, /* (i) Codebook buffer */ + size_t index, /* (i) Codebook index */ + size_t lMem, /* (i) Length of codebook buffer */ + size_t cbveclen /* (i) Codebook vector length */ + ){ + size_t k, base_size; + size_t lag; + /* Stack based */ + int16_t tempbuff2[SUBL+5]; + + /* Determine size of codebook sections */ + + base_size=lMem-cbveclen+1; + + if (cbveclen==SUBL) { + base_size += cbveclen / 2; + } + + /* No filter -> First codebook section */ + + if (index<lMem-cbveclen+1) { + + /* first non-interpolated vectors */ + + k=index+cbveclen; + /* get vector */ + WEBRTC_SPL_MEMCPY_W16(cbvec, mem+lMem-k, cbveclen); + + } else if (index < base_size) { + + /* Calculate lag */ + + k = (2 * (index - (lMem - cbveclen + 1))) + cbveclen; + + lag = k / 2; + + WebRtcIlbcfix_CreateAugmentedVec(lag, mem+lMem, cbvec); + + } + + /* Higher codebbok section based on filtering */ + + else { + + size_t memIndTest; + + /* first non-interpolated vectors */ + + if (index-base_size<lMem-cbveclen+1) { + + /* Set up filter memory, stuff zeros outside memory buffer */ + + memIndTest = lMem-(index-base_size+cbveclen); + + WebRtcSpl_MemSetW16(mem-CB_HALFFILTERLEN, 0, CB_HALFFILTERLEN); + WebRtcSpl_MemSetW16(mem+lMem, 0, CB_HALFFILTERLEN); + + /* do filtering to get 
the codebook vector */ + + WebRtcSpl_FilterMAFastQ12( + &mem[memIndTest+4], cbvec, (int16_t*)WebRtcIlbcfix_kCbFiltersRev, + CB_FILTERLEN, cbveclen); + } + + /* interpolated vectors */ + + else { + if (cbveclen < SUBL) { + // We're going to fill in cbveclen + 5 elements of tempbuff2 in + // WebRtcSpl_FilterMAFastQ12, less than the SUBL + 5 elements we'll be + // using in WebRtcIlbcfix_CreateAugmentedVec. This error is caused by + // bad values in `index` (which come from the encoded stream). Tell the + // caller that things went south, and that the decoder state is now + // corrupt (because it's half-way through an update that we can't + // complete). + return false; + } + + /* Stuff zeros outside memory buffer */ + memIndTest = lMem-cbveclen-CB_FILTERLEN; + WebRtcSpl_MemSetW16(mem+lMem, 0, CB_HALFFILTERLEN); + + /* do filtering */ + WebRtcSpl_FilterMAFastQ12( + &mem[memIndTest+7], tempbuff2, (int16_t*)WebRtcIlbcfix_kCbFiltersRev, + CB_FILTERLEN, cbveclen+5); + + /* Calculate lag index */ + lag = (cbveclen<<1)-20+index-base_size-lMem-1; + + WebRtcIlbcfix_CreateAugmentedVec(lag, tempbuff2+SUBL+5, cbvec); + } + } + + return true; // Success. +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.h new file mode 100644 index 0000000000..99537dd0f7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_cd_vec.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GetCbVec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_CD_VEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_GET_CD_VEC_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +// Returns true on success, false on failure. In case of failure, the decoder +// state may be corrupted and needs resetting. +ABSL_MUST_USE_RESULT +bool WebRtcIlbcfix_GetCbVec( + int16_t* cbvec, /* (o) Constructed codebook vector */ + int16_t* mem, /* (i) Codebook buffer */ + size_t index, /* (i) Codebook index */ + size_t lMem, /* (i) Length of codebook buffer */ + size_t cbveclen /* (i) Codebook vector length */ +); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c new file mode 100644 index 0000000000..e0fb21caf0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_lsp_poly.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * Construct the polynomials F1(z) and F2(z) from the LSP
 * (Computations are done in Q24)
 *
 * Every second LSP value is used (lsp[0], lsp[2], ..., lsp[8]).
 * The expansion performed by the code is:
 *
 * f[0] = 1;
 * f[1] = -2.0 * lsp[0];
 * for (i=2; i<=5; i++) {
 *   b = -2.0 * lsp[2*i-2];
 *   f[i] = f[i-2];
 *   for (j=i; j>=2; j--) {
 *     f[j] = f[j] + b*f[j-1] + f[j-2];
 *   }
 *   f[1] = f[1] + b;
 * }
 *
 *  lsp (i) LSP in Q15
 *  f   (o) polynomial in Q24 (f[0]..f[5] are written)
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_GetLspPoly(
    int16_t *lsp,
    int32_t *f)
{
  int order, k;

  /* f[0] = 1.0 (Q24) */
  f[0] = (int32_t)16777216;

  /* f[1] = -2.0 * lsp[0]; Q15 * 2^10 gives 2 * lsp in Q24 */
  f[1] = WEBRTC_SPL_MUL(lsp[0], -1024);

  for (order = 2; order <= 5; order++) {
    const int16_t coef = lsp[2 * (order - 1)];

    f[order] = f[order - 2];

    for (k = order; k > 1; k--) {
      /* Compute f[k] = f[k] + f[k-2] - 2*coef*f[k-1], with f[k-1] split
         into a high and a low 16-bit part for the multiplication */
      const int16_t high = (int16_t)(f[k - 1] >> 16);
      const int16_t low = (int16_t)((f[k - 1] & 0xffff) >> 1);
      const int32_t product = 4 * high * coef + 4 * ((low * coef) >> 15);

      f[k] += f[k - 2];
      f[k] -= product;
    }

    f[1] -= coef * (1 << 10);
  }
}
/*----------------------------------------------------------------*
 * Construct the polynomials F1(z) and F2(z) from the LSP
 * (Computations are done in Q24)
 *
 * The expansion is performed using the following recursion:
 *
 * f[0] = 1;
 * f[1] = -2.0 * lsp[0];
 * for (i=2; i<=5; i++) {
 *   b = -2.0 * lsp[2*i-2];
 *   f[i] = f[i-2];
 *   for (j=i; j>=2; j--) {
 *     f[j] = f[j] + b*f[j-1] + f[j-2];
 *   }
 *   f[1] = f[1] + b;
 * }
 *
 *  lsp (i) LSP in Q15
 *  f   (o) polynomial in Q24
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_GetLspPoly(int16_t* lsp, int32_t* f);
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_GetSyncSeq.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/get_sync_seq.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/nearest_neighbor.h" +#include "modules/audio_coding/codecs/ilbc/refiner.h" + +/*----------------------------------------------------------------* + * get the pitch-synchronous sample sequence + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_GetSyncSeq( + int16_t *idata, /* (i) original data */ + size_t idatal, /* (i) dimension of data */ + size_t centerStartPos, /* (i) where current block starts */ + size_t *period, /* (i) rough-pitch-period array (Q-2) */ + const size_t *plocs, /* (i) where periods of period array are taken (Q-2) */ + size_t periodl, /* (i) dimension period array */ + size_t hl, /* (i) 2*hl+1 is the number of sequences */ + int16_t *surround /* (i/o) The contribution from this sequence + summed with earlier contributions */ + ){ + size_t i, centerEndPos, q; + /* Stack based */ + size_t lagBlock[2 * ENH_HL + 1]; + size_t blockStartPos[2 * ENH_HL + 1]; /* The position to search around (Q2) */ + size_t plocs2[ENH_PLOCSL]; + + centerEndPos = centerStartPos + ENH_BLOCKL - 1; + + /* present (find predicted lag from this position) */ + + WebRtcIlbcfix_NearestNeighbor(lagBlock + hl, + plocs, + 2 * (centerStartPos + centerEndPos), + periodl); + + blockStartPos[hl] = 4 * centerStartPos; + + /* past (find predicted position and perform a refined + search to find the best sequence) */ + + for (q = hl; 
q > 0; q--) { + size_t qq = q - 1; + size_t period_q = period[lagBlock[q]]; + /* Stop if this sequence would be outside the buffer; that means all + further-past sequences would also be outside the buffer. */ + if (blockStartPos[q] < period_q + (4 * ENH_OVERHANG)) + break; + blockStartPos[qq] = blockStartPos[q] - period_q; + + size_t value = blockStartPos[qq] + 4 * ENH_BLOCKL_HALF; + value = (value > period_q) ? (value - period_q) : 0; + WebRtcIlbcfix_NearestNeighbor(lagBlock + qq, plocs, value, periodl); + + /* Find the best possible sequence in the 4 times upsampled + domain around blockStartPos+q */ + WebRtcIlbcfix_Refiner(blockStartPos + qq, idata, idatal, centerStartPos, + blockStartPos[qq], surround, + WebRtcIlbcfix_kEnhWt[qq]); + } + + /* future (find predicted position and perform a refined + search to find the best sequence) */ + + for (i = 0; i < periodl; i++) { + plocs2[i] = plocs[i] - period[i]; + } + + for (q = hl + 1; q <= (2 * hl); q++) { + + WebRtcIlbcfix_NearestNeighbor( + lagBlock + q, + plocs2, + blockStartPos[q - 1] + 4 * ENH_BLOCKL_HALF, + periodl); + + blockStartPos[q]=blockStartPos[q-1]+period[lagBlock[q]]; + + if (blockStartPos[q] + 4 * (ENH_BLOCKL + ENH_OVERHANG) < 4 * idatal) { + + /* Find the best possible sequence in the 4 times upsampled + domain around blockStartPos+q */ + WebRtcIlbcfix_Refiner(blockStartPos + q, idata, idatal, centerStartPos, + blockStartPos[q], surround, + WebRtcIlbcfix_kEnhWt[2 * hl - q]); + + } else { + /* Don't add anything since this sequence would + be outside the buffer */ + } + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h new file mode 100644 index 0000000000..90962fa063 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/get_sync_seq.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
/*----------------------------------------------------------------*
 * get the pitch-synchronous sample sequence
 *
 *  idata          (i)   original data
 *  idatal         (i)   dimension of data
 *  centerStartPos (i)   where current block starts
 *  period         (i)   rough-pitch-period array (Q-2)
 *  plocs          (i)   where periods of period array are taken (Q-2)
 *  periodl        (i)   dimension period array
 *  hl             (i)   2*hl+1 is the number of sequences
 *  surround       (i/o) the contribution from this sequence summed
 *                       with earlier contributions
 *---------------------------------------------------------------*/
void WebRtcIlbcfix_GetSyncSeq(int16_t* idata,
                              size_t idatal,
                              size_t centerStartPos,
                              size_t* period,
                              const size_t* plocs,
                              size_t periodl,
                              size_t hl,
                              int16_t* surround);
/*----------------------------------------------------------------*
 *  high-pass filter of input with *0.5 and saturation
 *
 *  Filters `len` samples of `signal` in place with the second
 *  order section
 *
 *    y[i] = b[0]*x[i] + b[1]*x[i-1] + b[2]*x[i-2]
 *           + (-a[1])*y[i-1] + (-a[2])*y[i-2]
 *
 *  The filter memory is read from and written back to `x` and `y`,
 *  so consecutive calls continue the same filter. Each previous
 *  output is kept as a 16-bit high part and a 15-bit low part
 *  (y[0]/y[1] for n-1, y[2]/y[3] for n-2).
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_HpInput(
    int16_t *signal,     /* (i/o) signal vector */
    int16_t *ba,         /* (i)   B- and A-coefficients (2:nd order)
                                  {b[0] b[1] b[2] -a[1] -a[2]} a[0]
                                  is assumed to be 1.0 */
    int16_t *y,          /* (i/o) Filter state yhi[n-1] ylow[n-1]
                                  yhi[n-2] ylow[n-2] */
    int16_t *x,          /* (i/o) Filter state x[n-1] x[n-2] */
    size_t len)          /* (i)   Number of samples to filter */
{
  /* Saturation limits (2^28) used for both the output sample and the
     stored filter state. */
  const int32_t kSatMax = 268435455;
  const int32_t kSatMin = -268435456;
  size_t i;

  for (i = 0; i < len; i++) {
    int32_t acc;
    int32_t rounded;

    /* Recursive part. The low halves of the previous outputs are
       scaled down by 2^15 before the high halves are added. */
    acc = y[1] * ba[3];   /* (-a[1])*y[i-1] (low part) */
    acc += y[3] * ba[4];  /* (-a[2])*y[i-2] (low part) */
    acc >>= 15;
    acc += y[0] * ba[3];  /* (-a[1])*y[i-1] (high part) */
    acc += y[2] * ba[4];  /* (-a[2])*y[i-2] (high part) */
    /* Multiply instead of shifting left: acc may be negative, and
       left-shifting a negative value is undefined behaviour in C. */
    acc *= 2;

    /* Non-recursive part. */
    acc += signal[i] * ba[0];  /* b[0]*x[0] */
    acc += x[0] * ba[1];       /* b[1]*x[i-1] */
    acc += x[1] * ba[2];       /* b[2]*x[i-2] */

    /* Update state (input part) */
    x[1] = x[0];
    x[0] = signal[i];

    /* Rounding in Q(12+1), i.e. add 2^12 */
    rounded = acc + 4096;

    /* Saturate (to 2^28) so that the HP filtered signal does not overflow */
    if (rounded > kSatMax) {
      rounded = kSatMax;
    } else if (rounded < kSatMin) {
      rounded = kSatMin;
    }

    /* Convert back to Q0 and multiply with 0.5 */
    signal[i] = (int16_t)(rounded >> 13);

    /* Update state (filtered part) */
    y[2] = y[0];
    y[3] = y[1];

    /* Scale acc up by 2^3 with saturation to the full 32-bit range. */
    if (acc > kSatMax) {
      acc = INT32_MAX;
    } else if (acc < kSatMin) {
      acc = INT32_MIN;
    } else {
      acc *= 8;
    }

    /* Split into a 16-bit high part and a 15-bit low part. Masking
       the low 16 bits avoids shifting the (possibly negative) high
       part back up. */
    y[0] = (int16_t)(acc >> 16);
    y[1] = (int16_t)((acc & 0xffff) >> 1);
  }
}
https://bugs.llvm.org/show_bug.cgi?id=34274 +void WebRtcIlbcfix_HpInput( + int16_t* signal, /* (i/o) signal vector */ + int16_t* ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} + a[0] is assumed to be 1.0 */ + int16_t* y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t* x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len); /* (i) Number of samples to filter */ +// clang-format on + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c new file mode 100644 index 0000000000..cc5f6dcd37 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpOutput.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/hp_output.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * high-pass filter of output and *2 with saturation + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_HpOutput( + int16_t *signal, /* (i/o) signal vector */ + int16_t *ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} a[0] + is assumed to be 1.0 */ + int16_t *y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t *x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len) /* (i) Number of samples to filter */ +{ + size_t i; + int32_t tmpW32; + int32_t tmpW32b; + + for (i=0; i<len; i++) { + + /* + y[i] = b[0]*x[i] + b[1]*x[i-1] + b[2]*x[i-2] + + (-a[1])*y[i-1] + (-a[2])*y[i-2]; + */ + + tmpW32 = y[1] * ba[3]; /* (-a[1])*y[i-1] (low part) */ + tmpW32 += y[3] * ba[4]; /* (-a[2])*y[i-2] (low part) */ + tmpW32 = (tmpW32>>15); + tmpW32 += y[0] * ba[3]; /* (-a[1])*y[i-1] (high part) */ + tmpW32 += y[2] * ba[4]; /* (-a[2])*y[i-2] (high part) */ + tmpW32 *= 2; + + tmpW32 += signal[i] * ba[0]; /* b[0]*x[0] */ + tmpW32 += x[0] * ba[1]; /* b[1]*x[i-1] */ + tmpW32 += x[1] * ba[2]; /* b[2]*x[i-2] */ + + /* Update state (input part) */ + x[1] = x[0]; + x[0] = signal[i]; + + /* Rounding in Q(12-1), i.e. 
add 2^10 */ + tmpW32b = tmpW32 + 1024; + + /* Saturate (to 2^26) so that the HP filtered signal does not overflow */ + tmpW32b = WEBRTC_SPL_SAT((int32_t)67108863, tmpW32b, (int32_t)-67108864); + + /* Convert back to Q0 and multiply with 2 */ + signal[i] = (int16_t)(tmpW32b >> 11); + + /* Update state (filtered part) */ + y[2] = y[0]; + y[3] = y[1]; + + /* upshift tmpW32 by 3 with saturation */ + if (tmpW32>268435455) { + tmpW32 = WEBRTC_SPL_WORD32_MAX; + } else if (tmpW32<-268435456) { + tmpW32 = WEBRTC_SPL_WORD32_MIN; + } else { + tmpW32 *= 8; + } + + y[0] = (int16_t)(tmpW32 >> 16); + y[1] = (int16_t)((tmpW32 & 0xffff) >> 1); + + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h new file mode 100644 index 0000000000..6d1bd3cd88 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/hp_output.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_HpOutput.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_OUTPUT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_HP_OUTPUT_H_ + +#include <stddef.h> +#include <stdint.h> + +// clang-format off +// Bad job here. 
https://bugs.llvm.org/show_bug.cgi?id=34274 +void WebRtcIlbcfix_HpOutput( + int16_t* signal, /* (i/o) signal vector */ + int16_t* ba, /* (i) B- and A-coefficients (2:nd order) + {b[0] b[1] b[2] -a[1] -a[2]} a[0] + is assumed to be 1.0 */ + int16_t* y, /* (i/o) Filter state yhi[n-1] ylow[n-1] + yhi[n-2] ylow[n-2] */ + int16_t* x, /* (i/o) Filter state x[n-1] x[n-2] */ + size_t len); /* (i) Number of samples to filter */ +// clang-format on + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c new file mode 100644 index 0000000000..ba6c3e46c3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + iLBCInterface.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +#include <stdlib.h> + +#include "modules/audio_coding/codecs/ilbc/decode.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/encode.h" +#include "modules/audio_coding/codecs/ilbc/init_decode.h" +#include "modules/audio_coding/codecs/ilbc/init_encode.h" +#include "rtc_base/checks.h" + +int16_t WebRtcIlbcfix_EncoderAssign(IlbcEncoderInstance** iLBC_encinst, + int16_t* ILBCENC_inst_Addr, + int16_t* size) { + *iLBC_encinst=(IlbcEncoderInstance*)ILBCENC_inst_Addr; + *size=sizeof(IlbcEncoder)/sizeof(int16_t); + if (*iLBC_encinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_DecoderAssign(IlbcDecoderInstance** iLBC_decinst, + int16_t* ILBCDEC_inst_Addr, + int16_t* size) { + *iLBC_decinst=(IlbcDecoderInstance*)ILBCDEC_inst_Addr; + *size=sizeof(IlbcDecoder)/sizeof(int16_t); + if (*iLBC_decinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_EncoderCreate(IlbcEncoderInstance **iLBC_encinst) { + *iLBC_encinst=(IlbcEncoderInstance*)malloc(sizeof(IlbcEncoder)); + if (*iLBC_encinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_DecoderCreate(IlbcDecoderInstance **iLBC_decinst) { + *iLBC_decinst=(IlbcDecoderInstance*)malloc(sizeof(IlbcDecoder)); + if (*iLBC_decinst!=NULL) { + return(0); + } else { + return(-1); + } +} + +int16_t WebRtcIlbcfix_EncoderFree(IlbcEncoderInstance *iLBC_encinst) { + free(iLBC_encinst); + return(0); +} + +int16_t WebRtcIlbcfix_DecoderFree(IlbcDecoderInstance *iLBC_decinst) { + free(iLBC_decinst); + return(0); +} + +int16_t WebRtcIlbcfix_EncoderInit(IlbcEncoderInstance* iLBCenc_inst, + int16_t mode) { + if ((mode==20)||(mode==30)) { + 
WebRtcIlbcfix_InitEncode((IlbcEncoder*) iLBCenc_inst, mode); + return(0); + } else { + return(-1); + } +} + +int WebRtcIlbcfix_Encode(IlbcEncoderInstance* iLBCenc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded) { + size_t pos = 0; + size_t encpos = 0; + + if ((len != ((IlbcEncoder*)iLBCenc_inst)->blockl) && +#ifdef SPLIT_10MS + (len != 80) && +#endif + (len != 2*((IlbcEncoder*)iLBCenc_inst)->blockl) && + (len != 3*((IlbcEncoder*)iLBCenc_inst)->blockl)) + { + /* A maximum of 3 frames/packet is allowed */ + return(-1); + } else { + + /* call encoder */ + while (pos<len) { + WebRtcIlbcfix_EncodeImpl((uint16_t*)&encoded[2 * encpos], &speechIn[pos], + (IlbcEncoder*)iLBCenc_inst); +#ifdef SPLIT_10MS + pos += 80; + if(((IlbcEncoder*)iLBCenc_inst)->section == 0) +#else + pos += ((IlbcEncoder*)iLBCenc_inst)->blockl; +#endif + encpos += ((IlbcEncoder*)iLBCenc_inst)->no_of_words; + } + return (int)(encpos*2); + } +} + +int16_t WebRtcIlbcfix_DecoderInit(IlbcDecoderInstance* iLBCdec_inst, + int16_t mode) { + if ((mode==20)||(mode==30)) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, mode, 1); + return(0); + } else { + return(-1); + } +} +void WebRtcIlbcfix_DecoderInit20Ms(IlbcDecoderInstance* iLBCdec_inst) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, 20, 1); +} +void WebRtcIlbcfix_Decoderinit30Ms(IlbcDecoderInstance* iLBCdec_inst) { + WebRtcIlbcfix_InitDecode((IlbcDecoder*) iLBCdec_inst, 30, 1); +} + + +int WebRtcIlbcfix_Decode(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + /* Allow for automatic switching between the frame sizes + (although you do get some discontinuity) */ + if ((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + /* Test if the mode has changed */ + if 
(((IlbcDecoder*)iLBCdec_inst)->mode==20) { + if ((len==NO_OF_BYTES_30MS)|| + (len==2*NO_OF_BYTES_30MS)|| + (len==3*NO_OF_BYTES_30MS)) { + WebRtcIlbcfix_InitDecode( + ((IlbcDecoder*)iLBCdec_inst), 30, + ((IlbcDecoder*)iLBCdec_inst)->use_enhancer); + } else { + /* Unsupported frame length */ + return(-1); + } + } else { + if ((len==NO_OF_BYTES_20MS)|| + (len==2*NO_OF_BYTES_20MS)|| + (len==3*NO_OF_BYTES_20MS)) { + WebRtcIlbcfix_InitDecode( + ((IlbcDecoder*)iLBCdec_inst), 20, + ((IlbcDecoder*)iLBCdec_inst)->use_enhancer); + } else { + /* Unsupported frame length */ + return(-1); + } + } + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1) == -1) + return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +int WebRtcIlbcfix_Decode20Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + if ((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + return(-1); + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (!WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1)) + return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +int WebRtcIlbcfix_Decode30Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType) +{ + size_t i=0; + if 
((len==((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==2*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)|| + (len==3*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)) { + /* ok, do nothing */ + } else { + return(-1); + } + + while ((i*((IlbcDecoder*)iLBCdec_inst)->no_of_bytes)<len) { + if (!WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], + (const uint16_t*)&encoded + [2 * i * ((IlbcDecoder*)iLBCdec_inst)->no_of_words], + (IlbcDecoder*)iLBCdec_inst, 1)) + return -1; + i++; + } + /* iLBC does not support VAD/CNG yet */ + *speechType=1; + return (int)(i*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +size_t WebRtcIlbcfix_DecodePlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames) { + size_t i; + uint16_t dummy; + + for (i=0;i<noOfLostFrames;i++) { + // PLC decoding shouldn't fail, because there is no external input data + // that can be bad. + int result = WebRtcIlbcfix_DecodeImpl( + &decoded[i * ((IlbcDecoder*)iLBCdec_inst)->blockl], &dummy, + (IlbcDecoder*)iLBCdec_inst, 0); + RTC_CHECK_EQ(result, 0); + } + return (noOfLostFrames*((IlbcDecoder*)iLBCdec_inst)->blockl); +} + +size_t WebRtcIlbcfix_NetEqPlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames) { + /* Two input parameters not used, but needed for function pointers in NetEQ */ + (void)(decoded = NULL); + (void)(noOfLostFrames = 0); + + WebRtcSpl_MemSetW16(((IlbcDecoder*)iLBCdec_inst)->enh_buf, 0, ENH_BUFL); + ((IlbcDecoder*)iLBCdec_inst)->prev_enh_pl = 2; + + return (0); +} + +void WebRtcIlbcfix_version(char *version) +{ + strcpy((char*)version, "1.1.1"); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h new file mode 100644 index 0000000000..de8cfde111 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * ilbc.h + * + * This header file contains all of the APIs for iLBC. + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_ + +#include <stddef.h> +#include <stdint.h> + +/* + * Solution to support multiple instances + * Customer has to cast instance to proper type + */ + +typedef struct iLBC_encinst_t_ IlbcEncoderInstance; + +typedef struct iLBC_decinst_t_ IlbcDecoderInstance; + +/* + * Comfort noise constants + */ + +#define ILBC_SPEECH 1 +#define ILBC_CNG 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcIlbcfix_XxxAssign(...) + * + * These functions assign the encoder/decoder instance to the specified + * memory location + * + * Input: + * - XXX_xxxinst : Pointer to created instance that should be + * assigned + * - ILBCXXX_inst_Addr : Pointer to the desired memory space + * - size : The size that this structure occupies (in Word16) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderAssign(IlbcEncoderInstance** iLBC_encinst, + int16_t* ILBCENC_inst_Addr, + int16_t* size); +int16_t WebRtcIlbcfix_DecoderAssign(IlbcDecoderInstance** iLBC_decinst, + int16_t* ILBCDEC_inst_Addr, + int16_t* size); + +/**************************************************************************** + * WebRtcIlbcfix_XxxCreate(...) 
+ * + * These functions create an instance of the specified structure + * + * Input: + * - XXX_inst : Pointer that receives the created instance + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderCreate(IlbcEncoderInstance** iLBC_encinst); +int16_t WebRtcIlbcfix_DecoderCreate(IlbcDecoderInstance** iLBC_decinst); + +/**************************************************************************** + * WebRtcIlbcfix_XxxFree(...) + * + * These functions free the dynamic memory of a specified instance + * + * Input: + * - XXX_inst : Pointer to created instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderFree(IlbcEncoderInstance* iLBC_encinst); +int16_t WebRtcIlbcfix_DecoderFree(IlbcDecoderInstance* iLBC_decinst); + +/**************************************************************************** + * WebRtcIlbcfix_EncoderInit(...) + * + * This function initializes an iLBC instance + * + * Input: + * - iLBCenc_inst : iLBC instance, i.e. the user that should + * be initialized + * - frameLen : The frame length of the codec 20/30 (ms) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_EncoderInit(IlbcEncoderInstance* iLBCenc_inst, + int16_t frameLen); + +/**************************************************************************** + * WebRtcIlbcfix_Encode(...) + * + * This function encodes one to three iLBC frames. Input speech length has to be a + * multiple of the frame length. + * + * Input: + * - iLBCenc_inst : iLBC instance, i.e. 
the user that should encode + * a packet + * - speechIn : Input speech vector + * - len : Samples in speechIn (160, 240, 320, 480 or 720) + * + * Output: + * - encoded : The encoded data vector + * + * Return value : >0 - Length (in bytes) of coded data + * -1 - Error + */ + +int WebRtcIlbcfix_Encode(IlbcEncoderInstance* iLBCenc_inst, + const int16_t* speechIn, + size_t len, + uint8_t* encoded); + +/**************************************************************************** + * WebRtcIlbcfix_DecoderInit(...) + * + * This function initializes an iLBC instance with either 20 or 30 ms frames. + * Alternatively WebRtcIlbcfix_DecoderInit20Ms or WebRtcIlbcfix_Decoderinit30Ms can be used. Then it's + * not needed to specify the frame length with a variable. + * + * Input: + * - IlbcDecoderInstance : iLBC decoder instance + * - frameLen : The frame length of the codec 20/30 (ms) + * + * Return value : 0 - Ok + * -1 - Error + */ + +int16_t WebRtcIlbcfix_DecoderInit(IlbcDecoderInstance* iLBCdec_inst, + int16_t frameLen); +void WebRtcIlbcfix_DecoderInit20Ms(IlbcDecoderInstance* iLBCdec_inst); +void WebRtcIlbcfix_Decoderinit30Ms(IlbcDecoderInstance* iLBCdec_inst); + +/**************************************************************************** + * WebRtcIlbcfix_Decode(...) + * + * This function decodes a packet with iLBC frame(s). Output speech length + * will be a multiple of 160 or 240 samples ((160 or 240)*frames/packet). + * + * Input: + * - iLBCdec_inst : iLBC instance, i.e. 
the user that should decode + * a packet + * - encoded : Encoded iLBC frame(s) + * - len : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector + * - speechType : 1 normal, 2 CNG + * + * Return value : >0 - Samples in decoded vector + * -1 - Error + */ + +int WebRtcIlbcfix_Decode(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); +int WebRtcIlbcfix_Decode20Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); +int WebRtcIlbcfix_Decode30Ms(IlbcDecoderInstance* iLBCdec_inst, + const uint8_t* encoded, + size_t len, + int16_t* decoded, + int16_t* speechType); + +/**************************************************************************** + * WebRtcIlbcfix_DecodePlc(...) + * + * This function conducts PLC for iLBC frame(s). Output speech length + * will be a multiple of 160 or 240 samples. + * + * Input: + * - iLBCdec_inst : iLBC instance, i.e. the user that should perform + * a PLC + * - noOfLostFrames : Number of PLC frames to produce + * + * Output: + * - decoded : The "decoded" vector + * + * Return value : Samples in decoded PLC vector + */ + +size_t WebRtcIlbcfix_DecodePlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames); + +/**************************************************************************** + * WebRtcIlbcfix_NetEqPlc(...) + * + * This function updates the decoder when a packet loss has occurred, but it + * does not produce any PLC data. Function can be used if another PLC method + * is used (e.g. NetEq). 
+ * + * Input: + * - iLBCdec_inst : iLBC instance that should be updated + * - noOfLostFrames : Number of lost frames + * + * Output: + * - decoded : The "decoded" vector (nothing in this case) + * + * Return value : Samples in decoded PLC vector + */ + +size_t WebRtcIlbcfix_NetEqPlc(IlbcDecoderInstance* iLBCdec_inst, + int16_t* decoded, + size_t noOfLostFrames); + +/**************************************************************************** + * WebRtcIlbcfix_version(...) + * + * This function returns the version number of iLBC + * + * Output: + * - version : Version number of iLBC (maximum 20 char) + */ + +void WebRtcIlbcfix_version(char* version); + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_ILBC_ILBC_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc new file mode 100644 index 0000000000..689292f131 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/ilbc_unittest.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h" +#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h" +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(IlbcTest, BadPacket) { + // Get a good packet. + AudioEncoderIlbcConfig config; + config.frame_size_ms = 20; // We need 20 ms rather than the default 30 ms; + // otherwise, all possible values of cb_index[2] + // are valid. 
+ AudioEncoderIlbcImpl encoder(config, 102); + std::vector<int16_t> samples(encoder.SampleRateHz() / 100, 4711); + rtc::Buffer packet; + int num_10ms_chunks = 0; + while (packet.size() == 0) { + encoder.Encode(0, samples, &packet); + num_10ms_chunks += 1; + } + + // Break the packet by setting all bits of the unsigned 7-bit number + // cb_index[2] to 1, giving it a value of 127. For a 20 ms packet, this is + // too large. + EXPECT_EQ(38u, packet.size()); + rtc::Buffer bad_packet(packet.data(), packet.size()); + bad_packet[29] |= 0x3f; // Bits 1-6. + bad_packet[30] |= 0x80; // Bit 0. + + // Decode the bad packet. We expect the decoder to respond by returning -1. + AudioDecoderIlbcImpl decoder; + std::vector<int16_t> decoded_samples(num_10ms_chunks * samples.size()); + AudioDecoder::SpeechType speech_type; + EXPECT_EQ(-1, decoder.Decode(bad_packet.data(), bad_packet.size(), + encoder.SampleRateHz(), + sizeof(int16_t) * decoded_samples.size(), + decoded_samples.data(), &speech_type)); + + // Decode the good packet. This should work, because the failed decoding + // should not have left the decoder in a broken state. + EXPECT_EQ(static_cast<int>(decoded_samples.size()), + decoder.Decode(packet.data(), packet.size(), encoder.SampleRateHz(), + sizeof(int16_t) * decoded_samples.size(), + decoded_samples.data(), &speech_type)); +} + +class SplitIlbcTest : public ::testing::TestWithParam<std::pair<int, int> > { + protected: + virtual void SetUp() { + const std::pair<int, int> parameters = GetParam(); + num_frames_ = parameters.first; + frame_length_ms_ = parameters.second; + frame_length_bytes_ = (frame_length_ms_ == 20) ? 
38 : 50; + } + size_t num_frames_; + int frame_length_ms_; + size_t frame_length_bytes_; +}; + +TEST_P(SplitIlbcTest, NumFrames) { + AudioDecoderIlbcImpl decoder; + const size_t frame_length_samples = frame_length_ms_ * 8; + const auto generate_payload = [](size_t payload_length_bytes) { + rtc::Buffer payload(payload_length_bytes); + // Fill payload with increasing integers {0, 1, 2, ...}. + for (size_t i = 0; i < payload.size(); ++i) { + payload[i] = static_cast<uint8_t>(i); + } + return payload; + }; + + const auto results = decoder.ParsePayload( + generate_payload(frame_length_bytes_ * num_frames_), 0); + EXPECT_EQ(num_frames_, results.size()); + + size_t frame_num = 0; + uint8_t payload_value = 0; + for (const auto& result : results) { + EXPECT_EQ(frame_length_samples * frame_num, result.timestamp); + const LegacyEncodedAudioFrame* frame = + static_cast<const LegacyEncodedAudioFrame*>(result.frame.get()); + const rtc::Buffer& payload = frame->payload(); + EXPECT_EQ(frame_length_bytes_, payload.size()); + for (size_t i = 0; i < payload.size(); ++i, ++payload_value) { + EXPECT_EQ(payload_value, payload[i]); + } + ++frame_num; + } +} + +// Test 1 through 5 frames of 20 and 30 ms size. +// Also test the maximum number of frames in one packet for 20 and 30 ms. +// The maximum is defined by the largest payload length that can be uniquely +// resolved to a frame size of either 38 bytes (20 ms) or 50 bytes (30 ms). +INSTANTIATE_TEST_SUITE_P( + IlbcTest, + SplitIlbcTest, + ::testing::Values(std::pair<int, int>(1, 20), // 1 frame, 20 ms. + std::pair<int, int>(2, 20), // 2 frames, 20 ms. + std::pair<int, int>(3, 20), // And so on. + std::pair<int, int>(4, 20), + std::pair<int, int>(5, 20), + std::pair<int, int>(24, 20), + std::pair<int, int>(1, 30), + std::pair<int, int>(2, 30), + std::pair<int, int>(3, 30), + std::pair<int, int>(4, 30), + std::pair<int, int>(5, 30), + std::pair<int, int>(18, 30))); + +// Test too large payload size. 
+TEST(IlbcTest, SplitTooLargePayload) { + AudioDecoderIlbcImpl decoder; + constexpr size_t kPayloadLengthBytes = 950; + const auto results = + decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0); + EXPECT_TRUE(results.empty()); +} + +// Payload not an integer number of frames. +TEST(IlbcTest, SplitUnevenPayload) { + AudioDecoderIlbcImpl decoder; + constexpr size_t kPayloadLengthBytes = 39; // Not an even number of frames. + const auto results = + decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0); + EXPECT_TRUE(results.empty()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c new file mode 100644 index 0000000000..d78f81a897 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_dec.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  Expand the second and third codebook index (index[4] and
 *  index[5]) for the first 40 samples so that they use the same
 *  lag-wise numbering as the first index:
 *    44..107  -> 108..171
 *    108..127 -> 236..255
 *  Any other value is left unchanged.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_IndexConvDec(
    int16_t *index   /* (i/o) Codebook indexes */
                                ){
  int16_t *entry;

  for (entry = index + 4; entry != index + 6; entry++) {
    const int16_t packed = *entry;

    if ((packed >= 108) && (packed < 128)) {
      *entry = (int16_t)(packed + 128);
    } else if ((packed >= 44) && (packed < 108)) {
      *entry = (int16_t)(packed + 64);
    }
    /* Values outside both ranges are passed through untouched. */
  }
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvDec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_DEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_DEC_H_ + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_IndexConvDec(int16_t* index /* (i/o) Codebook indexes */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c new file mode 100644 index 0000000000..83144150b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/index_conv_enc.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  Convert the codebook indexes to make the search easier.
 *
 *  Entries 4 and 5 of `index` (the second and third codebook
 *  index) are readjusted so that they can be packetized into
 *  7 bits; before the call they are laid out lag-wise like the
 *  first codebook, which uses 8 bits.
 *
 *  Values in [108, 172) are lowered by 64 and values >= 236 are
 *  lowered by 128. Any other value is left as it is.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_IndexConvEnc(
    int16_t *index   /* (i/o) Codebook indexes */
                                ) {
  int16_t *entry;

  for (entry = index + 4; entry < index + 6; entry++) {
    const int16_t value = *entry;

    if (value >= 236) {
      *entry = (int16_t)(value - 128);
    } else if (value >= 108 && value < 172) {
      *entry = (int16_t)(value - 64);
    }
    /* Everything else: the original code flags an error case here
       but performs no action. */
  }
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_IndexConvEnc.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_ENC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INDEX_CONV_ENC_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * Convert the codebook indexes to make the search easier + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_IndexConvEnc(int16_t* index /* (i/o) Codebook indexes */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c new file mode 100644 index 0000000000..3eb41e33b0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitDecode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/init_decode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of decoder instance. 
+ *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitDecode( /* (o) Number of decoded samples */ + IlbcDecoder *iLBCdec_inst, /* (i/o) Decoder instance */ + int16_t mode, /* (i) frame size mode */ + int use_enhancer) { /* (i) 1: use enhancer, 0: no enhancer */ + int i; + + iLBCdec_inst->mode = mode; + + /* Set all the variables that are dependent on the frame size mode */ + if (mode==30) { + iLBCdec_inst->blockl = BLOCKL_30MS; + iLBCdec_inst->nsub = NSUB_30MS; + iLBCdec_inst->nasub = NASUB_30MS; + iLBCdec_inst->lpc_n = LPC_N_30MS; + iLBCdec_inst->no_of_bytes = NO_OF_BYTES_30MS; + iLBCdec_inst->no_of_words = NO_OF_WORDS_30MS; + iLBCdec_inst->state_short_len=STATE_SHORT_LEN_30MS; + } + else if (mode==20) { + iLBCdec_inst->blockl = BLOCKL_20MS; + iLBCdec_inst->nsub = NSUB_20MS; + iLBCdec_inst->nasub = NASUB_20MS; + iLBCdec_inst->lpc_n = LPC_N_20MS; + iLBCdec_inst->no_of_bytes = NO_OF_BYTES_20MS; + iLBCdec_inst->no_of_words = NO_OF_WORDS_20MS; + iLBCdec_inst->state_short_len=STATE_SHORT_LEN_20MS; + } + else { + return(-1); + } + + /* Reset all the previous LSF to mean LSF */ + WEBRTC_SPL_MEMCPY_W16(iLBCdec_inst->lsfdeqold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + + /* Clear the synthesis filter memory */ + WebRtcSpl_MemSetW16(iLBCdec_inst->syntMem, 0, LPC_FILTERORDER); + + /* Set the old synthesis filter to {1.0 0.0 ... 
0.0} */ + WebRtcSpl_MemSetW16(iLBCdec_inst->old_syntdenum, 0, ((LPC_FILTERORDER + 1)*NSUB_MAX)); + for (i=0; i<NSUB_MAX; i++) { + iLBCdec_inst->old_syntdenum[i*(LPC_FILTERORDER+1)] = 4096; + } + + /* Clear the variables that are used for the PLC */ + iLBCdec_inst->last_lag = 20; + iLBCdec_inst->consPLICount = 0; + iLBCdec_inst->prevPLI = 0; + iLBCdec_inst->perSquare = 0; + iLBCdec_inst->prevLag = 120; + iLBCdec_inst->prevLpc[0] = 4096; + WebRtcSpl_MemSetW16(iLBCdec_inst->prevLpc+1, 0, LPC_FILTERORDER); + WebRtcSpl_MemSetW16(iLBCdec_inst->prevResidual, 0, BLOCKL_MAX); + + /* Initialize the seed for the random number generator */ + iLBCdec_inst->seed = 777; + + /* Set the filter state of the HP filter to 0 */ + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemx, 0, 2); + WebRtcSpl_MemSetW16(iLBCdec_inst->hpimemy, 0, 4); + + /* Set the variables that are used in the ehnahcer */ + iLBCdec_inst->use_enhancer = use_enhancer; + WebRtcSpl_MemSetW16(iLBCdec_inst->enh_buf, 0, (ENH_BUFL+ENH_BUFL_FILTEROVERHEAD)); + for (i=0;i<ENH_NBLOCKS_TOT;i++) { + iLBCdec_inst->enh_period[i]=160; /* Q(-4) */ + } + + iLBCdec_inst->prev_enh_pl = 0; + + return (int)(iLBCdec_inst->blockl); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h new file mode 100644 index 0000000000..a2b7b91287 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_decode.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitDecode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_DECODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_DECODE_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of decoder instance. + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitDecode(/* (o) Number of decoded samples */ + IlbcDecoder* + iLBCdec_inst, /* (i/o) Decoder instance */ + int16_t mode, /* (i) frame size mode */ + int use_enhancer /* (i) 1 to use enhancer + 0 to run without enhancer */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c new file mode 100644 index 0000000000..aa858e94bb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitEncode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/init_encode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of encoder instance. + *---------------------------------------------------------------*/ + +int WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */ + IlbcEncoder *iLBCenc_inst, /* (i/o) Encoder instance */ + int16_t mode) { /* (i) frame size mode */ + iLBCenc_inst->mode = mode; + + /* Set all the variables that are dependent on the frame size mode */ + if (mode==30) { + iLBCenc_inst->blockl = BLOCKL_30MS; + iLBCenc_inst->nsub = NSUB_30MS; + iLBCenc_inst->nasub = NASUB_30MS; + iLBCenc_inst->lpc_n = LPC_N_30MS; + iLBCenc_inst->no_of_bytes = NO_OF_BYTES_30MS; + iLBCenc_inst->no_of_words = NO_OF_WORDS_30MS; + iLBCenc_inst->state_short_len=STATE_SHORT_LEN_30MS; + } + else if (mode==20) { + iLBCenc_inst->blockl = BLOCKL_20MS; + iLBCenc_inst->nsub = NSUB_20MS; + iLBCenc_inst->nasub = NASUB_20MS; + iLBCenc_inst->lpc_n = LPC_N_20MS; + iLBCenc_inst->no_of_bytes = NO_OF_BYTES_20MS; + iLBCenc_inst->no_of_words = NO_OF_WORDS_20MS; + iLBCenc_inst->state_short_len=STATE_SHORT_LEN_20MS; + } + else { + return(-1); + } + + /* Clear the buffers and set the previous LSF and LSP to the mean value */ + WebRtcSpl_MemSetW16(iLBCenc_inst->anaMem, 0, LPC_FILTERORDER); + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lsfold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lsfdeqold, WebRtcIlbcfix_kLsfMean, LPC_FILTERORDER); + WebRtcSpl_MemSetW16(iLBCenc_inst->lpc_buffer, 0, LPC_LOOKBACK + BLOCKL_MAX); + + /* Set the filter state of the HP filter to 0 */ + 
WebRtcSpl_MemSetW16(iLBCenc_inst->hpimemx, 0, 2); + WebRtcSpl_MemSetW16(iLBCenc_inst->hpimemy, 0, 4); + +#ifdef SPLIT_10MS + /*Zeroing the past samples for 10msec Split*/ + WebRtcSpl_MemSetW16(iLBCenc_inst->past_samples,0,160); + iLBCenc_inst->section = 0; +#endif + + return (int)(iLBCenc_inst->no_of_bytes); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h new file mode 100644 index 0000000000..4ada6a30c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/init_encode.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InitEncode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_ENCODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INIT_ENCODE_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * Initiation of encoder instance. 
/*----------------------------------------------------------------*
 *  interpolation between vectors
 *
 *  For each of the `length` positions:
 *    out[i] = in1[i]*coef + (1-coef)*in2[i]   (with rounding)
 *  where coef is a Q14 weight (16384 = 1.0). Nothing is written
 *  when `length` is zero or negative.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Interpolate(
    int16_t *out, /* (o) output vector */
    int16_t *in1, /* (i) first input vector */
    int16_t *in2, /* (i) second input vector */
    int16_t coef, /* (i) weight coefficient in Q14 */
    int16_t length)  /* (i) number of samples in the vectors */
{
  /* Weight applied to in2: 1.0 - coef, with 16384 = 1.0 (Q14) */
  const int16_t complement = (int16_t)(16384 - coef);
  int pos = 0;

  while (pos < length) {
    /* Q14 weighted sum; adding 8192 (0.5 in Q14) rounds the result
       before the shift back down. */
    const int weighted = coef * in1[pos] + complement * in2[pos] + 8192;

    out[pos] = (int16_t)(weighted >> 14);
    ++pos;
  }
}
@@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InterpolateSamples.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/interpolate_samples.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +void WebRtcIlbcfix_InterpolateSamples( + int16_t *interpSamples, /* (o) The interpolated samples */ + int16_t *CBmem, /* (i) The CB memory */ + size_t lMem /* (i) Length of the CB memory */ + ) { + int16_t *ppi, *ppo, i, j, temp1, temp2; + int16_t *tmpPtr; + + /* Calculate the 20 vectors of interpolated samples (4 samples each) + that are used in the codebooks for lag 20 to 39 */ + tmpPtr = interpSamples; + for (j=0; j<20; j++) { + temp1 = 0; + temp2 = 3; + ppo = CBmem+lMem-4; + ppi = CBmem+lMem-j-24; + for (i=0; i<4; i++) { + + *tmpPtr++ = (int16_t)((WebRtcIlbcfix_kAlpha[temp2] * *ppo) >> 15) + + (int16_t)((WebRtcIlbcfix_kAlpha[temp1] * *ppi) >> 15); + + ppo++; + ppi++; + temp1++; + temp2--; + } + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h new file mode 100644 index 0000000000..bc665d7854 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/interpolate_samples.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_InterpolateSamples.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_SAMPLES_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_INTERPOLATE_SAMPLES_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Construct the interpolated samples for the Augmented CB + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_InterpolateSamples( + int16_t* interpSamples, /* (o) The interpolated samples */ + int16_t* CBmem, /* (i) The CB memory */ + size_t lMem /* (i) Length of the CB memory */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c new file mode 100644 index 0000000000..89f6d29724 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LpcEncode.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lpc_encode.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/lsf_check.h" +#include "modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h" +#include "modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h" +#include "modules/audio_coding/codecs/ilbc/simple_lsf_quant.h" + +/*----------------------------------------------------------------* + * lpc encoder + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LpcEncode( + int16_t *syntdenum, /* (i/o) synthesis filter coefficients + before/after encoding */ + int16_t *weightdenum, /* (i/o) weighting denumerator coefficients + before/after encoding */ + int16_t *lsf_index, /* (o) lsf quantization index */ + int16_t *data, /* (i) Speech to do LPC analysis on */ + IlbcEncoder *iLBCenc_inst + /* (i/o) the encoder state structure */ + ) { + /* Stack based */ + int16_t lsf[LPC_FILTERORDER * LPC_N_MAX]; + int16_t lsfdeq[LPC_FILTERORDER * LPC_N_MAX]; + + /* Calculate LSF's from the input speech */ + WebRtcIlbcfix_SimpleLpcAnalysis(lsf, data, iLBCenc_inst); + + /* Quantize the LSF's */ + WebRtcIlbcfix_SimpleLsfQ(lsfdeq, lsf_index, lsf, iLBCenc_inst->lpc_n); + + /* Stableize the LSF's if needed */ + WebRtcIlbcfix_LsfCheck(lsfdeq, LPC_FILTERORDER, iLBCenc_inst->lpc_n); + + /* Calculate the synthesis and weighting filter coefficients from + the optimal LSF and the dequantized LSF */ + WebRtcIlbcfix_SimpleInterpolateLsf(syntdenum, weightdenum, + lsf, lsfdeq, iLBCenc_inst->lsfold, + iLBCenc_inst->lsfdeqold, LPC_FILTERORDER, iLBCenc_inst); + + return; +} diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h new file mode 100644 index 0000000000..a67b77acbf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lpc_encode.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LpcEncode.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LPC_ENCODE_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LPC_ENCODE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * lpc encoder + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LpcEncode( + int16_t* syntdenum, /* (i/o) synthesis filter coefficients + before/after encoding */ + int16_t* weightdenum, /* (i/o) weighting denumerator coefficients + before/after encoding */ + int16_t* lsf_index, /* (o) lsf quantization index */ + int16_t* data, /* (i) Speech to do LPC analysis on */ + IlbcEncoder* iLBCenc_inst + /* (i/o) the encoder state structure */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_check.c new file mode 100644 index 0000000000..9f0e19a2d9 --- /dev/null +++ 
/*----------------------------------------------------------------*
 *  check for stability of lsf coefficients
 *
 *  `lsf` holds NoAn consecutive sets of `dim` coefficients. Two
 *  passes are made over all sets. Within a set, each neighbouring
 *  pair closer than the safety margin is pushed apart, and the
 *  lower element of each pair is limited to [minimum, maximum].
 *  The last coefficient of a set is never the lower element of a
 *  pair, so the limit is not applied to it.
 *
 *  Returns 1 if any coefficient was adjusted, otherwise 0.
 *---------------------------------------------------------------*/

int WebRtcIlbcfix_LsfCheck(
    int16_t *lsf, /* LSF parameters */
    int dim, /* dimension of LSF */
    int NoAn)  /* No of analysis per frame */
{
  const int16_t min_gap = 319;      /* 0.039 in Q13 (50 Hz) */
  const int16_t half_gap = 160;     /* min_gap/2.0 in Q13 */
  const int16_t upper_limit = 25723; /* 3.14; (4000 Hz) */
  const int16_t lower_limit = 82;   /* 0.01; (0 Hz) */
  int pass, set, coef;
  int adjusted = 0;

  for (pass = 0; pass < 2; pass++) {       /* Two passes over the data */
    for (set = 0; set < NoAn; set++) {     /* Each analysis in the frame */
      for (coef = 0; coef < (dim - 1); coef++) {
        int16_t *lo = lsf + (set * dim + coef);
        int16_t *hi = lo + 1;

        /* Separate coefficients with a safety margin of 50 Hz */
        if ((*hi - *lo) < min_gap) {
          if (*hi < *lo) {
            /* Out of order: the upper one is placed half a margin
               above the lower one; the lower one is then recomputed
               from the new upper value. */
            *hi = (int16_t)(*lo + half_gap);
            *lo = (int16_t)(*hi - half_gap);
          } else {
            /* In order but too close: move each away by half a margin */
            *lo = (int16_t)(*lo - half_gap);
            *hi = (int16_t)(*hi + half_gap);
          }
          adjusted = 1;
        }

        /* Limit minimum and maximum LSF */
        if (*lo < lower_limit) {
          *lo = lower_limit;
          adjusted = 1;
        }

        if (*lo > upper_limit) {
          *lo = upper_limit;
          adjusted = 1;
        }
      }
    }
  }

  return adjusted;
}
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LspInterpolate2PolyDec.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/interpolate.h" +#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h" + +/*----------------------------------------------------------------* + * interpolation of lsf coefficients for the decoder + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LspInterpolate2PolyDec( + int16_t *a, /* (o) lpc coefficients Q12 */ + int16_t *lsf1, /* (i) first set of lsf coefficients Q13 */ + int16_t *lsf2, /* (i) second set of lsf coefficients Q13 */ + int16_t coef, /* (i) weighting coefficient to use between + lsf1 and lsf2 Q14 */ + int16_t length /* (i) length of coefficient vectors */ + ){ + int16_t lsftmp[LPC_FILTERORDER]; + + /* interpolate LSF */ + WebRtcIlbcfix_Interpolate(lsftmp, lsf1, lsf2, coef, length); + + /* Compute the filter coefficients from the LSF */ + WebRtcIlbcfix_Lsf2Poly(a, lsftmp); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h new file mode 100644 index 0000000000..6cc9d9746d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_dec.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LspInterpolate2PolyDec.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_DEC_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_DEC_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * interpolation of lsf coefficients for the decoder + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LspInterpolate2PolyDec( + int16_t* a, /* (o) lpc coefficients Q12 */ + int16_t* lsf1, /* (i) first set of lsf coefficients Q13 */ + int16_t* lsf2, /* (i) second set of lsf coefficients Q13 */ + int16_t coef, /* (i) weighting coefficient to use between + lsf1 and lsf2 Q14 */ + int16_t length /* (i) length of coefficient vectors */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c new file mode 100644 index 0000000000..618821216c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_LsfInterpolate2PloyEnc.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/interpolate.h" +#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h" + +/*----------------------------------------------------------------* + * lsf interpolator and conversion from lsf to a coefficients + * (subrutine to SimpleInterpolateLSF) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_LsfInterpolate2PloyEnc( + int16_t *a, /* (o) lpc coefficients Q12 */ + int16_t *lsf1, /* (i) first set of lsf coefficients Q13 */ + int16_t *lsf2, /* (i) second set of lsf coefficients Q13 */ + int16_t coef, /* (i) weighting coefficient to use between + lsf1 and lsf2 Q14 */ + int16_t length /* (i) length of coefficient vectors */ + ) { + /* Stack based */ + int16_t lsftmp[LPC_FILTERORDER]; + + /* interpolate LSF */ + WebRtcIlbcfix_Interpolate(lsftmp, lsf1, lsf2, coef, length); + + /* Compute the filter coefficients from the LSF */ + WebRtcIlbcfix_Lsf2Poly(a, lsftmp); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h new file mode 100644 index 0000000000..b278a10f4b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_ENC_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_INTERPOLATE_TO_POLY_ENC_H_

#include <stdint.h>

/*----------------------------------------------------------------*
 *  lsf interpolator and conversion from lsf to a coefficients
 *  (subroutine to SimpleInterpolateLSF)
 *
 *  Interpolates between lsf1 and lsf2 (weight coef) and writes the
 *  LPC coefficients of the interpolated vector to a.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_LsfInterpolate2PloyEnc(
    int16_t* a,    /* (o) lpc coefficients Q12 */
    int16_t* lsf1, /* (i) first set of lsf coefficients Q13 */
    int16_t* lsf2, /* (i) second set of lsf coefficients Q13 */
    int16_t coef,  /* (i) weighting coefficient to use between
                          lsf1 and lsf2 Q14 */
    int16_t length /* (i) length of coefficient vectors */
    );

#endif
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Lsf2Lsp.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lsf_to_lsp.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * conversion from lsf to lsp coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Lsf2Lsp( + int16_t *lsf, /* (i) lsf in Q13 values between 0 and pi */ + int16_t *lsp, /* (o) lsp in Q15 values between -1 and 1 */ + int16_t m /* (i) number of coefficients */ + ) { + int16_t i, k; + int16_t diff; /* difference, which is used for the + linear approximation (Q8) */ + int16_t freq; /* normalized frequency in Q15 (0..1) */ + int32_t tmpW32; + + for(i=0; i<m; i++) + { + freq = (int16_t)((lsf[i] * 20861) >> 15); + /* 20861: 1.0/(2.0*PI) in Q17 */ + /* + Upper 8 bits give the index k and + Lower 8 bits give the difference, which needs + to be approximated linearly + */ + k = freq >> 8; + diff = (freq&0x00ff); + + /* Guard against getting outside table */ + + if (k>63) { + k = 63; + } + + /* Calculate linear approximation */ + tmpW32 = WebRtcIlbcfix_kCosDerivative[k] * diff; + lsp[i] = WebRtcIlbcfix_kCos[k] + (int16_t)(tmpW32 >> 12); + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h new file mode 100644 index 0000000000..6bc6c44dbd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_lsp.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_LSP_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_LSP_H_

#include <stdint.h>

/*----------------------------------------------------------------*
 *  conversion from lsf to lsp coefficients
 *
 *  Converts the first m entries of lsf and stores the results in the
 *  corresponding entries of lsp.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Lsf2Lsp(
    int16_t* lsf, /* (i) lsf in Q13 values between 0 and pi */
    int16_t* lsp, /* (o) lsp in Q15 values between -1 and 1 */
    int16_t m     /* (i) number of coefficients */
    );

#endif
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Lsf2Poly.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lsf_to_poly.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/get_lsp_poly.h" +#include "modules/audio_coding/codecs/ilbc/lsf_to_lsp.h" + +void WebRtcIlbcfix_Lsf2Poly( + int16_t *a, /* (o) predictor coefficients (order = 10) in Q12 */ + int16_t *lsf /* (i) line spectral frequencies in Q13 */ + ) { + int32_t f[2][6]; /* f[0][] and f[1][] corresponds to + F1(z) and F2(z) respectivly */ + int32_t *f1ptr, *f2ptr; + int16_t *a1ptr, *a2ptr; + int32_t tmpW32; + int16_t lsp[10]; + int i; + + /* Convert lsf to lsp */ + WebRtcIlbcfix_Lsf2Lsp(lsf, lsp, LPC_FILTERORDER); + + /* Get F1(z) and F2(z) from the lsp */ + f1ptr=f[0]; + f2ptr=f[1]; + WebRtcIlbcfix_GetLspPoly(&lsp[0],f1ptr); + WebRtcIlbcfix_GetLspPoly(&lsp[1],f2ptr); + + /* for i = 5 down to 1 + Compute f1[i] += f1[i-1]; + and f2[i] += f2[i-1]; + */ + f1ptr=&f[0][5]; + f2ptr=&f[1][5]; + for (i=5; i>0; i--) + { + (*f1ptr) += (*(f1ptr-1)); + (*f2ptr) -= (*(f2ptr-1)); + f1ptr--; + f2ptr--; + } + + /* Get the A(z) coefficients + a[0] = 1.0 + for i = 1 to 5 + a[i] = (f1[i] + f2[i] + round)>>13; + for i = 1 to 5 + a[11-i] = (f1[i] - f2[i] + round)>>13; + */ + a[0]=4096; + a1ptr=&a[1]; + a2ptr=&a[10]; + f1ptr=&f[0][1]; + f2ptr=&f[1][1]; + for (i=5; i>0; i--) + { + tmpW32 = (*f1ptr) + (*f2ptr); + *a1ptr = (int16_t)((tmpW32 + 4096) >> 13); + + tmpW32 = (*f1ptr) - (*f2ptr); + *a2ptr = (int16_t)((tmpW32 + 4096) >> 13); + + a1ptr++; + a2ptr--; + f1ptr++; + f2ptr++; + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsf_to_poly.h new file mode 100644 index 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_POLY_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSF_TO_POLY_H_

#include <stdint.h>

/*----------------------------------------------------------------*
 *  Convert from LSF coefficients to A coefficients
 *
 *  Writes 11 values to a (a[0]..a[10]); a[0] is always set to 4096,
 *  i.e. 1.0 in Q12.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Lsf2Poly(
    int16_t* a,  /* (o) predictor coefficients (order = 10) in Q12 */
    int16_t* lsf /* (i) line spectral frequencies in Q13 */
    );

#endif
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Lsp2Lsf.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/lsp_to_lsf.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * conversion from LSP coefficients to LSF coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Lsp2Lsf( + int16_t *lsp, /* (i) lsp vector -1...+1 in Q15 */ + int16_t *lsf, /* (o) Lsf vector 0...Pi in Q13 + (ordered, so that lsf[i]<lsf[i+1]) */ + int16_t m /* (i) Number of coefficients */ + ) +{ + int16_t i, k; + int16_t diff; /* diff between table value and desired value (Q15) */ + int16_t freq; /* lsf/(2*pi) (Q16) */ + int16_t *lspPtr, *lsfPtr, *cosTblPtr; + int16_t tmp; + + /* set the index to maximum index value in WebRtcIlbcfix_kCos */ + k = 63; + + /* + Start with the highest LSP and then work the way down + For each LSP the lsf is calculated by first order approximation + of the acos(x) function + */ + lspPtr = &lsp[9]; + lsfPtr = &lsf[9]; + cosTblPtr=(int16_t*)&WebRtcIlbcfix_kCos[k]; + for(i=m-1; i>=0; i--) + { + /* + locate value in the table, which is just above lsp[i], + basically an approximation to acos(x) + */ + while( (((int32_t)(*cosTblPtr)-(*lspPtr)) < 0)&&(k>0) ) + { + k-=1; + cosTblPtr--; + } + + /* Calculate diff, which is used in the linear approximation of acos(x) */ + diff = (*lspPtr)-(*cosTblPtr); + + /* + The linear approximation of acos(lsp[i]) : + acos(lsp[i])= k*512 + (WebRtcIlbcfix_kAcosDerivative[ind]*offset >> 11) + */ + + /* tmp (linear offset) in Q16 */ + tmp = (int16_t)((WebRtcIlbcfix_kAcosDerivative[k] * diff) >> 11); + + /* freq in Q16 */ + freq = (k << 9) + tmp; + + /* lsf = freq*2*pi */ + (*lsfPtr) = 
(int16_t)(((int32_t)freq*25736)>>15); + + lsfPtr--; + lspPtr--; + } + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h new file mode 100644 index 0000000000..c2f4b7692d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/lsp_to_lsf.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Lsp2Lsf.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSP_TO_LSF_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_LSP_TO_LSF_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * conversion from LSP coefficients to LSF coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Lsp2Lsf( + int16_t* lsp, /* (i) lsp vector -1...+1 in Q15 */ + int16_t* lsf, /* (o) Lsf vector 0...Pi in Q13 + (ordered, so that lsf[i]<lsf[i+1]) */ + int16_t m /* (i) Number of coefficients */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.c new file mode 100644 index 0000000000..9b870e0ef0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/my_corr.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
/*----------------------------------------------------------------*
 *  compute cross correlation between sequences
 *
 *  Produces dim1 - dim2 + 1 correlation values, each a sum of dim2
 *  products, scaled down by a right shift chosen so that the sums
 *  fit in an int32_t.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_MyCorr(
    int32_t* corr,  /* (o) correlation of seq1 and seq2 */
    const int16_t* seq1,  /* (i) first sequence */
    size_t dim1,  /* (i) dimension first seq1 */
    const int16_t* seq2, /* (i) second sequence */
    size_t dim2   /* (i) dimension seq2 */
                          ){
  // Upper bounds on the magnitude of the samples in each sequence. The +1
  // is needed because WebRtcSpl_MaxAbsValueW16 will return 2**15 - 1 if the
  // input array contains -2**15.
  const uint32_t peak1 = WebRtcSpl_MaxAbsValueW16(seq1, dim1) + 1;
  const uint32_t peak2 = WebRtcSpl_MaxAbsValueW16(seq2, dim2) + 1;

  // Largest magnitude a sum of dim2 pairwise products can reach.
  const uint64_t worst_case_sum = (peak1 * peak2) * (uint64_t)dim2;

  // Right shift that brings the worst case sum down to 31 bits, so that the
  // accumulation cannot overflow an int32_t. Never shift left.
  int right_shift = (64 - 31) - WebRtcSpl_CountLeadingZeros64(worst_case_sum);
  // Number of lags for which seq2 fits entirely inside seq1.
  const size_t lag_count = dim1 - dim2 + 1;

  if (right_shift < 0) {
    right_shift = 0;
  }

  /* Calculate the cross correlations */
  WebRtcSpl_CrossCorrelation(corr, seq2, seq1, dim2, lag_count, right_shift,
                             1);
}
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_MY_CORR_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_MY_CORR_H_

#include <stddef.h>
#include <stdint.h>

/*----------------------------------------------------------------*
 *  compute cross correlation between sequences
 *
 *  The implementation computes dim1 - dim2 + 1 correlation values,
 *  so dim2 must not exceed dim1.
 *  NOTE(review): corr presumably needs room for dim1 - dim2 + 1
 *  entries - confirm against WebRtcSpl_CrossCorrelation.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_MyCorr(int32_t* corr, /* (o) correlation of seq1 and seq2 */
                          const int16_t* seq1, /* (i) first sequence */
                          size_t dim1,         /* (i) dimension first seq1 */
                          const int16_t* seq2, /* (i) second sequence */
                          size_t dim2          /* (i) dimension seq2 */
                          );

#endif
/*----------------------------------------------------------------*
 *  Find index in array such that the array element with said
 *  index is the element of said array closest to "value"
 *
 *  On equal distances the lowest index wins. *index is written only
 *  when an element improves on the initial distance (the largest
 *  size_t value), so it is left untouched when arlength is 0.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_NearestNeighbor(size_t* index,
                                   const size_t* array,
                                   size_t value,
                                   size_t arlength) {
  /* Smallest distance seen so far; starts at the largest size_t value. */
  size_t best_distance = ~(size_t)0;
  size_t pos;

  for (pos = 0; pos != arlength; ++pos) {
    const size_t candidate = array[pos];
    size_t distance;

    /* Absolute difference without leaving unsigned arithmetic. */
    if (candidate < value) {
      distance = value - candidate;
    } else {
      distance = candidate - value;
    }

    /* Strict comparison keeps the first of several equally close
       elements. */
    if (distance < best_distance) {
      best_distance = distance;
      *index = pos;
    }
  }
}
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_NEAREST_NEIGHBOR_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_NEAREST_NEIGHBOR_H_

#include <stddef.h>
#include <stdint.h>

/*----------------------------------------------------------------*
 * Find index in array such that the array element with said
 * index is the element of said array closest to "value"
 *
 * If several elements are equally close, the lowest index is
 * reported. When arlength is 0 nothing is written to *index.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_NearestNeighbor(
    size_t* index,       /* (o) index of array element closest to value */
    const size_t* array, /* (i) data array (Q2) */
    size_t value,        /* (i) value (Q2) */
    size_t arlength      /* (i) dimension of data array (==ENH_NBLOCKS_TOT) */
    );

#endif
/*----------------------------------------------------------------*
 *  packing of bits into the bitstream, i.e., a vector of 16-bit
 *  words
 *
 *  The encoded parameters in enc_bits are split into three groups
 *  (Class 1, 2 and 3 bits of ULP) and written group by group.
 *  Following the word numbering in the comments below, 19 words are
 *  written when mode is 20 and 25 words otherwise; any mode other
 *  than 20 takes the 30 ms branches.
 *
 *  Bit positions in the trailing comments count from the most
 *  significant bit of each word: "Bit 0" is bit 15 of the uint16_t.
 *
 *  Every word is started with a plain assignment and completed with
 *  |=, so the statement order within a word must be preserved.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_PackBits(
    uint16_t *bitstream,   /* (o) The packetized bitstream */
    iLBC_bits *enc_bits,  /* (i) Encoded bits */
    int16_t mode     /* (i) Codec mode (20 or 30) */
                            ){
  uint16_t *bitstreamPtr;  /* word currently being filled */
  int i, k;
  int16_t *tmpPtr;         /* walks enc_bits->idxVec */

  bitstreamPtr=bitstream;

  /* Class 1 bits of ULP */
  /* First int16_t */
  (*bitstreamPtr)  = ((uint16_t)enc_bits->lsf[0])<<10;   /* Bit 0..5  */
  (*bitstreamPtr) |= (enc_bits->lsf[1])<<3;     /* Bit 6..12 */
  (*bitstreamPtr) |= (enc_bits->lsf[2]&0x70)>>4;    /* Bit 13..15 */
  bitstreamPtr++;
  /* Second int16_t */
  (*bitstreamPtr)  = ((uint16_t)enc_bits->lsf[2]&0xF)<<12;  /* Bit 0..3  */

  if (mode==20) {
    (*bitstreamPtr) |= (enc_bits->startIdx)<<10;    /* Bit 4..5  */
    (*bitstreamPtr) |= (enc_bits->state_first)<<9;   /* Bit 6  */
    (*bitstreamPtr) |= (enc_bits->idxForMax)<<3;    /* Bit 7..12 */
    (*bitstreamPtr) |= ((enc_bits->cb_index[0])&0x70)>>4;  /* Bit 13..15 */
    bitstreamPtr++;
    /* Third int16_t */
    (*bitstreamPtr) = ((enc_bits->cb_index[0])&0xE)<<12;  /* Bit 0..2  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x18)<<8;  /* Bit 3..4  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x8)<<7;  /* Bit 5  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0xFE)<<2;  /* Bit 6..12 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[3])&0x10)>>2;  /* Bit 13  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x8)>>2;  /* Bit 14  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x10)>>4;  /* Bit 15  */
  } else { /* mode==30 */
    (*bitstreamPtr) |= (enc_bits->lsf[3])<<6;     /* Bit 4..9  */
    (*bitstreamPtr) |= (enc_bits->lsf[4]&0x7E)>>1;    /* Bit 10..15 */
    bitstreamPtr++;
    /* Third int16_t */
    (*bitstreamPtr)  = ((uint16_t)enc_bits->lsf[4]&0x1)<<15;  /* Bit 0  */
    (*bitstreamPtr) |= (enc_bits->lsf[5])<<8;     /* Bit 1..7  */
    (*bitstreamPtr) |= (enc_bits->startIdx)<<5;    /* Bit 8..10 */
    (*bitstreamPtr) |= (enc_bits->state_first)<<4;   /* Bit 11  */
    (*bitstreamPtr) |= ((enc_bits->idxForMax)&0x3C)>>2;  /* Bit 12..15 */
    bitstreamPtr++;
    /* 4:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)enc_bits->idxForMax&0x3)<<14; /* Bit 0..1  */
    (*bitstreamPtr) |= (enc_bits->cb_index[0]&0x78)<<7;   /* Bit 2..5  */
    (*bitstreamPtr) |= (enc_bits->gain_index[0]&0x10)<<5;  /* Bit 6  */
    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x8)<<5;  /* Bit 7  */
    (*bitstreamPtr) |= (enc_bits->cb_index[3]&0xFC);   /* Bit 8..13 */
    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0x10)>>3;  /* Bit 14  */
    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x8)>>3;  /* Bit 15  */
  }
  /* Class 2 bits of ULP */
  /* 4:th to 6:th int16_t for 20 ms case
     5:th to 7:th int16_t for 30 ms case */
  bitstreamPtr++;
  tmpPtr=enc_bits->idxVec;
  /* Bit 2 (mask 0x4) of the first 48 idxVec entries, 16 entries per
     word, first entry in the most significant bit. */
  for (k=0; k<3; k++) {
    (*bitstreamPtr) = 0;
    for (i=15; i>=0; i--) {
      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
      /* Bit 15-i */
      tmpPtr++;
    }
    bitstreamPtr++;
  }

  if (mode==20) {
    /* 7:th int16_t */
    (*bitstreamPtr) = 0;
    /* Bit 2 of the next 9 idxVec entries (idxVec[48..56]) */
    for (i=15; i>6; i--) {
      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
      /* Bit 15-i */
      tmpPtr++;
    }
    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x4)<<4;  /* Bit 9  */
    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0xC)<<2;  /* Bit 10..11 */
    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x4)<<1;  /* Bit 12  */
    (*bitstreamPtr) |= (enc_bits->gain_index[6]&0x8)>>1;  /* Bit 13  */
    (*bitstreamPtr) |= (enc_bits->gain_index[7]&0xC)>>2;  /* Bit 14..15 */

  } else { /* mode==30 */
    /* 8:th int16_t */
    (*bitstreamPtr) = 0;
    /* Bit 2 of the next 10 idxVec entries (idxVec[48..57]) */
    for (i=15; i>5; i--) {
      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x4)>>2)<<i;
      /* Bit 15-i */
      tmpPtr++;
    }
    (*bitstreamPtr) |= (enc_bits->cb_index[0]&0x6)<<3;   /* Bit 10..11 */
    (*bitstreamPtr) |= (enc_bits->gain_index[0]&0x8);   /* Bit 12  */
    (*bitstreamPtr) |= (enc_bits->gain_index[1]&0x4);   /* Bit 13  */
    (*bitstreamPtr) |= (enc_bits->cb_index[3]&0x2);   /* Bit 14  */
    (*bitstreamPtr) |= (enc_bits->cb_index[6]&0x80)>>7;  /* Bit 15  */
    bitstreamPtr++;
    /* 9:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)enc_bits->cb_index[6]&0x7E)<<9;/* Bit 0..5  */
    (*bitstreamPtr) |= (enc_bits->cb_index[9]&0xFE)<<2;   /* Bit 6..12 */
    (*bitstreamPtr) |= (enc_bits->cb_index[12]&0xE0)>>5;  /* Bit 13..15 */
    bitstreamPtr++;
    /* 10:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)enc_bits->cb_index[12]&0x1E)<<11;/* Bit 0..3 */
    (*bitstreamPtr) |= (enc_bits->gain_index[3]&0xC)<<8;  /* Bit 4..5  */
    (*bitstreamPtr) |= (enc_bits->gain_index[4]&0x6)<<7;  /* Bit 6..7  */
    (*bitstreamPtr) |= (enc_bits->gain_index[6]&0x18)<<3;  /* Bit 8..9  */
    (*bitstreamPtr) |= (enc_bits->gain_index[7]&0xC)<<2;  /* Bit 10..11 */
    (*bitstreamPtr) |= (enc_bits->gain_index[9]&0x10)>>1;  /* Bit 12  */
    (*bitstreamPtr) |= (enc_bits->gain_index[10]&0x8)>>1;  /* Bit 13  */
    (*bitstreamPtr) |= (enc_bits->gain_index[12]&0x10)>>3;  /* Bit 14  */
    (*bitstreamPtr) |= (enc_bits->gain_index[13]&0x8)>>3;  /* Bit 15  */
  }
  bitstreamPtr++;
  /* Class 3 bits of ULP */
  /*  8:th to 14:th int16_t for 20 ms case
      11:th to 17:th int16_t for 30 ms case */
  tmpPtr=enc_bits->idxVec;
  /* The two low bits (mask 0x3) of the first 56 idxVec entries, 8
     entries per word, first entry in the two most significant bits. */
  for (k=0; k<7; k++) {
    (*bitstreamPtr) = 0;
    for (i=14; i>=0; i-=2) {
      (*bitstreamPtr) |= ((uint16_t)((*tmpPtr)&0x3))<<i; /* Bit 15-i..14-i*/
      tmpPtr++;
    }
    bitstreamPtr++;
  }

  if (mode==20) {
    /* 15:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)((enc_bits->idxVec[56])&0x3))<<14;/* Bit 0..1 */
    (*bitstreamPtr) |= (((enc_bits->cb_index[0])&1))<<13;  /* Bit 2  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[1]))<<6;   /* Bit 3..9  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[2])&0x7E)>>1;  /* Bit 10..15 */
    bitstreamPtr++;
    /* 16:th int16_t */
    (*bitstreamPtr) = ((uint16_t)((enc_bits->cb_index[2])&0x1))<<15;
    /* Bit 0  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x7)<<12;  /* Bit 1..3  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x3)<<10;  /* Bit 4..5  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[2]))<<7;   /* Bit 6..8  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0x1)<<6;  /* Bit 9  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[4])&0x7E)>>1;  /* Bit 10..15 */
    bitstreamPtr++;
    /* 17:th int16_t */
    (*bitstreamPtr) = ((uint16_t)((enc_bits->cb_index[4])&0x1))<<15;
    /* Bit 0  */
    (*bitstreamPtr) |= (enc_bits->cb_index[5])<<8;    /* Bit 1..7  */
    (*bitstreamPtr) |= (enc_bits->cb_index[6]);    /* Bit 8..15 */
    bitstreamPtr++;
    /* 18:th int16_t */
    (*bitstreamPtr) = ((uint16_t)(enc_bits->cb_index[7]))<<8; /* Bit 0..7  */
    (*bitstreamPtr) |= (enc_bits->cb_index[8]);    /* Bit 8..15 */
    bitstreamPtr++;
    /* 19:th int16_t */
    (*bitstreamPtr) = ((uint16_t)((enc_bits->gain_index[3])&0x3))<<14;
    /* Bit 0..1  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x3)<<12;  /* Bit 2..3  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[5]))<<9;   /* Bit 4..6  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x7)<<6;  /* Bit 7..9  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[7])&0x3)<<4;  /* Bit 10..11 */
    (*bitstreamPtr) |= (enc_bits->gain_index[8])<<1;   /* Bit 12..14 */
  } else { /* mode==30 */
    /* 18:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)((enc_bits->idxVec[56])&0x3))<<14;/* Bit 0..1 */
    (*bitstreamPtr) |= (((enc_bits->idxVec[57])&0x3))<<12;  /* Bit 2..3  */
    (*bitstreamPtr) |= (((enc_bits->cb_index[0])&1))<<11;  /* Bit 4  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[1]))<<4;   /* Bit 5..11 */
    (*bitstreamPtr) |= ((enc_bits->cb_index[2])&0x78)>>3;  /* Bit 12..15 */
    bitstreamPtr++;
    /* 19:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->cb_index[2])&0x7)<<13;
    /* Bit 0..2  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[0])&0x7)<<10;  /* Bit 3..5  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[1])&0x3)<<8;  /* Bit 6..7  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[2])&0x7)<<5;  /* Bit 8..10 */
    (*bitstreamPtr) |= ((enc_bits->cb_index[3])&0x1)<<4;  /* Bit 11  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[4])&0x78)>>3;  /* Bit 12..15 */
    bitstreamPtr++;
    /* 20:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->cb_index[4])&0x7)<<13;
    /* Bit 0..2  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[5]))<<6;   /* Bit 3..9  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[6])&0x1)<<5;  /* Bit 10  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[7])&0xF8)>>3;  /* Bit 11..15 */
    bitstreamPtr++;
    /* 21:st int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->cb_index[7])&0x7)<<13;
    /* Bit 0..2  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[8]))<<5;   /* Bit 3..10 */
    (*bitstreamPtr) |= ((enc_bits->cb_index[9])&0x1)<<4;  /* Bit 11  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[10])&0xF0)>>4;  /* Bit 12..15 */
    bitstreamPtr++;
    /* 22:nd int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->cb_index[10])&0xF)<<12;
    /* Bit 0..3  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[11]))<<4;   /* Bit 4..11 */
    (*bitstreamPtr) |= ((enc_bits->cb_index[12])&0x1)<<3;  /* Bit 12  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[13])&0xE0)>>5;  /* Bit 13..15 */
    bitstreamPtr++;
    /* 23:rd int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->cb_index[13])&0x1F)<<11;
    /* Bit 0..4  */
    (*bitstreamPtr) |= ((enc_bits->cb_index[14]))<<3;   /* Bit 5..12 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[3])&0x3)<<1;  /* Bit 13..14 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[4])&0x1);  /* Bit 15  */
    bitstreamPtr++;
    /* 24:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->gain_index[5]))<<13;
    /* Bit 0..2  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[6])&0x7)<<10;  /* Bit 3..5  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[7])&0x3)<<8;  /* Bit 6..7  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[8]))<<5;   /* Bit 8..10 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[9])&0xF)<<1;  /* Bit 11..14 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[10])&0x4)>>2;  /* Bit 15  */
    bitstreamPtr++;
    /* 25:th int16_t */
    (*bitstreamPtr)  = ((uint16_t)(enc_bits->gain_index[10])&0x3)<<14;
    /* Bit 0..1  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[11]))<<11;  /* Bit 2..4  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[12])&0xF)<<7;  /* Bit 5..8  */
    (*bitstreamPtr) |= ((enc_bits->gain_index[13])&0x7)<<4;  /* Bit 9..11 */
    (*bitstreamPtr) |= ((enc_bits->gain_index[14]))<<1;  /* Bit 12..14 */
  }
  /* Last bit is automatically zero */

  return;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_PackBits.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_PACK_BITS_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_PACK_BITS_H_ + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * packing of encoded bits into the bitstream, i.e., vector of bytes + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_PackBits( + uint16_t* bitstream, /* (o) The packetized bitstream */ + iLBC_bits* enc_bits, /* (i) Encoded bits */ + int16_t mode /* (i) Codec mode (20 or 30) */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c new file mode 100644 index 0000000000..7192eaab49 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Poly2Lsf.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/poly_to_lsf.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/lsp_to_lsf.h" +#include "modules/audio_coding/codecs/ilbc/poly_to_lsp.h" + +void WebRtcIlbcfix_Poly2Lsf( + int16_t *lsf, /* (o) lsf coefficients (Q13) */ + int16_t *a /* (i) A coefficients (Q12) */ + ) { + int16_t lsp[10]; + WebRtcIlbcfix_Poly2Lsp(a, lsp, (int16_t*)WebRtcIlbcfix_kLspMean); + WebRtcIlbcfix_Lsp2Lsf(lsp, lsf, 10); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h new file mode 100644 index 0000000000..363e392bb2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsf.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Poly2Lsf.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSF_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSF_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * conversion from lpc coefficients to lsf coefficients + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Poly2Lsf(int16_t* lsf, /* (o) lsf coefficients (Q13) */ + int16_t* a /* (i) A coefficients (Q12) */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c new file mode 100644 index 0000000000..ad0ecd70ab --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * conversion from lpc coefficients to lsp coefficients
 * function is only for 10:th order LPC
 *
 * The sum and difference polynomials f1/f2 are built from a[],
 * then their sign changes are searched along the cosine grid
 * WebRtcIlbcfix_kCosGrid, alternating between f1 and f2 after
 * every root found. If fewer than 10 roots are found the output
 * is replaced by old_lsp.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Poly2Lsp(
    int16_t *a,  /* (i) A coefficients in Q12 */
    int16_t *lsp, /* (o) LSP coefficients in Q15 */
    int16_t *old_lsp /* (i) old LSP coefficients that are used if the new
                              coefficients turn out to be unstable
                              (i.e. fewer than 10 roots were found) */
                            ) {
  int16_t f[2][6]; /* f[0][] represents f1 and f[1][] represents f2 */
  int16_t *a_i_ptr, *a_10mi_ptr;
  int16_t *f1ptr, *f2ptr;
  int32_t tmpW32;
  int16_t x, y, xlow, ylow, xmid, ymid, xhigh, yhigh, xint;
  int16_t shifts, sign;
  int i, j;
  int foundFreqs;
  int fi_select;

  /*
     Calculate the two polynomials f1(z) and f2(z)
     (the sum and the diff polynomial)
     f1[0] = f2[0] = 1.0;
     f1[i+1] = a[i+1] + a[10-i] - f1[i];
     f2[i+1] = a[i+1] - a[10-i] + f2[i];

     The a[] terms are right-shifted by 2 before being combined with
     the f values, whose leading coefficient 1.0 is stored as 1024 (Q10).
  */

  a_i_ptr = a + 1;      /* walks forward:  a[1], a[2], ... */
  a_10mi_ptr = a + 10;  /* walks backward: a[10], a[9], ... */
  f1ptr = f[0];
  f2ptr = f[1];
  (*f1ptr) = 1024; /* 1.0 in Q10 */
  (*f2ptr) = 1024; /* 1.0 in Q10 */
  for (i = 0; i < 5; i++) {
    *(f1ptr + 1) =
        (int16_t)((((int32_t)(*a_i_ptr) + *a_10mi_ptr) >> 2) - *f1ptr);
    *(f2ptr + 1) =
        (int16_t)((((int32_t)(*a_i_ptr) - *a_10mi_ptr) >> 2) + *f2ptr);
    a_i_ptr++;
    a_10mi_ptr--;
    f1ptr++;
    f2ptr++;
  }

  /*
     find the LSPs using the Chebyshev polynomial evaluation
  */

  fi_select = 0; /* selector between f1 and f2, start with f1 */

  foundFreqs = 0;

  xlow = WebRtcIlbcfix_kCosGrid[0];
  ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);

  /*
     Iterate until all the 10 LSP's have been found or
     all the grid points have been tried. If the 10 LSP's can
     not be found, set the LSP vector to previous LSP
  */

  for (j = 1; j < COS_GRID_POINTS && foundFreqs < 10; j++) {
    /* The previous grid point becomes the "high" end of the interval */
    xhigh = xlow;
    yhigh = ylow;
    xlow = WebRtcIlbcfix_kCosGrid[j];
    ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);

    /* A non-positive product means a sign change (or a zero) between the
       two grid points; operands are promoted to int before multiplying */
    if (ylow * yhigh <= 0) {
      /* Run 4 times to reduce the interval */
      for (i = 0; i < 4; i++) {
        /* xmid =(xlow + xhigh)/2 */
        xmid = (xlow >> 1) + (xhigh >> 1);
        ymid = WebRtcIlbcfix_Chebyshev(xmid, f[fi_select]);

        if (ylow * ymid <= 0) {
          /* root lies between xlow and xmid */
          yhigh = ymid;
          xhigh = xmid;
        } else {
          /* root lies between xmid and xhigh */
          ylow = ymid;
          xlow = xmid;
        }
      }

      /*
        Calculate xint by linear interpolation:
        xint = xlow - ylow*(xhigh-xlow)/(yhigh-ylow);
      */

      x = xhigh - xlow;
      y = yhigh - ylow;

      if (y == 0) {
        /* Flat segment: no interpolation possible, take xlow */
        xint = xlow;
      } else {
        /* Work on |y| and restore the sign after the division */
        sign = y;
        y = WEBRTC_SPL_ABS_W16(y);
        shifts = (int16_t)WebRtcSpl_NormW32(y)-16;
        y <<= shifts;
        y = (int16_t)WebRtcSpl_DivW32W16(536838144, y); /* 1/(yhigh-ylow) */

        /* The shift amount (19 - shifts) compensates for the
           normalisation shift applied to y above */
        tmpW32 = (x * y) >> (19 - shifts);

        /* y=(xhigh-xlow)/(yhigh-ylow) */
        y = (int16_t)(tmpW32&0xFFFF);

        if (sign < 0) {
          y = -y;
        }
        /* tmpW32 = ylow*(xhigh-xlow)/(yhigh-ylow) */
        tmpW32 = (ylow * y) >> 10;
        xint = xlow-(int16_t)(tmpW32&0xFFFF);
      }

      /* Store the calculated lsp */
      lsp[foundFreqs] = (int16_t)xint;
      foundFreqs++;

      /* if needed, set xlow and ylow for next recursion */
      if (foundFreqs<10) {
        xlow = xint;
        /* Swap between f1 and f2 (f[0][] and f[1][]) */
        fi_select = ((fi_select+1)&0x1);

        /* Re-evaluate at the found root using the other polynomial */
        ylow = WebRtcIlbcfix_Chebyshev(xlow, f[fi_select]);
      }
    }
  }

  /* Check if all 10 roots were found, if not then use the old LSP */
  if (foundFreqs < 10) {
    WEBRTC_SPL_MEMCPY_W16(lsp, old_lsp, 10);
  }
  return;
}
--git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h new file mode 100644 index 0000000000..928ee4efdb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/poly_to_lsp.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Poly2Lsp.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSP_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_POLY_TO_LSP_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * conversion from lpc coefficients to lsp coefficients + * function is only for 10:th order LPC + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Poly2Lsp( + int16_t* a, /* (i) A coefficients in Q12 */ + int16_t* lsp, /* (o) LSP coefficients in Q15 */ + int16_t* old_lsp /* (i) old LSP coefficients that are used if the new + coefficients turn out to be unstable */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c new file mode 100644 index 0000000000..5bdab7a4b0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/refiner.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
/*----------------------------------------------------------------*
 * find segment starting near idata+estSegPos that has highest
 * correlation with idata+centerStartPos through
 * idata+centerStartPos+ENH_BLOCKL-1 segment is found at a
 * resolution of ENH_UPS0 times the original sampling rate.
 *
 * The best-matching segment is filtered with a polyphase filter
 * selected from WebRtcIlbcfix_kEnhPolyPhaser, scaled by `gain`
 * and accumulated into `surround`.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Refiner(
    size_t *updStartPos, /* (o) updated start point (Q-2) */
    int16_t *idata,   /* (i) original data buffer */
    size_t idatal,   /* (i) dimension of idata */
    size_t centerStartPos, /* (i) beginning center segment */
    size_t estSegPos,  /* (i) estimated beginning other segment (Q-2) */
    int16_t *surround,  /* (i/o) The contribution from this sequence
                                 summed with earlier contributions */
    int16_t gain    /* (i) Gain to use for this sequence */
                           ){
  size_t estSegPosRounded, searchSegStartPos, searchSegEndPos, corrdim;
  size_t tloc, tloc2, i;

  int32_t maxtemp, scalefact;
  int16_t *filtStatePtr, *polyPtr;
  /* Stack based */
  int16_t filt[7];
  int32_t corrVecUps[ENH_CORRDIM*ENH_UPS0];
  int32_t corrVecTemp[ENH_CORRDIM];
  int16_t vect[ENH_VECTL];
  int16_t corrVec[ENH_CORRDIM];

  /* defining array bounds */

  /* Convert the Q-2 estimate to a whole-sample position.
     NOTE(review): estSegPos is size_t, so (estSegPos - 2) wraps around
     when estSegPos < 2 -- presumably callers guarantee estSegPos >= 2;
     confirm against the caller. */
  estSegPosRounded = (estSegPos - 2) >> 2;

  /* Search window is +/- ENH_SLOP samples around the rounded estimate,
     clamped at the start of the buffer ... */
  searchSegStartPos =
      (estSegPosRounded < ENH_SLOP) ? 0 : (estSegPosRounded - ENH_SLOP);

  /* ... and at the end, so that a full ENH_BLOCKL block still fits */
  searchSegEndPos = estSegPosRounded + ENH_SLOP;
  if ((searchSegEndPos + ENH_BLOCKL) >= idatal) {
    searchSegEndPos = idatal - ENH_BLOCKL - 1;
  }

  /* Number of candidate lags in the search window */
  corrdim = searchSegEndPos + 1 - searchSegStartPos;

  /* compute upsampled correlation and find
     location of max */

  WebRtcIlbcfix_MyCorr(corrVecTemp, idata + searchSegStartPos,
                       corrdim + ENH_BLOCKL - 1, idata + centerStartPos,
                       ENH_BLOCKL);

  /* Calculate the rescaling factor for the correlation in order to
     put the correlation in a int16_t vector instead */
  maxtemp = WebRtcSpl_MaxAbsValueW32(corrVecTemp, corrdim);

  scalefact = WebRtcSpl_GetSizeInBits(maxtemp) - 15;

  if (scalefact > 0) {
    /* Largest value needs more than 15 bits: shift everything down */
    for (i = 0; i < corrdim; i++) {
      corrVec[i] = (int16_t)(corrVecTemp[i] >> scalefact);
    }
  } else {
    /* Values already fit: plain narrowing copy */
    for (i = 0; i < corrdim; i++) {
      corrVec[i] = (int16_t)corrVecTemp[i];
    }
  }
  /* In order to guarantee that all values are initialized */
  for (i = corrdim; i < ENH_CORRDIM; i++) {
    corrVec[i] = 0;
  }

  /* Upsample the correlation */
  WebRtcIlbcfix_EnhUpsample(corrVecUps, corrVec);

  /* Find maximum */
  tloc = WebRtcSpl_MaxIndexW32(corrVecUps, ENH_UPS0 * corrdim);

  /* make vector can be upsampled without ever running outside
     bounds */
  *updStartPos = searchSegStartPos * 4 + tloc + 4;

  /* Upsampled peak index rounded up to a whole-sample index */
  tloc2 = (tloc + 3) >> 2;

  /* initialize the vector to be filtered, stuff with zeros
     when data is outside idata buffer */
  if (ENH_FL0 > (searchSegStartPos + tloc2)) {
    /* Segment would start before idata[0]: zero-pad the front */
    const size_t st = ENH_FL0 - searchSegStartPos - tloc2;
    WebRtcSpl_MemSetW16(vect, 0, st);
    WEBRTC_SPL_MEMCPY_W16(&vect[st], idata, ENH_VECTL - st);
  } else {
    const size_t st = searchSegStartPos + tloc2 - ENH_FL0;
    if ((st + ENH_VECTL) > idatal) {
      /* Segment would run past the end of idata: zero-pad the tail */
      const size_t en = st + ENH_VECTL - idatal;
      WEBRTC_SPL_MEMCPY_W16(vect, &idata[st], ENH_VECTL - en);
      WebRtcSpl_MemSetW16(&vect[ENH_VECTL - en], 0, en);
    } else {
      /* Segment lies completely inside idata */
      WEBRTC_SPL_MEMCPY_W16(vect, &idata[st], ENH_VECTL);
    }
  }

  /* compute the segment (this is actually a convolution) */
  /* The 7 taps of the selected polyphase filter are copied into filt[]
     in reverse order (filt[6] receives the first tap) */
  filtStatePtr = filt + 6;
  polyPtr = (int16_t*)WebRtcIlbcfix_kEnhPolyPhaser[tloc2 * ENH_UPS0 - tloc];
  for (i = 0; i < 7; i++) {
    *filtStatePtr-- = *polyPtr++;
  }

  /* Filter in place: the output is written to the start of vect while
     the input is read starting at vect[6] */
  WebRtcSpl_FilterMAFastQ12(&vect[6], vect, filt, ENH_FLO_MULT2_PLUS1,
                            ENH_BLOCKL);

  /* Add the contribution from this vector (scaled with gain) to the total
     surround vector */
  WebRtcSpl_AddAffineVectorToVector(surround, vect, gain, 32768, 16,
                                    ENH_BLOCKL);

  return;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Refiner.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_REFINER_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_REFINER_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * find segment starting near idata+estSegPos that has highest + * correlation with idata+centerStartPos through + * idata+centerStartPos+ENH_BLOCKL-1 segment is found at a + * resolution of ENH_UPSO times the original of the original + * sampling rate + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Refiner( + size_t* updStartPos, /* (o) updated start point (Q-2) */ + int16_t* idata, /* (i) original data buffer */ + size_t idatal, /* (i) dimension of idata */ + size_t centerStartPos, /* (i) beginning center segment */ + size_t estSegPos, /* (i) estimated beginning other segment (Q-2) */ + int16_t* surround, /* (i/o) The contribution from this sequence + summed with earlier contributions */ + int16_t gain /* (i) Gain to use for this sequence */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c new file mode 100644 index 0000000000..7343530a5e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleInterpolateLsf.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h" + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/lsf_interpolate_to_poly_enc.h" + +/*----------------------------------------------------------------* + * lsf interpolator (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleInterpolateLsf( + int16_t *syntdenum, /* (o) the synthesis filter denominator + resulting from the quantized + interpolated lsf Q12 */ + int16_t *weightdenum, /* (o) the weighting filter denominator + resulting from the unquantized + interpolated lsf Q12 */ + int16_t *lsf, /* (i) the unquantized lsf coefficients Q13 */ + int16_t *lsfdeq, /* (i) the dequantized lsf coefficients Q13 */ + int16_t *lsfold, /* (i) the unquantized lsf coefficients of + the previous signal frame Q13 */ + int16_t *lsfdeqold, /* (i) the dequantized lsf coefficients of the + previous signal frame Q13 */ + int16_t length, /* (i) should equate FILTERORDER */ + IlbcEncoder *iLBCenc_inst + /* (i/o) the encoder state structure */ + ) { + size_t i; + int pos, lp_length; + + int16_t *lsf2, *lsfdeq2; + /* Stack based */ + int16_t lp[LPC_FILTERORDER + 1]; + + lsf2 = lsf + length; + lsfdeq2 = lsfdeq + length; + lp_length = length + 1; + + if (iLBCenc_inst->mode==30) { + /* subframe 1: Interpolation between old and first set of + lsf coefficients */ + + /* Calculate Analysis/Syntehsis filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, 
lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight30ms[0], + length); + WEBRTC_SPL_MEMCPY_W16(syntdenum, lp, lp_length); + + /* Calculate Weighting filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfold, lsf, + WebRtcIlbcfix_kLsfWeight30ms[0], + length); + WebRtcIlbcfix_BwExpand(weightdenum, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum, + (int16_t)lp_length); + + /* subframe 2 to 6: Interpolation between first and second + set of lsf coefficients */ + + pos = lp_length; + for (i = 1; i < iLBCenc_inst->nsub; i++) { + + /* Calculate Analysis/Syntehsis filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfdeq, lsfdeq2, + WebRtcIlbcfix_kLsfWeight30ms[i], + length); + WEBRTC_SPL_MEMCPY_W16(syntdenum + pos, lp, lp_length); + + /* Calculate Weighting filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsf, lsf2, + WebRtcIlbcfix_kLsfWeight30ms[i], + length); + WebRtcIlbcfix_BwExpand(weightdenum + pos, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum, + (int16_t)lp_length); + + pos += lp_length; + } + + /* update memory */ + + WEBRTC_SPL_MEMCPY_W16(lsfold, lsf2, length); + WEBRTC_SPL_MEMCPY_W16(lsfdeqold, lsfdeq2, length); + + } else { /* iLBCenc_inst->mode==20 */ + pos = 0; + for (i = 0; i < iLBCenc_inst->nsub; i++) { + + /* Calculate Analysis/Syntehsis filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfdeqold, lsfdeq, + WebRtcIlbcfix_kLsfWeight20ms[i], + length); + WEBRTC_SPL_MEMCPY_W16(syntdenum + pos, lp, lp_length); + + /* Calculate Weighting filter from quantized LSF */ + WebRtcIlbcfix_LsfInterpolate2PloyEnc(lp, lsfold, lsf, + WebRtcIlbcfix_kLsfWeight20ms[i], + length); + WebRtcIlbcfix_BwExpand(weightdenum+pos, lp, + (int16_t*)WebRtcIlbcfix_kLpcChirpWeightDenum, + (int16_t)lp_length); + + pos += lp_length; + } + + /* update memory */ + + WEBRTC_SPL_MEMCPY_W16(lsfold, lsf, length); + WEBRTC_SPL_MEMCPY_W16(lsfdeqold, lsfdeq, length); + + } + + return; +} diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h new file mode 100644 index 0000000000..ee53e4bd08 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_interpolate_lsf.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleInterpolateLsf.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_INTERPOLATE_LSF_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_INTERPOLATE_LSF_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * lsf interpolator (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleInterpolateLsf( + int16_t* syntdenum, /* (o) the synthesis filter denominator + resulting from the quantized + interpolated lsf Q12 */ + int16_t* weightdenum, /* (o) the weighting filter denominator + resulting from the unquantized + interpolated lsf Q12 */ + int16_t* lsf, /* (i) the unquantized lsf coefficients Q13 */ + int16_t* lsfdeq, /* (i) the dequantized lsf coefficients Q13 */ + int16_t* lsfold, /* (i) the unquantized lsf coefficients of + the previous signal frame Q13 */ + int16_t* lsfdeqold, /* (i) the dequantized lsf coefficients of the + previous signal 
frame Q13 */ + int16_t length, /* (i) should equate FILTERORDER */ + IlbcEncoder* iLBCenc_inst + /* (i/o) the encoder state structure */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c new file mode 100644 index 0000000000..fdc4553d95 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLpcAnalysis.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h" + +#include "modules/audio_coding/codecs/ilbc/bw_expand.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/poly_to_lsf.h" +#include "modules/audio_coding/codecs/ilbc/window32_w32.h" + +/*----------------------------------------------------------------* + * lpc analysis (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLpcAnalysis( + int16_t *lsf, /* (o) lsf coefficients */ + int16_t *data, /* (i) new block of speech */ + IlbcEncoder *iLBCenc_inst + /* (i/o) the encoder state structure */ + ) { + int k; + int scale; + size_t is; + int16_t stability; + /* Stack based */ + int16_t A[LPC_FILTERORDER + 1]; + int32_t 
R[LPC_FILTERORDER + 1]; + int16_t windowedData[BLOCKL_MAX]; + int16_t rc[LPC_FILTERORDER]; + + is=LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl; + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lpc_buffer+is,data,iLBCenc_inst->blockl); + + /* No lookahead, last window is asymmetric */ + + for (k = 0; k < iLBCenc_inst->lpc_n; k++) { + + is = LPC_LOOKBACK; + + if (k < (iLBCenc_inst->lpc_n - 1)) { + + /* Hanning table WebRtcIlbcfix_kLpcWin[] is in Q15-domain so the output is right-shifted 15 */ + WebRtcSpl_ElementwiseVectorMult(windowedData, iLBCenc_inst->lpc_buffer, WebRtcIlbcfix_kLpcWin, BLOCKL_MAX, 15); + } else { + + /* Hanning table WebRtcIlbcfix_kLpcAsymWin[] is in Q15-domain so the output is right-shifted 15 */ + WebRtcSpl_ElementwiseVectorMult(windowedData, iLBCenc_inst->lpc_buffer+is, WebRtcIlbcfix_kLpcAsymWin, BLOCKL_MAX, 15); + } + + /* Compute autocorrelation */ + WebRtcSpl_AutoCorrelation(windowedData, BLOCKL_MAX, LPC_FILTERORDER, R, &scale); + + /* Window autocorrelation vector */ + WebRtcIlbcfix_Window32W32(R, R, WebRtcIlbcfix_kLpcLagWin, LPC_FILTERORDER + 1 ); + + /* Calculate the A coefficients from the Autocorrelation using Levinson Durbin algorithm */ + stability=WebRtcSpl_LevinsonDurbin(R, A, rc, LPC_FILTERORDER); + + /* + Set the filter to {1.0, 0.0, 0.0,...} if filter from Levinson Durbin algorithm is unstable + This should basically never happen... 
+ */ + if (stability!=1) { + A[0]=4096; + WebRtcSpl_MemSetW16(&A[1], 0, LPC_FILTERORDER); + } + + /* Bandwidth expand the filter coefficients */ + WebRtcIlbcfix_BwExpand(A, A, (int16_t*)WebRtcIlbcfix_kLpcChirpSyntDenum, LPC_FILTERORDER+1); + + /* Convert from A to LSF representation */ + WebRtcIlbcfix_Poly2Lsf(lsf + k*LPC_FILTERORDER, A); + } + + is=LPC_LOOKBACK+BLOCKL_MAX-iLBCenc_inst->blockl; + WEBRTC_SPL_MEMCPY_W16(iLBCenc_inst->lpc_buffer, + iLBCenc_inst->lpc_buffer+LPC_LOOKBACK+BLOCKL_MAX-is, is); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h new file mode 100644 index 0000000000..b5c839ba2a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lpc_analysis.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLpcAnalysis.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LPC_ANALYSIS_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LPC_ANALYSIS_H_ + +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * lpc analysis (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLpcAnalysis( + int16_t* lsf, /* (o) lsf coefficients */ + int16_t* data, /* (i) new block of speech */ + IlbcEncoder* iLBCenc_inst + /* (i/o) the encoder state structure */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c new file mode 100644 index 0000000000..e7494ceb59 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLsfDeQ.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * obtain dequantized lsf coefficients from quantization index + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLsfDeQ( + int16_t *lsfdeq, /* (o) dequantized lsf coefficients */ + int16_t *index, /* (i) quantization index */ + int16_t lpc_n /* (i) number of LPCs */ + ){ + int i, j, pos, cb_pos; + + /* decode first LSF */ + + pos = 0; + cb_pos = 0; + for (i = 0; i < LSF_NSPLIT; i++) { + for (j = 0; j < WebRtcIlbcfix_kLsfDimCb[i]; j++) { + lsfdeq[pos + j] = WebRtcIlbcfix_kLsfCb[cb_pos + j + index[i] * + WebRtcIlbcfix_kLsfDimCb[i]]; + } + pos += WebRtcIlbcfix_kLsfDimCb[i]; + cb_pos += WebRtcIlbcfix_kLsfSizeCb[i] * WebRtcIlbcfix_kLsfDimCb[i]; + } + + if (lpc_n>1) { + /* decode last LSF */ + pos = 0; + cb_pos = 0; + for (i = 0; i < LSF_NSPLIT; i++) { + for (j = 0; j < WebRtcIlbcfix_kLsfDimCb[i]; j++) { + lsfdeq[LPC_FILTERORDER + pos + j] = WebRtcIlbcfix_kLsfCb[ + cb_pos + index[LSF_NSPLIT + i] * WebRtcIlbcfix_kLsfDimCb[i] + j]; + } + pos += WebRtcIlbcfix_kLsfDimCb[i]; + cb_pos += WebRtcIlbcfix_kLsfSizeCb[i] * WebRtcIlbcfix_kLsfDimCb[i]; + } + } + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h new file mode 100644 index 0000000000..6d97d3df33 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_dequant.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLsfDeQ.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_DEQUANT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_DEQUANT_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * obtain dequantized lsf coefficients from quantization index + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLsfDeQ( + int16_t* lsfdeq, /* (o) dequantized lsf coefficients */ + int16_t* index, /* (i) quantization index */ + int16_t lpc_n /* (i) number of LPCs */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c new file mode 100644 index 0000000000..1291d1442e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLsfQ.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/simple_lsf_quant.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/split_vq.h" + +/*----------------------------------------------------------------* + * lsf quantizer (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLsfQ( + int16_t *lsfdeq, /* (o) dequantized lsf coefficients + (dimension FILTERORDER) Q13 */ + int16_t *index, /* (o) quantization index */ + int16_t *lsf, /* (i) the lsf coefficient vector to be + quantized (dimension FILTERORDER) Q13 */ + int16_t lpc_n /* (i) number of lsf sets to quantize */ + ){ + + /* Quantize first LSF with memoryless split VQ */ + WebRtcIlbcfix_SplitVq( lsfdeq, index, lsf, + (int16_t*)WebRtcIlbcfix_kLsfCb, (int16_t*)WebRtcIlbcfix_kLsfDimCb, (int16_t*)WebRtcIlbcfix_kLsfSizeCb); + + if (lpc_n==2) { + /* Quantize second LSF with memoryless split VQ */ + WebRtcIlbcfix_SplitVq( lsfdeq + LPC_FILTERORDER, index + LSF_NSPLIT, + lsf + LPC_FILTERORDER, (int16_t*)WebRtcIlbcfix_kLsfCb, + (int16_t*)WebRtcIlbcfix_kLsfDimCb, (int16_t*)WebRtcIlbcfix_kLsfSizeCb); + } + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h new file mode 100644 index 0000000000..66b553213a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/simple_lsf_quant.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SimpleLsfQ.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_QUANT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SIMPLE_LSF_QUANT_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * lsf quantizer (subrutine to LPCencode) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SimpleLsfQ( + int16_t* lsfdeq, /* (o) dequantized lsf coefficients + (dimension FILTERORDER) Q13 */ + int16_t* index, /* (o) quantization index */ + int16_t* lsf, /* (i) the lsf coefficient vector to be + quantized (dimension FILTERORDER) Q13 */ + int16_t lpc_n /* (i) number of lsf sets to quantize */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c new file mode 100644 index 0000000000..631b2f432a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * find the smoothed output data
 *
 * (WebRtcIlbcfix_Smooth.c; this translation unit includes smooth.h,
 * constants.h, defines.h and smooth_out_data.h from
 * modules/audio_coding/codecs/ilbc/.)
 *
 * Outline of what the code below does:
 *  1. Computes the scaled inner products w00 = <current,current>,
 *     w11 = <surround,surround> and w10 = <surround,current> over
 *     ENH_BLOCKL samples.
 *  2. Derives a gain C from w00/w11 and lets
 *     WebRtcIlbcfix_Smooth_odata() fill odata from C and surround; that
 *     call returns an error measure errs.
 *  3. If errs exceeds the threshold crit, odata is recomputed by
 *     WebRtcSpl_ScaleAndAddVectors() from surround (gain A, shift 9) and
 *     current (gain B, shift 14).
 *
 * All three buffers are accessed with length ENH_BLOCKL.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Smooth(
    int16_t *odata,   /* (o) smoothed output */
    int16_t *current,  /* (i) the un enhanced residual for
                                this block */
    int16_t *surround  /* (i) The approximation from the
                                surrounding sequences */
                          ) {
  int16_t scale, scale1, scale2;
  int16_t A, B, C, denomW16;
  int32_t B_W32, denom, num;
  int32_t errs;
  int32_t w00,w10,w11, endiff, crit;
  int32_t w00prim, w10prim, w11_div_w00;
  int16_t w11prim;
  int16_t bitsw00, bitsw10, bitsw11;
  int32_t w11w00, w10w10, w00w00;
  uint32_t max1, max2, max12;

  /* compute some inner products (ensure no overflow by first calculating proper scale factor) */

  w00 = w10 = w11 = 0;

  // Calculate a right shift that will let us sum ENH_BLOCKL pairwise products
  // of values from the two sequences without overflowing an int32_t. (The +1
  // in max1 and max2 are because WebRtcSpl_MaxAbsValueW16 will return 2**15 -
  // 1 if the input array contains -2**15.)
  max1 = WebRtcSpl_MaxAbsValueW16(current, ENH_BLOCKL) + 1;
  max2 = WebRtcSpl_MaxAbsValueW16(surround, ENH_BLOCKL) + 1;
  max12 = WEBRTC_SPL_MAX(max1, max2);
  /* Number of bits by which max12^2 * ENH_BLOCKL exceeds 31 bits
     (negative when it already fits; clamped to 0 on the next line). */
  scale = (64 - 31) -
          WebRtcSpl_CountLeadingZeros64((max12 * max12) * (uint64_t)ENH_BLOCKL);
  scale=WEBRTC_SPL_MAX(0, scale);

  w00=WebRtcSpl_DotProductWithScale(current,current,ENH_BLOCKL,scale);
  w11=WebRtcSpl_DotProductWithScale(surround,surround,ENH_BLOCKL,scale);
  w10=WebRtcSpl_DotProductWithScale(surround,current,ENH_BLOCKL,scale);

  /* A negative energy is treated as an overflow and saturated. */
  if (w00<0) w00 = WEBRTC_SPL_WORD32_MAX;
  if (w11<0) w11 = WEBRTC_SPL_WORD32_MAX;

  /* Rescale w00 and w11 to w00prim and w11prim, so that w00prim/w11prim
     is in Q16 */

  bitsw00 = WebRtcSpl_GetSizeInBits(w00);
  bitsw11 = WebRtcSpl_GetSizeInBits(w11);
  bitsw10 = WebRtcSpl_GetSizeInBits(WEBRTC_SPL_ABS_W32(w10));
  scale1 = 31 - bitsw00;
  scale2 = 15 - bitsw11;

  /* Keep scale1 - scale2 == 16 by lowering whichever shift is too
     large. */
  if (scale2>(scale1-16)) {
    scale2 = scale1 - 16;
  } else {
    scale1 = scale2 + 16;
  }

  w00prim = w00 << scale1;
  w11prim = (int16_t) WEBRTC_SPL_SHIFT_W32(w11, scale2);

  /* Perform C = sqrt(w00/w11) (C is in Q11 since (16+6)/2=11).
     Note: the code divides w00prim by w11prim, i.e. the energy of
     `current` by the energy of `surround`. */
  if (w11prim>64) {
    endiff = WebRtcSpl_DivW32W16(w00prim, w11prim) << 6;
    C = (int16_t)WebRtcSpl_SqrtFloor(endiff); /* C is in Q11 */
  } else {
    /* w11prim too small to divide by; fall back to a fixed C */
    C = 1;
  }

  /* first try enhancement without power-constraint */

  errs = WebRtcIlbcfix_Smooth_odata(odata, current, surround, C);



  /* if constraint violated by first try, add constraint */

  /* A right shift of more than 31 bits would leave nothing of the
     32-bit product, so the threshold is simply 0 in that case. */
  if ( (6-scale+scale1) > 31) {
    crit=0;
  } else {
    /* crit = 0.05 * w00 (Result in Q-6) */
    crit = WEBRTC_SPL_SHIFT_W32(
        WEBRTC_SPL_MUL(ENH_A0, w00prim >> 14),
        -(6-scale+scale1));
  }

  if (errs > crit) {

    /* w00 is used as a divisor below */
    if( w00 < 1) {
      w00=1;
    }

    /* Calculate w11*w00, w10*w10 and w00*w00 in the same Q domain */

    scale1 = bitsw00-15;
    scale2 = bitsw11-15;

    if (scale2>scale1) {
      scale = scale2;
    } else {
      scale = scale1;
    }

    w11w00 = (int16_t)WEBRTC_SPL_SHIFT_W32(w11, -scale) *
        (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale);

    w10w10 = (int16_t)WEBRTC_SPL_SHIFT_W32(w10, -scale) *
        (int16_t)WEBRTC_SPL_SHIFT_W32(w10, -scale);

    w00w00 = (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale) *
        (int16_t)WEBRTC_SPL_SHIFT_W32(w00, -scale);

    /* Calculate (w11*w00-w10*w10)/(w00*w00) in Q16 */
    if (w00w00>65536) {
      endiff = (w11w00-w10w10);
      endiff = WEBRTC_SPL_MAX(0, endiff);
      /* denom is in Q16 */
      denom = WebRtcSpl_DivW32W16(endiff, (int16_t)(w00w00 >> 16));
    } else {
      denom = 65536;
    }

    if( denom > 7){ /* a very small denom would cause numerical
                       problems; that case gets no smoothing (else
                       branch below) */

      scale=WebRtcSpl_GetSizeInBits(denom)-15;

      if (scale>0) {
        /* denomW16 is in Q(16+scale) */
        denomW16 = (int16_t)(denom >> scale);

        /* num in Q(34-scale) */
        num = ENH_A0_MINUS_A0A0DIV4 >> scale;
      } else {
        /* denomW16 is in Q16 */
        denomW16=(int16_t)denom;

        /* num in Q34 */
        num=ENH_A0_MINUS_A0A0DIV4;
      }

      /* A = sqrt( (ENH_A0-(ENH_A0^2)/4)*(w00*w00)/(w11*w00 - w10*w10) ) in Q9
         (denom above holds (w11*w00 - w10*w10)/(w00*w00)) */
      A = (int16_t)WebRtcSpl_SqrtFloor(WebRtcSpl_DivW32W16(num, denomW16));

      /* B_W32 is in Q30 ( B = 1 - ENH_A0/2 - A * w10/w00 ) */
      scale1 = 31-bitsw10;
      scale2 = 21-scale1;
      /* Multiplication by (1 << scale1) instead of a left shift; w10
         may be negative here. */
      w10prim = w10 == 0 ? 0 : w10 * (1 << scale1);
      w00prim = WEBRTC_SPL_SHIFT_W32(w00, -scale2);
      scale = bitsw00-scale2-15;

      if (scale>0) {
        w10prim >>= scale;
        w00prim >>= scale;
      }

      if ((w00prim>0)&&(w10prim>0)) {
        /* NOTE(review): despite its name, w11_div_w00 receives
           w10prim / w00prim. */
        w11_div_w00=WebRtcSpl_DivW32W16(w10prim, (int16_t)w00prim);

        /* Skip the product when its operands together need more than
           31 bits. */
        if (WebRtcSpl_GetSizeInBits(w11_div_w00)+WebRtcSpl_GetSizeInBits(A)>31) {
          B_W32 = 0;
        } else {
          /* 1073741824 == 2^30, i.e. 1.0 in Q30 */
          B_W32 = (int32_t)1073741824 - (int32_t)ENH_A0DIV2 -
              WEBRTC_SPL_MUL(A, w11_div_w00);
        }
        B = (int16_t)(B_W32 >> 16); /* B in Q14. */
      } else {
        /* No smoothing */
        A = 0;
        B = 16384; /* 1 in Q14 */
      }
    }
    else{ /* essentially no difference between cycles;
             smoothing not needed */

      A = 0;
      B = 16384; /* 1 in Q14 */
    }

    /* create smoothed sequence */

    /* presumably odata = (A*surround >> 9) + (B*current >> 14);
       confirm against WebRtcSpl_ScaleAndAddVectors */
    WebRtcSpl_ScaleAndAddVectors(surround, A, 9,
                                current, B, 14,
                                odata, ENH_BLOCKL);
  }
  return;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Smooth.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * find the smoothed output data + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Smooth(int16_t* odata, /* (o) smoothed output */ + int16_t* current, /* (i) the un enhanced residual for + this block */ + int16_t* surround /* (i) The approximation from the + surrounding sequences */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c new file mode 100644 index 0000000000..9f952bfb93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/smooth_out_data.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 * help function to WebRtcIlbcfix_Smooth()
 *
 * (WebRtcIlbcfix_Smooth_odata.c; the original translation unit
 * includes smooth_out_data.h, constants.h, defines.h and
 * rtc_base/sanitizer.h.)
 *---------------------------------------------------------------*/

/* An s32 + s32 -> s32 addition that wraps around on overflow.
 *
 * The sum is formed in uint32_t, where wrap-around is well defined, and
 * converted back to int32_t.  On two's complement targets this yields
 * the same value the previous plain `a + b` produced in practice, but
 * without relying on undefined signed overflow, so no sanitizer
 * suppression attribute is needed any more. */
static inline int32_t OverflowingAdd_S32_S32_To_S32(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a + (uint32_t)b);
}

/* Writes odata[i] = round(C * surround[i] / 2^11) for the 80 samples of
 * the block and returns the sum of squared differences between psseq
 * and odata, each difference being shifted right by 3 before squaring
 * (hence the result is in Q-6).
 *
 *   odata    (o) 80 output samples
 *   psseq    (i) 80 samples the output is compared with
 *   surround (i) 80 samples that are scaled by C
 *   C        (i) gain; the >> 11 means 2048 acts as 1.0
 *
 * The accumulated error can exceed INT32_MAX for extreme inputs
 * (80 * 8191^2 > 2^31); in that case it wraps around, which matches the
 * historical behaviour.
 */
int32_t WebRtcIlbcfix_Smooth_odata(
    int16_t *odata,
    int16_t *psseq,
    int16_t *surround,
    int16_t C)
{
  /* Block length used by both loops; presumably equals ENH_BLOCKL,
     which the caller uses for the same buffers - TODO confirm. */
  enum { kBlockLen = 80 };
  int i;
  int16_t err;
  int32_t errs = 0;

  for (i = 0; i < kBlockLen; i++) {
    /* + 1024 rounds to nearest before dropping 11 fractional bits */
    odata[i] = (int16_t)((C * surround[i] + 1024) >> 11);
  }

  for (i = 0; i < kBlockLen; i++) {
    err = (psseq[i] - odata[i]) >> 3;
    errs = OverflowingAdd_S32_S32_To_S32(errs, err * err);  // errs in Q-6
  }

  return errs;
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Smooth_odata.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_OUT_DATA_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SMOOTH_OUT_DATA_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * help function to WebRtcIlbcfix_Smooth() + *---------------------------------------------------------------*/ + +int32_t WebRtcIlbcfix_Smooth_odata(int16_t* odata, + int16_t* psseq, + int16_t* surround, + int16_t C); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c new file mode 100644 index 0000000000..c3a24750f0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/sort_sq.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  scalar quantization
 *
 *  (WebRtcIlbcfix_SortSq.c; the original translation unit includes
 *  sort_sq.h and defines.h from modules/audio_coding/codecs/ilbc/.)
 *
 *  Picks the codebook entry closest to x.  The search walks the
 *  codebook from the front and stops at the first entry that is not
 *  smaller than x, so it assumes cb is sorted in ascending order.
 *  Between two neighbouring entries the upper one is chosen only when
 *  x is strictly greater than their rounded-up midpoint.
 *
 *  cb must hold at least one entry.  A codebook with a single entry
 *  always yields index 0; previously that case read cb[-1] and could
 *  return index -1 when x > cb[0].
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_SortSq(
    int16_t *xq,   /* (o) the quantized value */
    int16_t *index,  /* (o) the quantization index */
    int16_t x,   /* (i) the value to quantize */
    const int16_t *cb, /* (i) the quantization codebook */
    int16_t cb_size  /* (i) the size of the quantization codebook */
                          ){
  int i;

  /* At or below the first entry, or nothing else to choose from. */
  if (x <= cb[0] || cb_size <= 1) {
    *index = 0;
    *xq = cb[0];
    return;
  }

  /* x > cb[0] here, so the candidate upper neighbour is at least
     entry 1; advance until cb[i] >= x or the last entry is reached. */
  i = 1;
  while ((i < (cb_size - 1)) && (x > cb[i])) {
    i++;
  }

  /* Decide between cb[i - 1] and cb[i]; the sum is formed in 32 bits
     so that two large entries cannot overflow. */
  if (x > (((int32_t)cb[i] + cb[i - 1] + 1) >> 1)) {
    *index = (int16_t)i;
    *xq = cb[i];
  } else {
    *index = (int16_t)(i - 1);
    *xq = cb[i - 1];
  }
}
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SortSq.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SORT_SQ_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SORT_SQ_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * scalar quantization + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SortSq( + int16_t* xq, /* (o) the quantized value */ + int16_t* index, /* (o) the quantization index */ + int16_t x, /* (i) the value to quantize */ + const int16_t* cb, /* (i) the quantization codebook */ + int16_t cb_size /* (i) the size of the quantization codebook */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c new file mode 100644 index 0000000000..c1f04d2287 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/split_vq.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/*----------------------------------------------------------------*
 *  split vector quantization
 *
 *  (WebRtcIlbcfix_SplitVq.c; this translation unit includes
 *  split_vq.h, constants.h, defines.h, vq3.h and vq4.h from
 *  modules/audio_coding/codecs/ilbc/.)
 *
 *  X is quantized in three pieces: elements 0..2 and 3..5 with
 *  WebRtcIlbcfix_Vq3 and the piece starting at element 6 with
 *  WebRtcIlbcfix_Vq4.  Each piece produces one entry of index and uses
 *  its own section of CB; section s starts dim[s-1]*cbsize[s-1]
 *  elements after section s-1.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_SplitVq(
    int16_t *qX,  /* (o) the quantized vector in Q13 */
    int16_t *index, /* (o) a vector of indexes for all vector
                                   codebooks in the split */
    int16_t *X,  /* (i) the vector to quantize */
    int16_t *CB,  /* (i) the quantizer codebook in Q13 */
    int16_t *dim, /* (i) the dimension of X and qX */
    int16_t *cbsize /* (i) the number of vectors in the codebook */
                           ) {
  int16_t *codebook = CB;

  /* First split: elements 0..2 */
  WebRtcIlbcfix_Vq3(qX, index, codebook, X, cbsize[0]);

  /* Second split: elements 3..5, next codebook section */
  codebook += (dim[0]*cbsize[0]);
  WebRtcIlbcfix_Vq3(qX + 3, index + 1, codebook, X + 3, cbsize[1]);

  /* Third split: starts at element 6, last codebook section */
  codebook += (dim[1]*cbsize[1]);
  WebRtcIlbcfix_Vq4(qX + 6, index + 2, codebook, X + 6, cbsize[2]);
}
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SplitVq.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SPLIT_VQ_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SPLIT_VQ_H_ + +#include <stdint.h> + +/*----------------------------------------------------------------* + * split vector quantization + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SplitVq( + int16_t* qX, /* (o) the quantized vector in Q13 */ + int16_t* index, /* (o) a vector of indexes for all vector + codebooks in the split */ + int16_t* X, /* (i) the vector to quantize */ + int16_t* CB, /* (i) the quantizer codebook in Q13 */ + int16_t* dim, /* (i) the dimension of X and qX */ + int16_t* cbsize /* (i) the number of vectors in the codebook */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c new file mode 100644 index 0000000000..c58086c03b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_StateConstruct.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/state_construct.h" + +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * decoding of the start state + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_StateConstruct( + size_t idxForMax, /* (i) 6-bit index for the quantization of + max amplitude */ + int16_t *idxVec, /* (i) vector of quantization indexes */ + int16_t *syntDenum, /* (i) synthesis filter denumerator */ + int16_t *Out_fix, /* (o) the decoded state vector */ + size_t len /* (i) length of a state vector */ + ) { + size_t k; + int16_t maxVal; + int16_t *tmp1, *tmp2, *tmp3; + /* Stack based */ + int16_t numerator[1+LPC_FILTERORDER]; + int16_t sampleValVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER]; + int16_t sampleMaVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER]; + int16_t *sampleVal = &sampleValVec[LPC_FILTERORDER]; + int16_t *sampleMa = &sampleMaVec[LPC_FILTERORDER]; + int16_t *sampleAr = &sampleValVec[LPC_FILTERORDER]; + + /* initialization of coefficients */ + + for (k=0; k<LPC_FILTERORDER+1; k++){ + numerator[k] = syntDenum[LPC_FILTERORDER-k]; + } + + /* decoding of the maximum value */ + + maxVal = WebRtcIlbcfix_kFrgQuantMod[idxForMax]; + + /* decoding of the sample values */ + tmp1 = sampleVal; + tmp2 = &idxVec[len-1]; + + if (idxForMax<37) { + for(k=0; k<len; k++){ + /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 2097152 (= 0.5 << 22) + maxVal is in Q8 and result is in Q(-1) */ + *tmp1 = (int16_t)((maxVal * WebRtcIlbcfix_kStateSq3[*tmp2] + 
2097152) >> + 22); + tmp1++; + tmp2--; + } + } else if (idxForMax<59) { + for(k=0; k<len; k++){ + /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 262144 (= 0.5 << 19) + maxVal is in Q5 and result is in Q(-1) */ + *tmp1 = (int16_t)((maxVal * WebRtcIlbcfix_kStateSq3[*tmp2] + 262144) >> + 19); + tmp1++; + tmp2--; + } + } else { + for(k=0; k<len; k++){ + /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 65536 (= 0.5 << 17) + maxVal is in Q3 and result is in Q(-1) */ + *tmp1 = (int16_t)((maxVal * WebRtcIlbcfix_kStateSq3[*tmp2] + 65536) >> + 17); + tmp1++; + tmp2--; + } + } + + /* Set the rest of the data to zero */ + WebRtcSpl_MemSetW16(&sampleVal[len], 0, len); + + /* circular convolution with all-pass filter */ + + /* Set the state to zero */ + WebRtcSpl_MemSetW16(sampleValVec, 0, (LPC_FILTERORDER)); + + /* Run MA filter + AR filter */ + WebRtcSpl_FilterMAFastQ12( + sampleVal, sampleMa, + numerator, LPC_FILTERORDER+1, len + LPC_FILTERORDER); + WebRtcSpl_MemSetW16(&sampleMa[len + LPC_FILTERORDER], 0, (len - LPC_FILTERORDER)); + WebRtcSpl_FilterARFastQ12( + sampleMa, sampleAr, + syntDenum, LPC_FILTERORDER+1, 2 * len); + + tmp1 = &sampleAr[len-1]; + tmp2 = &sampleAr[2*len-1]; + tmp3 = Out_fix; + for(k=0;k<len;k++){ + (*tmp3) = (*tmp1) + (*tmp2); + tmp1--; + tmp2--; + tmp3++; + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h new file mode 100644 index 0000000000..4c3011937d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_construct.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_StateConstruct.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_CONSTRUCT_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_CONSTRUCT_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Generate the start state from the quantized indexes + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_StateConstruct( + size_t idxForMax, /* (i) 6-bit index for the quantization of + max amplitude */ + int16_t* idxVec, /* (i) vector of quantization indexes */ + int16_t* syntDenum, /* (i) synthesis filter denumerator */ + int16_t* Out_fix, /* (o) the decoded state vector */ + size_t len /* (i) length of a state vector */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c new file mode 100644 index 0000000000..7227ac9d45 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_StateSearch.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/state_search.h" + +#include "modules/audio_coding/codecs/ilbc/abs_quant.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * encoding of start state + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_StateSearch( + IlbcEncoder *iLBCenc_inst, + /* (i) Encoder instance */ + iLBC_bits *iLBC_encbits,/* (i/o) Encoded bits (output idxForMax + and idxVec, input state_first) */ + int16_t *residual, /* (i) target residual vector */ + int16_t *syntDenum, /* (i) lpc synthesis filter */ + int16_t *weightDenum /* (i) weighting filter denuminator */ + ) { + size_t k, index; + int16_t maxVal; + int16_t scale, shift; + int32_t maxValsq; + int16_t scaleRes; + int16_t max; + int i; + /* Stack based */ + int16_t numerator[1+LPC_FILTERORDER]; + int16_t residualLongVec[2*STATE_SHORT_LEN_30MS+LPC_FILTERORDER]; + int16_t sampleMa[2*STATE_SHORT_LEN_30MS]; + int16_t *residualLong = &residualLongVec[LPC_FILTERORDER]; + int16_t *sampleAr = residualLong; + + /* Scale to maximum 12 bits to avoid saturation in circular convolution filter */ + max = WebRtcSpl_MaxAbsValueW16(residual, iLBCenc_inst->state_short_len); + scaleRes = WebRtcSpl_GetSizeInBits(max)-12; + scaleRes = WEBRTC_SPL_MAX(0, scaleRes); + /* Set up the filter coefficients for the circular convolution */ + for (i=0; i<LPC_FILTERORDER+1; i++) { + numerator[i] = (syntDenum[LPC_FILTERORDER-i]>>scaleRes); + } + + /* Copy the residual to a temporary buffer that we can filter + * and set the remaining samples to zero. 
+ */ + WEBRTC_SPL_MEMCPY_W16(residualLong, residual, iLBCenc_inst->state_short_len); + WebRtcSpl_MemSetW16(residualLong + iLBCenc_inst->state_short_len, 0, iLBCenc_inst->state_short_len); + + /* Run the Zero-Pole filter (Ciurcular convolution) */ + WebRtcSpl_MemSetW16(residualLongVec, 0, LPC_FILTERORDER); + WebRtcSpl_FilterMAFastQ12(residualLong, sampleMa, numerator, + LPC_FILTERORDER + 1, + iLBCenc_inst->state_short_len + LPC_FILTERORDER); + WebRtcSpl_MemSetW16(&sampleMa[iLBCenc_inst->state_short_len + LPC_FILTERORDER], 0, iLBCenc_inst->state_short_len - LPC_FILTERORDER); + + WebRtcSpl_FilterARFastQ12( + sampleMa, sampleAr, + syntDenum, LPC_FILTERORDER+1, 2 * iLBCenc_inst->state_short_len); + + for(k=0;k<iLBCenc_inst->state_short_len;k++){ + sampleAr[k] += sampleAr[k+iLBCenc_inst->state_short_len]; + } + + /* Find maximum absolute value in the vector */ + maxVal=WebRtcSpl_MaxAbsValueW16(sampleAr, iLBCenc_inst->state_short_len); + + /* Find the best index */ + + if ((((int32_t)maxVal)<<scaleRes)<23170) { + maxValsq=((int32_t)maxVal*maxVal)<<(2+2*scaleRes); + } else { + maxValsq=(int32_t)WEBRTC_SPL_WORD32_MAX; + } + + index=0; + for (i=0;i<63;i++) { + + if (maxValsq>=WebRtcIlbcfix_kChooseFrgQuant[i]) { + index=i+1; + } else { + i=63; + } + } + iLBC_encbits->idxForMax=index; + + /* Rescale the vector before quantization */ + scale=WebRtcIlbcfix_kScale[index]; + + if (index<27) { /* scale table is in Q16, fout[] is in Q(-1) and we want the result to be in Q11 */ + shift=4; + } else { /* scale table is in Q21, fout[] is in Q(-1) and we want the result to be in Q11 */ + shift=9; + } + + /* Set up vectors for AbsQuant and rescale it with the scale factor */ + WebRtcSpl_ScaleVectorWithSat(sampleAr, sampleAr, scale, + iLBCenc_inst->state_short_len, (int16_t)(shift-scaleRes)); + + /* Quantize the values in fout[] */ + WebRtcIlbcfix_AbsQuant(iLBCenc_inst, iLBC_encbits, sampleAr, weightDenum); + + return; +} diff --git 
a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h new file mode 100644 index 0000000000..6469138a0e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/state_search.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_StateSearch.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_SEARCH_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_STATE_SEARCH_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * encoding of start state + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_StateSearch( + IlbcEncoder* iLBCenc_inst, + /* (i) Encoder instance */ + iLBC_bits* iLBC_encbits, /* (i/o) Encoded bits (output idxForMax + and idxVec, input state_first) */ + int16_t* residual, /* (i) target residual vector */ + int16_t* syntDenum, /* (i) lpc synthesis filter */ + int16_t* weightDenum /* (i) weighting filter denuminator */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/swap_bytes.c new file mode 100644 index 0000000000..bbafc1a2ed --- /dev/null +++ 
/*----------------------------------------------------------------*
 *  Byte-swap a sequence of 16-bit words: the high and low byte of
 *  every input word change places (used to simplify operations on
 *  Little Endian machines). `input` and `output` may be the same
 *  buffer.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_SwapBytes(
    const uint16_t* input,   /* (i) the sequence to swap */
    size_t wordLength,          /* (i) number of uint16_t to swap */
    uint16_t* output         /* (o) the swapped sequence */
                              ) {
  size_t idx;
  for (idx = 0; idx < wordLength; idx++) {
    /* Read the word once, then store it with its two bytes exchanged;
       the cast drops the bits that the left shift pushed above bit 15. */
    const uint16_t word = input[idx];
    output[idx] = (uint16_t)((word >> 8) | (word << 8));
  }
}
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_SwapBytes.h + +******************************************************************/ + +#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SWAP_BYTES_H_ +#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_SWAP_BYTES_H_ + +#include <stddef.h> +#include <stdint.h> + +/*----------------------------------------------------------------* + * Swap bytes (to simplify operations on Little Endian machines) + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_SwapBytes( + const uint16_t* input, /* (i) the sequence to swap */ + size_t wordLength, /* (i) number or uint16_t to swap */ + uint16_t* output /* (o) the swapped sequence */ + ); + +#endif diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/empty.cc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c new file mode 100644 index 0000000000..e0ca075eda --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_test.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + iLBC_test.c + +******************************************************************/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +/*---------------------------------------------------------------* + * Main program to test iLBC encoding and decoding + * + * Usage: + * exefile_name.exe <infile> <bytefile> <outfile> <channel> + * + * <infile> : Input file, speech for encoder (16-bit pcm file) + * <bytefile> : Bit stream output from the encoder + * <outfile> : Output file, decoded speech (16-bit pcm file) + * <channel> : Bit error file, optional (16-bit) + * 1 - Packet received correctly + * 0 - Packet Lost + * + *--------------------------------------------------------------*/ + +#define BLOCKL_MAX 240 +#define ILBCNOOFWORDS_MAX 25 + + +int main(int argc, char* argv[]) +{ + + FILE *ifileid,*efileid,*ofileid, *cfileid; + int16_t data[BLOCKL_MAX]; + uint8_t encoded_data[2 * ILBCNOOFWORDS_MAX]; + int16_t decoded_data[BLOCKL_MAX]; + int len_int, mode; + short pli; + int blockcount = 0; + size_t frameLen, len, len_i16s; + int16_t speechType; + IlbcEncoderInstance *Enc_Inst; + IlbcDecoderInstance *Dec_Inst; + +#ifdef __ILBC_WITH_40BITACC + /* Doublecheck that long long exists */ + if (sizeof(long)>=sizeof(long long)) { + fprintf(stderr, "40-bit simulation is not be supported on this platform\n"); + exit(0); + } +#endif + + /* get arguments and open files */ + + if ((argc!=5) && (argc!=6)) { + fprintf(stderr, + "\n*-----------------------------------------------*\n"); + fprintf(stderr, + " %s <20,30> input encoded decoded (channel)\n\n", + argv[0]); + fprintf(stderr, + " mode : Frame size for the encoding/decoding\n"); + fprintf(stderr, + " 20 - 20 ms\n"); + fprintf(stderr, + " 30 - 30 ms\n"); + fprintf(stderr, + " input : Speech for encoder (16-bit pcm file)\n"); + fprintf(stderr, + " 
encoded : Encoded bit stream\n"); + fprintf(stderr, + " decoded : Decoded speech (16-bit pcm file)\n"); + fprintf(stderr, + " channel : Packet loss pattern, optional (16-bit)\n"); + fprintf(stderr, + " 1 - Packet received correctly\n"); + fprintf(stderr, + " 0 - Packet Lost\n"); + fprintf(stderr, + "*-----------------------------------------------*\n\n"); + exit(1); + } + mode=atoi(argv[1]); + if (mode != 20 && mode != 30) { + fprintf(stderr,"Wrong mode %s, must be 20, or 30\n", + argv[1]); + exit(2); + } + if ( (ifileid=fopen(argv[2],"rb")) == NULL) { + fprintf(stderr,"Cannot open input file %s\n", argv[2]); + exit(2);} + if ( (efileid=fopen(argv[3],"wb")) == NULL) { + fprintf(stderr, "Cannot open encoded file file %s\n", + argv[3]); exit(1);} + if ( (ofileid=fopen(argv[4],"wb")) == NULL) { + fprintf(stderr, "Cannot open decoded file %s\n", + argv[4]); exit(1);} + if (argc==6) { + if( (cfileid=fopen(argv[5],"rb")) == NULL) { + fprintf(stderr, "Cannot open channel file %s\n", + argv[5]); + exit(1); + } + } else { + cfileid=NULL; + } + + /* print info */ + + fprintf(stderr, "\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* iLBC test program *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr,"\nMode : %2d ms\n", mode); + fprintf(stderr,"Input file : %s\n", argv[2]); + fprintf(stderr,"Encoded file : %s\n", argv[3]); + fprintf(stderr,"Output file : %s\n", argv[4]); + if (argc==6) { + fprintf(stderr,"Channel file : %s\n", argv[5]); + } + fprintf(stderr,"\n"); + + /* Create structs */ + WebRtcIlbcfix_EncoderCreate(&Enc_Inst); + WebRtcIlbcfix_DecoderCreate(&Dec_Inst); + + + /* Initialization */ + + WebRtcIlbcfix_EncoderInit(Enc_Inst, mode); + WebRtcIlbcfix_DecoderInit(Dec_Inst, mode); + frameLen = (size_t)(mode*8); + + /* loop over input blocks */ + + while 
(fread(data,sizeof(int16_t),frameLen,ifileid) == frameLen) { + + blockcount++; + + /* encoding */ + + fprintf(stderr, "--- Encoding block %i --- ",blockcount); + len_int = WebRtcIlbcfix_Encode(Enc_Inst, data, frameLen, encoded_data); + if (len_int < 0) { + fprintf(stderr, "Error encoding\n"); + exit(0); + } + len = (size_t)len_int; + fprintf(stderr, "\r"); + + /* write byte file */ + + len_i16s = (len + 1) / sizeof(int16_t); + if (fwrite(encoded_data, sizeof(int16_t), len_i16s, efileid) != len_i16s) { + return -1; + } + + /* get channel data if provided */ + if (argc==6) { + if (fread(&pli, sizeof(int16_t), 1, cfileid)) { + if ((pli!=0)&&(pli!=1)) { + fprintf(stderr, "Error in channel file\n"); + exit(0); + } + if (pli==0) { + /* Packet loss -> remove info from frame */ + memset(encoded_data, 0, + sizeof(int16_t)*ILBCNOOFWORDS_MAX); + } + } else { + fprintf(stderr, "Error. Channel file too short\n"); + exit(0); + } + } else { + pli=1; + } + + /* decoding */ + + fprintf(stderr, "--- Decoding block %i --- ",blockcount); + if (pli==1) { + len_int=WebRtcIlbcfix_Decode(Dec_Inst, encoded_data, + len, decoded_data,&speechType); + if (len_int < 0) { + fprintf(stderr, "Error decoding\n"); + exit(0); + } + len = (size_t)len_int; + } else { + len=WebRtcIlbcfix_DecodePlc(Dec_Inst, decoded_data, 1); + } + fprintf(stderr, "\r"); + + /* write output file */ + + if (fwrite(decoded_data, sizeof(int16_t), len, ofileid) != len) { + return -1; + } + } + + /* close files */ + + fclose(ifileid); fclose(efileid); fclose(ofileid); + if (argc==6) { + fclose(cfileid); + } + + /* Free structs */ + WebRtcIlbcfix_EncoderFree(Enc_Inst); + WebRtcIlbcfix_DecoderFree(Dec_Inst); + + + printf("\nDone with simulation\n\n"); + + return(0); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testLib.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testLib.c new file mode 100644 index 0000000000..132f3bdb37 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testLib.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + +iLBC Speech Coder ANSI-C Source Code + +iLBC_test.c + +******************************************************************/ + +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +//#define JUNK_DATA +#ifdef JUNK_DATA +#define SEED_FILE "randseed.txt" +#endif + + +/*----------------------------------------------------------------* +* Main program to test iLBC encoding and decoding +* +* Usage: +* exefile_name.exe <infile> <bytefile> <outfile> +* +*---------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + FILE *ifileid,*efileid,*ofileid, *chfileid; + short encoded_data[55], data[240], speechType; + int len_int, mode; + short pli; + size_t len, readlen; + int blockcount = 0; + + IlbcEncoderInstance *Enc_Inst; + IlbcDecoderInstance *Dec_Inst; +#ifdef JUNK_DATA + size_t i; + FILE *seedfile; + unsigned int random_seed = (unsigned int) time(NULL);//1196764538 +#endif + + /* Create structs */ + WebRtcIlbcfix_EncoderCreate(&Enc_Inst); + WebRtcIlbcfix_DecoderCreate(&Dec_Inst); + + /* get arguments and open files */ + + if (argc != 6 ) { + fprintf(stderr, "%s mode inputfile bytefile outputfile channelfile\n", + argv[0]); + fprintf(stderr, "Example:\n"); + fprintf(stderr, "%s <30,20> in.pcm byte.dat out.pcm T30.0.dat\n", argv[0]); + exit(1); + } + 
mode=atoi(argv[1]); + if (mode != 20 && mode != 30) { + fprintf(stderr,"Wrong mode %s, must be 20, or 30\n", argv[1]); + exit(2); + } + if ( (ifileid=fopen(argv[2],"rb")) == NULL) { + fprintf(stderr,"Cannot open input file %s\n", argv[2]); + exit(2);} + if ( (efileid=fopen(argv[3],"wb")) == NULL) { + fprintf(stderr, "Cannot open channelfile file %s\n", + argv[3]); exit(3);} + if( (ofileid=fopen(argv[4],"wb")) == NULL) { + fprintf(stderr, "Cannot open output file %s\n", + argv[4]); exit(3);} + if ( (chfileid=fopen(argv[5],"rb")) == NULL) { + fprintf(stderr,"Cannot open channel file file %s\n", argv[5]); + exit(2); + } + /* print info */ + fprintf(stderr, "\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* iLBCtest *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); +#ifdef SPLIT_10MS + fprintf(stderr,"\n10ms split with raw mode: %2d ms\n", mode); +#else + fprintf(stderr,"\nMode : %2d ms\n", mode); +#endif + fprintf(stderr,"\nInput file : %s\n", argv[2]); + fprintf(stderr,"Coded file : %s\n", argv[3]); + fprintf(stderr,"Output file : %s\n\n", argv[4]); + fprintf(stderr,"Channel file : %s\n\n", argv[5]); + +#ifdef JUNK_DATA + srand(random_seed); + + if ( (seedfile = fopen(SEED_FILE, "a+t") ) == NULL ) { + fprintf(stderr, "Error: Could not open file %s\n", SEED_FILE); + } + else { + fprintf(seedfile, "%u\n", random_seed); + fclose(seedfile); + } +#endif + + /* Initialization */ + WebRtcIlbcfix_EncoderInit(Enc_Inst, mode); + WebRtcIlbcfix_DecoderInit(Dec_Inst, mode); + + /* loop over input blocks */ +#ifdef SPLIT_10MS + readlen = 80; +#else + readlen = (size_t)(mode << 3); +#endif + while(fread(data, sizeof(short), readlen, ifileid) == readlen) { + blockcount++; + + /* encoding */ + fprintf(stderr, "--- Encoding block %i --- ",blockcount); + len_int=WebRtcIlbcfix_Encode(Enc_Inst, data, 
readlen, encoded_data); + if (len_int < 0) { + fprintf(stderr, "Error encoding\n"); + exit(0); + } + len = (size_t)len_int; + fprintf(stderr, "\r"); + +#ifdef JUNK_DATA + for ( i = 0; i < len; i++) { + encoded_data[i] = (short) (encoded_data[i] + (short) rand()); + } +#endif + /* write byte file */ + if(len != 0){ //len may be 0 in 10ms split case + fwrite(encoded_data,1,len,efileid); + + /* get channel data if provided */ + if (argc==6) { + if (fread(&pli, sizeof(int16_t), 1, chfileid)) { + if ((pli!=0)&&(pli!=1)) { + fprintf(stderr, "Error in channel file\n"); + exit(0); + } + if (pli==0) { + /* Packet loss -> remove info from frame */ + memset(encoded_data, 0, sizeof(int16_t)*25); + } + } else { + fprintf(stderr, "Error. Channel file too short\n"); + exit(0); + } + } else { + pli=1; + } + + /* decoding */ + fprintf(stderr, "--- Decoding block %i --- ",blockcount); + if (pli==1) { + len_int = WebRtcIlbcfix_Decode(Dec_Inst, encoded_data, len, data, + &speechType); + if (len_int < 0) { + fprintf(stderr, "Error decoding\n"); + exit(0); + } + len = (size_t)len_int; + } else { + len=WebRtcIlbcfix_DecodePlc(Dec_Inst, data, 1); + } + fprintf(stderr, "\r"); + + /* write output file */ + fwrite(data,sizeof(short),len,ofileid); + } + } + +#ifdef JUNK_DATA + if ( (seedfile = fopen(SEED_FILE, "a+t") ) == NULL ) { + fprintf(stderr, "Error: Could not open file %s\n", SEED_FILE); + } + else { + fprintf(seedfile, "ok\n\n"); + fclose(seedfile); + } +#endif + + /* free structs */ + WebRtcIlbcfix_EncoderFree(Enc_Inst); + WebRtcIlbcfix_DecoderFree(Dec_Inst); + + /* close files */ + fclose(ifileid); + fclose(efileid); + fclose(ofileid); + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c new file mode 100644 index 0000000000..a62a42edf6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/test/iLBC_testprogram.c @@ -0,0 +1,343 
@@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + iLBC_test.c + +******************************************************************/ + +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "modules/audio_coding/codecs/ilbc/defines.h" +#include "modules/audio_coding/codecs/ilbc/nit_encode.h" +#include "modules/audio_coding/codecs/ilbc/encode.h" +#include "modules/audio_coding/codecs/ilbc/init_decode.h" +#include "modules/audio_coding/codecs/ilbc/decode.h" +#include "modules/audio_coding/codecs/ilbc/constants.h" +#include "modules/audio_coding/codecs/ilbc/ilbc.h" + +#define ILBCNOOFWORDS_MAX (NO_OF_BYTES_30MS)/2 + +/* Runtime statistics */ +#include <time.h> +/* #define CLOCKS_PER_SEC 1000 */ + +/*----------------------------------------------------------------* + * Encoder interface function + *---------------------------------------------------------------*/ + +short encode( /* (o) Number of bytes encoded */ + IlbcEncoder *iLBCenc_inst, /* (i/o) Encoder instance */ + int16_t *encoded_data, /* (o) The encoded bytes */ + int16_t *data /* (i) The signal block to encode */ + ){ + + /* do the actual encoding */ + WebRtcIlbcfix_Encode((uint16_t *)encoded_data, data, iLBCenc_inst); + + return (iLBCenc_inst->no_of_bytes); +} + +/*----------------------------------------------------------------* + * Decoder interface function + *---------------------------------------------------------------*/ + +short decode( /* (o) Number of decoded samples */ + IlbcDecoder 
*iLBCdec_inst, /* (i/o) Decoder instance */ + short *decoded_data, /* (o) Decoded signal block */ + short *encoded_data, /* (i) Encoded bytes */ + short mode /* (i) 0=PL, 1=Normal */ + ){ + + /* check if mode is valid */ + + if (mode<0 || mode>1) { + printf("\nERROR - Wrong mode - 0, 1 allowed\n"); exit(3);} + + /* do actual decoding of block */ + + WebRtcIlbcfix_Decode(decoded_data, (uint16_t *)encoded_data, + iLBCdec_inst, mode); + + return (iLBCdec_inst->blockl); +} + +/*----------------------------------------------------------------* + * Main program to test iLBC encoding and decoding + * + * Usage: + * exefile_name.exe <infile> <bytefile> <outfile> <channelfile> + * + *---------------------------------------------------------------*/ + +#define MAXFRAMES 10000 +#define MAXFILELEN (BLOCKL_MAX*MAXFRAMES) + +int main(int argc, char* argv[]) +{ + + /* Runtime statistics */ + + float starttime1, starttime2; + float runtime1, runtime2; + float outtime; + + FILE *ifileid,*efileid,*ofileid, *chfileid; + short *inputdata, *encodeddata, *decodeddata; + short *channeldata; + int blockcount = 0, noOfBlocks=0, i, noOfLostBlocks=0; + short mode; + IlbcEncoder Enc_Inst; + IlbcDecoder Dec_Inst; + + short frameLen; + short count; +#ifdef SPLIT_10MS + short size; +#endif + + inputdata=(short*) malloc(MAXFILELEN*sizeof(short)); + if (inputdata==NULL) { + fprintf(stderr,"Could not allocate memory for vector\n"); + exit(0); + } + encodeddata=(short*) malloc(ILBCNOOFWORDS_MAX*MAXFRAMES*sizeof(short)); + if (encodeddata==NULL) { + fprintf(stderr,"Could not allocate memory for vector\n"); + free(inputdata); + exit(0); + } + decodeddata=(short*) malloc(MAXFILELEN*sizeof(short)); + if (decodeddata==NULL) { + fprintf(stderr,"Could not allocate memory for vector\n"); + free(inputdata); + free(encodeddata); + exit(0); + } + channeldata=(short*) malloc(MAXFRAMES*sizeof(short)); + if (channeldata==NULL) { + fprintf(stderr,"Could not allocate memory for vector\n"); + free(inputdata); + 
free(encodeddata); + free(decodeddata); + exit(0); + } + + /* get arguments and open files */ + + if (argc != 6 ) { + fprintf(stderr, "%s mode inputfile bytefile outputfile channelfile\n", + argv[0]); + fprintf(stderr, "Example:\n"); + fprintf(stderr, "%s <30,20> in.pcm byte.dat out.pcm T30.0.dat\n", argv[0]); + exit(1); + } + mode=atoi(argv[1]); + if (mode != 20 && mode != 30) { + fprintf(stderr,"Wrong mode %s, must be 20, or 30\n", argv[1]); + exit(2); + } + if ( (ifileid=fopen(argv[2],"rb")) == NULL) { + fprintf(stderr,"Cannot open input file %s\n", argv[2]); + exit(2);} + if ( (efileid=fopen(argv[3],"wb")) == NULL) { + fprintf(stderr, "Cannot open channelfile file %s\n", + argv[3]); exit(3);} + if( (ofileid=fopen(argv[4],"wb")) == NULL) { + fprintf(stderr, "Cannot open output file %s\n", + argv[4]); exit(3);} + if ( (chfileid=fopen(argv[5],"rb")) == NULL) { + fprintf(stderr,"Cannot open channel file file %s\n", argv[5]); + exit(2);} + + + /* print info */ +#ifndef PRINT_MIPS + fprintf(stderr, "\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* iLBCtest *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "* *\n"); + fprintf(stderr, + "*---------------------------------------------------*\n"); +#ifdef SPLIT_10MS + fprintf(stderr,"\n10ms split with raw mode: %2d ms\n", mode); +#else + fprintf(stderr,"\nMode : %2d ms\n", mode); +#endif + fprintf(stderr,"\nInput file : %s\n", argv[2]); + fprintf(stderr,"Coded file : %s\n", argv[3]); + fprintf(stderr,"Output file : %s\n\n", argv[4]); + fprintf(stderr,"Channel file : %s\n\n", argv[5]); +#endif + + /* Initialization */ + + WebRtcIlbcfix_EncoderInit(&Enc_Inst, mode); + WebRtcIlbcfix_DecoderInit(&Dec_Inst, mode, 1); + + /* extract the input file and channel file */ + +#ifdef SPLIT_10MS + frameLen = (mode==20)? 
80:160; + fread(Enc_Inst.past_samples, sizeof(short), frameLen, ifileid); + Enc_Inst.section = 0; + + while( fread(&inputdata[noOfBlocks*80], sizeof(short), + 80, ifileid) == 80 ) { + noOfBlocks++; + } + + noOfBlocks += frameLen/80; + frameLen = 80; +#else + frameLen = Enc_Inst.blockl; + + while( fread(&inputdata[noOfBlocks*Enc_Inst.blockl],sizeof(short), + Enc_Inst.blockl,ifileid)==(uint16_t)Enc_Inst.blockl){ + noOfBlocks++; + } +#endif + + + while ((fread(&channeldata[blockcount],sizeof(short), 1,chfileid)==1) + && ( blockcount < noOfBlocks/(Enc_Inst.blockl/frameLen) )) { + blockcount++; + } + + if ( blockcount < noOfBlocks/(Enc_Inst.blockl/frameLen) ) { + fprintf(stderr,"Channel file %s is too short\n", argv[4]); + free(inputdata); + free(encodeddata); + free(decodeddata); + free(channeldata); + exit(0); + } + + count=0; + + /* Runtime statistics */ + + starttime1 = clock()/(float)CLOCKS_PER_SEC; + + /* Encoding loop */ +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + +#ifdef SPLIT_10MS + /* "Enc_Inst.section != 0" is to make sure we run through full + lengths of all vectors for 10ms split mode. 
+ */ + // while( (count < noOfBlocks) || (Enc_Inst.section != 0) ) { + while( count < blockcount * (Enc_Inst.blockl/frameLen) ) { + + encode(&Enc_Inst, &encodeddata[Enc_Inst.no_of_words * + (count/(Enc_Inst.nsub/2))], + &inputdata[frameLen * count] ); +#else + while (count < noOfBlocks) { + encode( &Enc_Inst, &encodeddata[Enc_Inst.no_of_words * count], + &inputdata[frameLen * count] ); +#endif + +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + + count++; + } + + count=0; + + /* Runtime statistics */ + + starttime2=clock()/(float)CLOCKS_PER_SEC; + runtime1 = (float)(starttime2-starttime1); + + /* Decoding loop */ + + while (count < blockcount) { + if (channeldata[count]==1) { + /* Normal decoding */ + decode(&Dec_Inst, &decodeddata[count * Dec_Inst.blockl], + &encodeddata[Dec_Inst.no_of_words * count], 1); + } else if (channeldata[count]==0) { + /* PLC */ + short emptydata[ILBCNOOFWORDS_MAX]; + memset(emptydata, 0, Dec_Inst.no_of_words*sizeof(short)); + decode(&Dec_Inst, &decodeddata[count*Dec_Inst.blockl], + emptydata, 0); + noOfLostBlocks++; + } else { + printf("Error in channel file (values have to be either 1 or 0)\n"); + exit(0); + } +#ifdef PRINT_MIPS + printf("-1 -1\n"); +#endif + + count++; + } + + /* Runtime statistics */ + + runtime2 = (float)(clock()/(float)CLOCKS_PER_SEC-starttime2); + + outtime = (float)((float)blockcount* + (float)mode/1000.0); + +#ifndef PRINT_MIPS + printf("\nLength of speech file: %.1f s\n", outtime); + printf("Lost frames : %.1f%%\n\n", 100*(float)noOfLostBlocks/(float)blockcount); + + printf("Time to run iLBC_encode+iLBC_decode:"); + printf(" %.1f s (%.1f%% of realtime)\n", runtime1+runtime2, + (100*(runtime1+runtime2)/outtime)); + + printf("Time in iLBC_encode :"); + printf(" %.1f s (%.1f%% of total runtime)\n", + runtime1, 100.0*runtime1/(runtime1+runtime2)); + + printf("Time in iLBC_decode :"); + printf(" %.1f s (%.1f%% of total runtime)\n\n", + runtime2, 100.0*runtime2/(runtime1+runtime2)); +#endif + + /* Write data to files 
*/ + for (i=0; i<blockcount; i++) { + fwrite(&encodeddata[i*Enc_Inst.no_of_words], sizeof(short), + Enc_Inst.no_of_words, efileid); + } + for (i=0;i<blockcount;i++) { + fwrite(&decodeddata[i*Enc_Inst.blockl],sizeof(short),Enc_Inst.blockl,ofileid); + } + + /* return memory and close files */ + + free(inputdata); + free(encodeddata); + free(decodeddata); + free(channeldata); + fclose(ifileid); fclose(efileid); fclose(ofileid); + return(0); + } diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.c b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.c new file mode 100644 index 0000000000..a9a0147b9d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_UnpackBits.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/unpack_bits.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * unpacking of bits from bitstream, i.e., vector of bytes + *---------------------------------------------------------------*/ + +int16_t WebRtcIlbcfix_UnpackBits( /* (o) "Empty" frame indicator */ + const uint16_t *bitstream, /* (i) The packatized bitstream */ + iLBC_bits *enc_bits, /* (o) Paramerers from bitstream */ + int16_t mode /* (i) Codec mode (20 or 30) */ + ) { + const uint16_t *bitstreamPtr; + int i, k; + int16_t *tmpPtr; + + bitstreamPtr=bitstream; + + /* First int16_t */ + enc_bits->lsf[0] = (*bitstreamPtr)>>10; /* Bit 0..5 */ + enc_bits->lsf[1] = ((*bitstreamPtr)>>3)&0x7F; /* Bit 6..12 */ + enc_bits->lsf[2] = ((*bitstreamPtr)&0x7)<<4; /* Bit 13..15 */ + bitstreamPtr++; + /* Second int16_t */ + enc_bits->lsf[2] |= ((*bitstreamPtr)>>12)&0xF; /* Bit 0..3 */ + + if (mode==20) { + enc_bits->startIdx = ((*bitstreamPtr)>>10)&0x3; /* Bit 4..5 */ + enc_bits->state_first = ((*bitstreamPtr)>>9)&0x1; /* Bit 6 */ + enc_bits->idxForMax = ((*bitstreamPtr)>>3)&0x3F; /* Bit 7..12 */ + enc_bits->cb_index[0] = ((*bitstreamPtr)&0x7)<<4; /* Bit 13..15 */ + bitstreamPtr++; + /* Third int16_t */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>12)&0xE; /* Bit 0..2 */ + enc_bits->gain_index[0] = ((*bitstreamPtr)>>8)&0x18; /* Bit 3..4 */ + enc_bits->gain_index[1] = ((*bitstreamPtr)>>7)&0x8; /* Bit 5 */ + enc_bits->cb_index[3] = ((*bitstreamPtr)>>2)&0xFE; /* Bit 6..12 */ + enc_bits->gain_index[3] = ((*bitstreamPtr)<<2)&0x10; /* Bit 13 */ + enc_bits->gain_index[4] = ((*bitstreamPtr)<<2)&0x8; /* Bit 14 */ + enc_bits->gain_index[6] = ((*bitstreamPtr)<<4)&0x10; /* Bit 15 */ + } else 
{ /* mode==30 */ + enc_bits->lsf[3] = ((*bitstreamPtr)>>6)&0x3F; /* Bit 4..9 */ + enc_bits->lsf[4] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* Third int16_t */ + enc_bits->lsf[4] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->lsf[5] = ((*bitstreamPtr)>>8)&0x7F; /* Bit 1..7 */ + enc_bits->startIdx = ((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */ + enc_bits->state_first = ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + enc_bits->idxForMax = ((*bitstreamPtr)<<2)&0x3C; /* Bit 12..15 */ + bitstreamPtr++; + /* 4:th int16_t */ + enc_bits->idxForMax |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->cb_index[0] = ((*bitstreamPtr)>>7)&0x78; /* Bit 2..5 */ + enc_bits->gain_index[0] = ((*bitstreamPtr)>>5)&0x10; /* Bit 6 */ + enc_bits->gain_index[1] = ((*bitstreamPtr)>>5)&0x8; /* Bit 7 */ + enc_bits->cb_index[3] = ((*bitstreamPtr))&0xFC; /* Bit 8..13 */ + enc_bits->gain_index[3] = ((*bitstreamPtr)<<3)&0x10; /* Bit 14 */ + enc_bits->gain_index[4] = ((*bitstreamPtr)<<3)&0x8; /* Bit 15 */ + } + /* Class 2 bits of ULP */ + /* 4:th to 6:th int16_t for 20 ms case + 5:th to 7:th int16_t for 30 ms case */ + bitstreamPtr++; + tmpPtr=enc_bits->idxVec; + for (k=0; k<3; k++) { + for (i=15; i>=0; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + bitstreamPtr++; + } + + if (mode==20) { + /* 7:th int16_t */ + for (i=15; i>6; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>4)&0x4; /* Bit 9 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>2)&0xC; /* Bit 10..11 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>1)&0x4; /* Bit 12 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)<<1)&0x8; /* Bit 13 */ + enc_bits->gain_index[7] = ((*bitstreamPtr)<<2)&0xC; /* Bit 14..15 */ + + } else { /* mode==30 */ + /* 8:th int16_t */ + for (i=15; i>5; i--) { + (*tmpPtr) = (((*bitstreamPtr)>>i)<<2)&0x4; + /* Bit 15-i */ + tmpPtr++; + } + enc_bits->cb_index[0] |= 
((*bitstreamPtr)>>3)&0x6; /* Bit 10..11 */ + enc_bits->gain_index[0] |= ((*bitstreamPtr))&0x8; /* Bit 12 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr))&0x4; /* Bit 13 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr))&0x2; /* Bit 14 */ + enc_bits->cb_index[6] = ((*bitstreamPtr)<<7)&0x80; /* Bit 15 */ + bitstreamPtr++; + /* 9:th int16_t */ + enc_bits->cb_index[6] |= ((*bitstreamPtr)>>9)&0x7E; /* Bit 0..5 */ + enc_bits->cb_index[9] = ((*bitstreamPtr)>>2)&0xFE; /* Bit 6..12 */ + enc_bits->cb_index[12] = ((*bitstreamPtr)<<5)&0xE0; /* Bit 13..15 */ + bitstreamPtr++; + /* 10:th int16_t */ + enc_bits->cb_index[12] |= ((*bitstreamPtr)>>11)&0x1E;/* Bit 0..3 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>8)&0xC; /* Bit 4..5 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>7)&0x6; /* Bit 6..7 */ + enc_bits->gain_index[6] = ((*bitstreamPtr)>>3)&0x18; /* Bit 8..9 */ + enc_bits->gain_index[7] = ((*bitstreamPtr)>>2)&0xC; /* Bit 10..11 */ + enc_bits->gain_index[9] = ((*bitstreamPtr)<<1)&0x10; /* Bit 12 */ + enc_bits->gain_index[10] = ((*bitstreamPtr)<<1)&0x8; /* Bit 13 */ + enc_bits->gain_index[12] = ((*bitstreamPtr)<<3)&0x10; /* Bit 14 */ + enc_bits->gain_index[13] = ((*bitstreamPtr)<<3)&0x8; /* Bit 15 */ + } + bitstreamPtr++; + /* Class 3 bits of ULP */ + /* 8:th to 14:th int16_t for 20 ms case + 11:th to 17:th int16_t for 30 ms case */ + tmpPtr=enc_bits->idxVec; + for (k=0; k<7; k++) { + for (i=14; i>=0; i-=2) { + (*tmpPtr) |= ((*bitstreamPtr)>>i)&0x3; /* Bit 15-i..14-i*/ + tmpPtr++; + } + bitstreamPtr++; + } + + if (mode==20) { + /* 15:th int16_t */ + enc_bits->idxVec[56] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>13)&0x1; /* Bit 2 */ + enc_bits->cb_index[1] = ((*bitstreamPtr)>>6)&0x7F; /* Bit 3..9 */ + enc_bits->cb_index[2] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* 16:th int16_t */ + enc_bits->cb_index[2] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->gain_index[0] |= 
((*bitstreamPtr)>>12)&0x7; /* Bit 1..3 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>10)&0x3; /* Bit 4..5 */ + enc_bits->gain_index[2] = ((*bitstreamPtr)>>7)&0x7; /* Bit 6..8 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr)>>6)&0x1; /* Bit 9 */ + enc_bits->cb_index[4] = ((*bitstreamPtr)<<1)&0x7E; /* Bit 10..15 */ + bitstreamPtr++; + /* 17:th int16_t */ + enc_bits->cb_index[4] |= ((*bitstreamPtr)>>15)&0x1; /* Bit 0 */ + enc_bits->cb_index[5] = ((*bitstreamPtr)>>8)&0x7F; /* Bit 1..7 */ + enc_bits->cb_index[6] = ((*bitstreamPtr))&0xFF; /* Bit 8..15 */ + bitstreamPtr++; + /* 18:th int16_t */ + enc_bits->cb_index[7] = (*bitstreamPtr)>>8; /* Bit 0..7 */ + enc_bits->cb_index[8] = (*bitstreamPtr)&0xFF; /* Bit 8..15 */ + bitstreamPtr++; + /* 19:th int16_t */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)>>12)&0x3; /* Bit 2..3 */ + enc_bits->gain_index[5] = ((*bitstreamPtr)>>9)&0x7; /* Bit 4..6 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)>>6)&0x7; /* Bit 7..9 */ + enc_bits->gain_index[7] |= ((*bitstreamPtr)>>4)&0x3; /* Bit 10..11 */ + enc_bits->gain_index[8] = ((*bitstreamPtr)>>1)&0x7; /* Bit 12..14 */ + } else { /* mode==30 */ + /* 18:th int16_t */ + enc_bits->idxVec[56] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->idxVec[57] |= ((*bitstreamPtr)>>12)&0x3; /* Bit 2..3 */ + enc_bits->cb_index[0] |= ((*bitstreamPtr)>>11)&1; /* Bit 4 */ + enc_bits->cb_index[1] = ((*bitstreamPtr)>>4)&0x7F; /* Bit 5..11 */ + enc_bits->cb_index[2] = ((*bitstreamPtr)<<3)&0x78; /* Bit 12..15 */ + bitstreamPtr++; + /* 19:th int16_t */ + enc_bits->cb_index[2] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->gain_index[0] |= ((*bitstreamPtr)>>10)&0x7; /* Bit 3..5 */ + enc_bits->gain_index[1] |= ((*bitstreamPtr)>>8)&0x3; /* Bit 6..7 */ + enc_bits->gain_index[2] = ((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */ + enc_bits->cb_index[3] |= ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + enc_bits->cb_index[4] = 
((*bitstreamPtr)<<3)&0x78; /* Bit 12..15 */ + bitstreamPtr++; + /* 20:th int16_t */ + enc_bits->cb_index[4] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->cb_index[5] = ((*bitstreamPtr)>>6)&0x7F; /* Bit 3..9 */ + enc_bits->cb_index[6] |= ((*bitstreamPtr)>>5)&0x1; /* Bit 10 */ + enc_bits->cb_index[7] = ((*bitstreamPtr)<<3)&0xF8; /* Bit 11..15 */ + bitstreamPtr++; + /* 21:st int16_t */ + enc_bits->cb_index[7] |= ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->cb_index[8] = ((*bitstreamPtr)>>5)&0xFF; /* Bit 3..10 */ + enc_bits->cb_index[9] |= ((*bitstreamPtr)>>4)&0x1; /* Bit 11 */ + enc_bits->cb_index[10] = ((*bitstreamPtr)<<4)&0xF0; /* Bit 12..15 */ + bitstreamPtr++; + /* 22:nd int16_t */ + enc_bits->cb_index[10] |= ((*bitstreamPtr)>>12)&0xF; /* Bit 0..3 */ + enc_bits->cb_index[11] = ((*bitstreamPtr)>>4)&0xFF; /* Bit 4..11 */ + enc_bits->cb_index[12] |= ((*bitstreamPtr)>>3)&0x1; /* Bit 12 */ + enc_bits->cb_index[13] = ((*bitstreamPtr)<<5)&0xE0; /* Bit 13..15 */ + bitstreamPtr++; + /* 23:rd int16_t */ + enc_bits->cb_index[13] |= ((*bitstreamPtr)>>11)&0x1F;/* Bit 0..4 */ + enc_bits->cb_index[14] = ((*bitstreamPtr)>>3)&0xFF; /* Bit 5..12 */ + enc_bits->gain_index[3] |= ((*bitstreamPtr)>>1)&0x3; /* Bit 13..14 */ + enc_bits->gain_index[4] |= ((*bitstreamPtr)&0x1); /* Bit 15 */ + bitstreamPtr++; + /* 24:rd int16_t */ + enc_bits->gain_index[5] = ((*bitstreamPtr)>>13)&0x7; /* Bit 0..2 */ + enc_bits->gain_index[6] |= ((*bitstreamPtr)>>10)&0x7; /* Bit 3..5 */ + enc_bits->gain_index[7] |= ((*bitstreamPtr)>>8)&0x3; /* Bit 6..7 */ + enc_bits->gain_index[8] = ((*bitstreamPtr)>>5)&0x7; /* Bit 8..10 */ + enc_bits->gain_index[9] |= ((*bitstreamPtr)>>1)&0xF; /* Bit 11..14 */ + enc_bits->gain_index[10] |= ((*bitstreamPtr)<<2)&0x4; /* Bit 15 */ + bitstreamPtr++; + /* 25:rd int16_t */ + enc_bits->gain_index[10] |= ((*bitstreamPtr)>>14)&0x3; /* Bit 0..1 */ + enc_bits->gain_index[11] = ((*bitstreamPtr)>>11)&0x7; /* Bit 2..4 */ + enc_bits->gain_index[12] |= 
((*bitstreamPtr)>>7)&0xF; /* Bit 5..8 */ + enc_bits->gain_index[13] |= ((*bitstreamPtr)>>4)&0x7; /* Bit 9..11 */ + enc_bits->gain_index[14] = ((*bitstreamPtr)>>1)&0x7; /* Bit 12..14 */ + } + /* Last bit should be zero, otherwise it's an "empty" frame */ + if (((*bitstreamPtr)&0x1) == 1) { + return(1); + } else { + return(0); + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h new file mode 100644 index 0000000000..1a63280e6b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/unpack_bits.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_UNPACK_BITS_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_UNPACK_BITS_H_

#include <stdint.h>

#include "modules/audio_coding/codecs/ilbc/defines.h"

/*----------------------------------------------------------------*
 *  unpacking of bits from bitstream, i.e., vector of bytes
 *
 *  Fills `enc_bits` from the 16-bit words of `bitstream`; the word
 *  layout differs between the 20 ms and the 30 ms mode.
 *
 *  Returns 1 when the last bit of the frame is set (an "empty"
 *  frame) and 0 otherwise.
 *---------------------------------------------------------------*/

int16_t
WebRtcIlbcfix_UnpackBits(/* (o) "Empty" frame indicator */
                         const uint16_t*
                             bitstream, /* (i) The packetized bitstream */
                         iLBC_bits*
                             enc_bits, /* (o) Parameters from bitstream */
                         int16_t mode  /* (i) Codec mode (20 or 30) */
                         );

#endif
/*----------------------------------------------------------------*
 *  vector quantization of order 3 (based on MSE)
 *
 *  Searches the codebook `CB`, laid out as `n_cb` consecutive
 *  3-element vectors, for the entry with the smallest squared
 *  Euclidean distance to `X`. The winning entry is copied to `Xq`
 *  and its position is written to `*index`. On ties the entry with
 *  the lowest index wins.
 *
 *  `n_cb` must be at least 1: with an empty codebook entry 0 is
 *  still copied to `Xq`, exactly as before.
 *
 *  The component differences are formed in 32 bits and the squared
 *  distance is accumulated in 64 bits. Forming the difference in
 *  int16_t wrapped around for inputs that are far apart (e.g.
 *  32767 - (-32768) became -1), which could make the most distant
 *  codebook entry look like the nearest one, and the 32-bit sum of
 *  squares could overflow. Results are unchanged whenever the old
 *  arithmetic did not wrap.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Vq3(
    int16_t *Xq, /* (o) the quantized vector (Q13) */
    int16_t *index, /* (o) the quantization index */
    int16_t *CB, /* (i) the vector quantization codebook (Q13) */
    int16_t *X,  /* (i) the vector to quantize (Q13) */
    int16_t n_cb /* (i) the number of vectors in the codebook */
                       ){
  int16_t i, j;
  int16_t minindex = 0;
  int32_t pos = 0; /* offset of codebook vector j; int16_t would wrap */
  int64_t dist;
  int64_t mindist = INT64_MAX; /* start value */

  /* Find the codebook vector with the lowest square distance */
  for (j = 0; j < n_cb; j++) {
    dist = 0;
    for (i = 0; i < 3; i++) {
      const int32_t diff = (int32_t)X[i] - (int32_t)CB[pos + i];
      dist += (int64_t)diff * diff;
    }

    /* Strict comparison keeps the first of several equally good entries */
    if (dist < mindist) {
      mindist = dist;
      minindex = j;
    }
    pos += 3;
  }

  /* Store the quantized codebook vector and the index */
  for (i = 0; i < 3; i++) {
    Xq[i] = CB[(int32_t)minindex * 3 + i];
  }
  *index = minindex;
}
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ3_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ3_H_

#include <stdint.h>

/*----------------------------------------------------------------*
 *  Vector quantization of order 3 (based on MSE)
 *
 *  The codebook is read as n_cb consecutive 3-element vectors. The
 *  vector closest to X (smallest squared distance) is copied to Xq
 *  and its position in the codebook is stored in *index; on ties
 *  the lowest index is kept.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Vq3(
    int16_t* Xq,    /* (o) the quantized vector (Q13) */
    int16_t* index, /* (o) the quantization index */
    int16_t* CB,    /* (i) the vector quantization codebook (Q13) */
    int16_t* X,     /* (i) the vector to quantize (Q13) */
    int16_t n_cb    /* (i) the number of vectors in the codebook */
    );

#endif
/*----------------------------------------------------------------*
 *  vector quantization of order 4 (based on MSE)
 *
 *  Searches the codebook `CB`, laid out as `n_cb` consecutive
 *  4-element vectors, for the entry with the smallest squared
 *  Euclidean distance to `X`. The winning entry is copied to `Xq`
 *  and its position is written to `*index`. On ties the entry with
 *  the lowest index wins.
 *
 *  `n_cb` must be at least 1: with an empty codebook entry 0 is
 *  still copied to `Xq`, exactly as before.
 *
 *  The component differences are formed in 32 bits and the squared
 *  distance is accumulated in 64 bits. Forming the difference in
 *  int16_t wrapped around for inputs that are far apart (e.g.
 *  32767 - (-32768) became -1), which could make the most distant
 *  codebook entry look like the nearest one, and the 32-bit sum of
 *  squares could overflow. Results are unchanged whenever the old
 *  arithmetic did not wrap.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Vq4(
    int16_t *Xq, /* (o) the quantized vector (Q13) */
    int16_t *index, /* (o) the quantization index */
    int16_t *CB, /* (i) the vector quantization codebook (Q13) */
    int16_t *X,  /* (i) the vector to quantize (Q13) */
    int16_t n_cb /* (i) the number of vectors in the codebook */
                       ){
  int16_t i, j;
  int16_t minindex = 0;
  int32_t pos = 0; /* offset of codebook vector j; int16_t would wrap */
  int64_t dist;
  int64_t mindist = INT64_MAX; /* start value */

  /* Find the codebook vector with the lowest square distance */
  for (j = 0; j < n_cb; j++) {
    dist = 0;
    for (i = 0; i < 4; i++) {
      const int32_t diff = (int32_t)X[i] - (int32_t)CB[pos + i];
      dist += (int64_t)diff * diff;
    }

    /* Strict comparison keeps the first of several equally good entries */
    if (dist < mindist) {
      mindist = dist;
      minindex = j;
    }
    pos += 4;
  }

  /* Store the quantized codebook vector and the index */
  for (i = 0; i < 4; i++) {
    Xq[i] = CB[(int32_t)minindex * 4 + i];
  }
  *index = minindex;
}
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ4_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_VQ4_H_

#include <stdint.h>

/*----------------------------------------------------------------*
 *  Vector quantization of order 4 (based on MSE)
 *
 *  The codebook is read as n_cb consecutive 4-element vectors. The
 *  vector closest to X (smallest squared distance) is copied to Xq
 *  and its position in the codebook is stored in *index; on ties
 *  the lowest index is kept.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Vq4(
    int16_t* Xq,    /* (o) the quantized vector (Q13) */
    int16_t* index, /* (o) the quantization index */
    int16_t* CB,    /* (i) the vector quantization codebook (Q13) */
    int16_t* X,     /* (i) the vector to quantize (Q13) */
    int16_t n_cb    /* (i) the number of vectors in the codebook */
    );

#endif
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_Window32W32.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/window32_w32.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * window multiplication + *---------------------------------------------------------------*/ + +void WebRtcIlbcfix_Window32W32( + int32_t *z, /* Output */ + int32_t *x, /* Input (same domain as Output)*/ + const int32_t *y, /* Q31 Window */ + size_t N /* length to process */ + ) { + size_t i; + int16_t x_low, x_hi, y_low, y_hi; + int16_t left_shifts; + int32_t temp; + + left_shifts = (int16_t)WebRtcSpl_NormW32(x[0]); + WebRtcSpl_VectorBitShiftW32(x, N, x, (int16_t)(-left_shifts)); + + + /* The double precision numbers use a special representation: + * w32 = hi<<16 + lo<<1 + */ + for (i = 0; i < N; i++) { + /* Extract higher bytes */ + x_hi = (int16_t)(x[i] >> 16); + y_hi = (int16_t)(y[i] >> 16); + + /* Extract lower bytes, defined as (w32 - hi<<16)>>1 */ + x_low = (int16_t)((x[i] - (x_hi << 16)) >> 1); + + y_low = (int16_t)((y[i] - (y_hi << 16)) >> 1); + + /* Calculate z by a 32 bit multiplication using both low and high from x and y */ + temp = ((x_hi * y_hi) << 1) + ((x_hi * y_low) >> 14); + + z[i] = temp + ((x_low * y_hi) >> 14); + } + + WebRtcSpl_VectorBitShiftW32(z, N, z, left_shifts); + + return; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h new file mode 100644 index 0000000000..15d72c5ba2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/ilbc/window32_w32.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_WINDOW32_W32_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_WINDOW32_W32_H_

#include <stddef.h>
#include <stdint.h>

/*----------------------------------------------------------------*
 *  window multiplication
 *
 *  Element-wise product of x and the Q31 window y, written to z.
 *  Note that x is not const: the implementation rescales x in place
 *  before multiplying, so the caller's input buffer is modified.
 *---------------------------------------------------------------*/

void WebRtcIlbcfix_Window32W32(int32_t* z, /* Output */
                               int32_t* x, /* Input (same domain as Output)*/
                               const int32_t* y, /* Q31 Window */
                               size_t N          /* length to process */
                               );

#endif
+ */ + +/****************************************************************** + + iLBC Speech Coder ANSI-C Source Code + + WebRtcIlbcfix_XcorrCoef.c + +******************************************************************/ + +#include "modules/audio_coding/codecs/ilbc/xcorr_coef.h" + +#include "modules/audio_coding/codecs/ilbc/defines.h" + +/*----------------------------------------------------------------* + * cross correlation which finds the optimal lag for the + * crossCorr*crossCorr/(energy) criteria + *---------------------------------------------------------------*/ + +size_t WebRtcIlbcfix_XcorrCoef( + int16_t *target, /* (i) first array */ + int16_t *regressor, /* (i) second array */ + size_t subl, /* (i) dimension arrays */ + size_t searchLen, /* (i) the search lenght */ + size_t offset, /* (i) samples offset between arrays */ + int16_t step /* (i) +1 or -1 */ + ){ + size_t k; + size_t maxlag; + int16_t pos; + int16_t max; + int16_t crossCorrScale, Energyscale; + int16_t crossCorrSqMod, crossCorrSqMod_Max; + int32_t crossCorr, Energy; + int16_t crossCorrmod, EnergyMod, EnergyMod_Max; + int16_t *tp, *rp; + int16_t *rp_beg, *rp_end; + int16_t totscale, totscale_max; + int16_t scalediff; + int32_t newCrit, maxCrit; + int shifts; + + /* Initializations, to make sure that the first one is selected */ + crossCorrSqMod_Max=0; + EnergyMod_Max=WEBRTC_SPL_WORD16_MAX; + totscale_max=-500; + maxlag=0; + pos=0; + + /* Find scale value and start position */ + if (step==1) { + max=WebRtcSpl_MaxAbsValueW16(regressor, subl + searchLen - 1); + rp_beg = regressor; + rp_end = regressor + subl; + } else { /* step==-1 */ + max = WebRtcSpl_MaxAbsValueW16(regressor - searchLen, subl + searchLen - 1); + rp_beg = regressor - 1; + rp_end = regressor + subl - 1; + } + + /* Introduce a scale factor on the Energy in int32_t in + order to make sure that the calculation does not + overflow */ + + if (max>5000) { + shifts=2; + } else { + shifts=0; + } + + /* Calculate the first energy, then do 
a +/- to get the other energies */ + Energy=WebRtcSpl_DotProductWithScale(regressor, regressor, subl, shifts); + + for (k=0;k<searchLen;k++) { + tp = target; + rp = ®ressor[pos]; + + crossCorr=WebRtcSpl_DotProductWithScale(tp, rp, subl, shifts); + + if ((Energy>0)&&(crossCorr>0)) { + + /* Put cross correlation and energy on 16 bit word */ + crossCorrScale=(int16_t)WebRtcSpl_NormW32(crossCorr)-16; + crossCorrmod=(int16_t)WEBRTC_SPL_SHIFT_W32(crossCorr, crossCorrScale); + Energyscale=(int16_t)WebRtcSpl_NormW32(Energy)-16; + EnergyMod=(int16_t)WEBRTC_SPL_SHIFT_W32(Energy, Energyscale); + + /* Square cross correlation and store upper int16_t */ + crossCorrSqMod = (int16_t)((crossCorrmod * crossCorrmod) >> 16); + + /* Calculate the total number of (dynamic) right shifts that have + been performed on (crossCorr*crossCorr)/energy + */ + totscale=Energyscale-(crossCorrScale<<1); + + /* Calculate the shift difference in order to be able to compare the two + (crossCorr*crossCorr)/energy in the same domain + */ + scalediff=totscale-totscale_max; + scalediff=WEBRTC_SPL_MIN(scalediff,31); + scalediff=WEBRTC_SPL_MAX(scalediff,-31); + + /* Compute the cross multiplication between the old best criteria + and the new one to be able to compare them without using a + division */ + + if (scalediff<0) { + newCrit = ((int32_t)crossCorrSqMod*EnergyMod_Max)>>(-scalediff); + maxCrit = ((int32_t)crossCorrSqMod_Max*EnergyMod); + } else { + newCrit = ((int32_t)crossCorrSqMod*EnergyMod_Max); + maxCrit = ((int32_t)crossCorrSqMod_Max*EnergyMod)>>scalediff; + } + + /* Store the new lag value if the new criteria is larger + than previous largest criteria */ + + if (newCrit > maxCrit) { + crossCorrSqMod_Max = crossCorrSqMod; + EnergyMod_Max = EnergyMod; + totscale_max = totscale; + maxlag = k; + } + } + pos+=step; + + /* Do a +/- to get the next energy */ + Energy += step * ((*rp_end * *rp_end - *rp_beg * *rp_beg) >> shifts); + rp_beg+=step; + rp_end+=step; + } + + return(maxlag+offset); +} diff 
#ifndef MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_XCORR_COEF_H_
#define MODULES_AUDIO_CODING_CODECS_ILBC_MAIN_SOURCE_XCORR_COEF_H_

#include <stddef.h>
#include <stdint.h>

/*----------------------------------------------------------------*
 * cross correlation which finds the optimal lag for the
 * crossCorr*crossCorr/(energy) criteria
 *
 * Returns the index of the best lag (0 .. searchLen-1) plus offset.
 *---------------------------------------------------------------*/

size_t WebRtcIlbcfix_XcorrCoef(
    int16_t* target,    /* (i) first array */
    int16_t* regressor, /* (i) second array */
    size_t subl,        /* (i) dimension arrays */
    size_t searchLen,   /* (i) the search length */
    size_t offset,      /* (i) samples offset between arrays */
    int16_t step        /* (i) +1 or -1 */
    );

#endif
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_

#include <stdint.h>

/* Plain-data record with no behavior of its own.
 * NOTE(review): the field names suggest a snapshot of iSAC bandwidth
 * estimator state (with in_use acting as a validity flag); the meaning
 * and units of the fields are not visible in this header - confirm
 * against the code that fills it in. */
typedef struct {
  int in_use;
  int32_t send_bw_avg;
  int32_t send_max_delay_avg;
  int16_t bottleneck_idx;
  int16_t jitter_info;
} IsacBandwidthInfo;

#endif  // MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
+ */ + +#include <memory.h> +#include <string.h> +#ifdef WEBRTC_ANDROID +#include <stdlib.h> +#endif + +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" + +static void WebRtcIsac_AllPoleFilter(double* InOut, + double* Coef, + size_t lengthInOut, + int orderCoef) { + /* the state of filter is assumed to be in InOut[-1] to InOut[-orderCoef] */ + double scal; + double sum; + size_t n; + int k; + + //if (fabs(Coef[0]-1.0)<0.001) { + if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) ) + { + for(n = 0; n < lengthInOut; n++) + { + sum = Coef[1] * InOut[-1]; + for(k = 2; k <= orderCoef; k++){ + sum += Coef[k] * InOut[-k]; + } + *InOut++ -= sum; + } + } + else + { + scal = 1.0 / Coef[0]; + for(n=0;n<lengthInOut;n++) + { + *InOut *= scal; + for(k=1;k<=orderCoef;k++){ + *InOut -= scal*Coef[k]*InOut[-k]; + } + InOut++; + } + } +} + +static void WebRtcIsac_AllZeroFilter(double* In, + double* Coef, + size_t lengthInOut, + int orderCoef, + double* Out) { + /* the state of filter is assumed to be in In[-1] to In[-orderCoef] */ + + size_t n; + int k; + double tmp; + + for(n = 0; n < lengthInOut; n++) + { + tmp = In[0] * Coef[0]; + + for(k = 1; k <= orderCoef; k++){ + tmp += Coef[k] * In[-k]; + } + + *Out++ = tmp; + In++; + } +} + +static void WebRtcIsac_ZeroPoleFilter(double* In, + double* ZeroCoef, + double* PoleCoef, + size_t lengthInOut, + int orderCoef, + double* Out) { + /* the state of the zero section is assumed to be in In[-1] to In[-orderCoef] */ + /* the state of the pole section is assumed to be in Out[-1] to Out[-orderCoef] */ + + WebRtcIsac_AllZeroFilter(In,ZeroCoef,lengthInOut,orderCoef,Out); + WebRtcIsac_AllPoleFilter(Out,PoleCoef,lengthInOut,orderCoef); +} + + +void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) { + size_t lag, n; + double sum, prod; + const double *x_lag; + + for (lag = 0; lag <= order; lag++) + { + sum = 0.0f; + x_lag = &x[lag]; + prod 
= x[0] * x_lag[0]; + for (n = 1; n < N - lag; n++) { + sum += prod; + prod = x[n] * x_lag[n]; + } + sum += prod; + r[lag] = sum; + } + +} + +static void WebRtcIsac_BwExpand(double* out, + double* in, + double coef, + size_t length) { + size_t i; + double chirp; + + chirp = coef; + + out[0] = in[0]; + for (i = 1; i < length; i++) { + out[i] = chirp * in[i]; + chirp *= coef; + } +} + +void WebRtcIsac_WeightingFilter(const double* in, + double* weiout, + double* whiout, + WeightFiltstr* wfdata) { + double tmpbuffer[PITCH_FRAME_LEN + PITCH_WLPCBUFLEN]; + double corr[PITCH_WLPCORDER+1], rc[PITCH_WLPCORDER+1]; + double apol[PITCH_WLPCORDER+1], apolr[PITCH_WLPCORDER+1]; + double rho=0.9, *inp, *dp, *dp2; + double whoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER]; + double weoutbuf[PITCH_WLPCBUFLEN + PITCH_WLPCORDER]; + double *weo, *who, opol[PITCH_WLPCORDER+1], ext[PITCH_WLPCWINLEN]; + int k, n, endpos, start; + + /* Set up buffer and states */ + memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN); + memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN); + memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN); + + dp=weoutbuf; + dp2=whoutbuf; + for (k=0;k<PITCH_WLPCORDER;k++) { + *dp++ = wfdata->weostate[k]; + *dp2++ = wfdata->whostate[k]; + opol[k]=0.0; + } + opol[0]=1.0; + opol[PITCH_WLPCORDER]=0.0; + weo=dp; + who=dp2; + + endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN; + inp=tmpbuffer + PITCH_WLPCBUFLEN; + + for (n=0; n<PITCH_SUBFRAMES; n++) { + /* Windowing */ + start=endpos-PITCH_WLPCWINLEN; + for (k=0; k<PITCH_WLPCWINLEN; k++) { + ext[k]=wfdata->window[k]*tmpbuffer[start+k]; + } + + /* Get LPC polynomial */ + WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER); + corr[0]=1.01*corr[0]+1.0; /* White noise correction */ + WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER); + WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1); + + /* Filtering */ + WebRtcIsac_ZeroPoleFilter(inp, apol, 
apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo); + WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who); + + inp+=PITCH_SUBFRAME_LEN; + endpos+=PITCH_SUBFRAME_LEN; + weo+=PITCH_SUBFRAME_LEN; + who+=PITCH_SUBFRAME_LEN; + } + + /* Export filter states */ + for (k=0;k<PITCH_WLPCORDER;k++) { + wfdata->weostate[k]=weoutbuf[PITCH_FRAME_LEN+k]; + wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k]; + } + + /* Export output data */ + memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); + memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h new file mode 100644 index 0000000000..a747a7f549 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/filter_functions.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_

#include <stddef.h>

#include "modules/audio_coding/codecs/isac/main/source/structs.h"

/* Autocorrelation of the N samples in x for lags 0..order:
 *   r[lag] = sum_{n=0}^{N-lag-1} x[n] * x[n+lag]
 * r receives order + 1 values. */
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);

/* Runs one frame of input through two adaptive LPC-based pole-zero filters,
 * writing one output frame to weiout and one to whiout. wfdata holds the
 * analysis window plus the input history and filter states, and is updated
 * by the call. */
void WebRtcIsac_WeightingFilter(const double* in,
                                double* weiout,
                                double* whiout,
                                WeightFiltstr* wfdata);

#endif  // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
+ */ + +#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" + +#include <math.h> + +void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata) { + int k; + + for (k = 0; k < PITCH_BUFFSIZE; k++) { + pitchfiltdata->ubuf[k] = 0.0; + } + pitchfiltdata->ystate[0] = 0.0; + for (k = 1; k < (PITCH_DAMPORDER); k++) { + pitchfiltdata->ystate[k] = 0.0; + } + pitchfiltdata->oldlagp[0] = 50.0; + pitchfiltdata->oldgainp[0] = 0.0; +} + +static void WebRtcIsac_InitWeightingFilter(WeightFiltstr* wfdata) { + int k; + double t, dtmp, dtmp2, denum, denum2; + + for (k = 0; k < PITCH_WLPCBUFLEN; k++) + wfdata->buffer[k] = 0.0; + + for (k = 0; k < PITCH_WLPCORDER; k++) { + wfdata->istate[k] = 0.0; + wfdata->weostate[k] = 0.0; + wfdata->whostate[k] = 0.0; + } + + /* next part should be in Matlab, writing to a global table */ + t = 0.5; + denum = 1.0 / ((double)PITCH_WLPCWINLEN); + denum2 = denum * denum; + for (k = 0; k < PITCH_WLPCWINLEN; k++) { + dtmp = PITCH_WLPCASYM * t * denum + (1 - PITCH_WLPCASYM) * t * t * denum2; + dtmp *= 3.14159265; + dtmp2 = sin(dtmp); + wfdata->window[k] = dtmp2 * dtmp2; + t++; + } +} + +void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State) { + int k; + + for (k = 0; k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 - + PITCH_FRAME_LEN / 2 + 2; + k++) + State->dec_buffer[k] = 0.0; + for (k = 0; k < 2 * ALLPASSSECTIONS + 1; k++) + State->decimator_state[k] = 0.0; + for (k = 0; k < 2; k++) + State->hp_state[k] = 0.0; + for (k = 0; k < QLOOKAHEAD; k++) + State->whitened_buf[k] = 0.0; + for (k = 0; k < QLOOKAHEAD; k++) + State->inbuf[k] = 0.0; + + WebRtcIsac_InitPitchFilter(&(State->PFstr_wght)); + + WebRtcIsac_InitPitchFilter(&(State->PFstr)); + + WebRtcIsac_InitWeightingFilter(&(State->Wghtstr)); +} + +void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata) { + int k; + + for (k = 0; k < QLOOKAHEAD; k++) { + prefiltdata->INLABUF1[k] = 0; + prefiltdata->INLABUF2[k] = 0; + + prefiltdata->INLABUF1_float[k] = 0; + 
prefiltdata->INLABUF2_float[k] = 0; + } + for (k = 0; k < 2 * (QORDER - 1); k++) { + prefiltdata->INSTAT1[k] = 0; + prefiltdata->INSTAT2[k] = 0; + prefiltdata->INSTATLA1[k] = 0; + prefiltdata->INSTATLA2[k] = 0; + + prefiltdata->INSTAT1_float[k] = 0; + prefiltdata->INSTAT2_float[k] = 0; + prefiltdata->INSTATLA1_float[k] = 0; + prefiltdata->INSTATLA2_float[k] = 0; + } + + /* High pass filter states */ + prefiltdata->HPstates[0] = 0.0; + prefiltdata->HPstates[1] = 0.0; + + prefiltdata->HPstates_float[0] = 0.0f; + prefiltdata->HPstates_float[1] = 0.0f; + + return; +} + +double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order) { + const double LEVINSON_EPS = 1.0e-10; + + double sum, alpha; + size_t m, m_h, i; + alpha = 0; // warning -DH + a[0] = 1.0; + if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. to zero */ + for (i = 0; i < order; i++) { + k[i] = 0; + a[i + 1] = 0; + } + } else { + a[1] = k[0] = -r[1] / r[0]; + alpha = r[0] + r[1] * k[0]; + for (m = 1; m < order; m++) { + sum = r[m + 1]; + for (i = 0; i < m; i++) { + sum += a[i + 1] * r[m - i]; + } + k[m] = -sum / alpha; + alpha += k[m] * sum; + m_h = (m + 1) >> 1; + for (i = 0; i < m_h; i++) { + sum = a[i + 1] + k[m] * a[m - i]; + a[m - i] += k[m] * a[i + 1]; + a[i + 1] = sum; + } + a[m + 1] = k[m]; + } + } + return alpha; +} + +/* The upper channel all-pass filter factors */ +const float WebRtcIsac_kUpperApFactorsFloat[2] = {0.03470000000000f, + 0.38260000000000f}; + +/* The lower channel all-pass filter factors */ +const float WebRtcIsac_kLowerApFactorsFloat[2] = {0.15440000000000f, + 0.74400000000000f}; + +/* This function performs all-pass filtering--a series of first order all-pass + * sections are used to filter the input in a cascade manner. + * The input is overwritten!! 
+ */ +void WebRtcIsac_AllPassFilter2Float(float* InOut, + const float* APSectionFactors, + int lengthInOut, + int NumberOfSections, + float* FilterState) { + int n, j; + float temp; + for (j = 0; j < NumberOfSections; j++) { + for (n = 0; n < lengthInOut; n++) { + temp = FilterState[j] + APSectionFactors[j] * InOut[n]; + FilterState[j] = -APSectionFactors[j] * temp + InOut[n]; + InOut[n] = temp; + } + } +} + +/* The number of composite all-pass filter factors */ +#define NUMBEROFCOMPOSITEAPSECTIONS 4 + +/* Function WebRtcIsac_SplitAndFilter + * This function creates low-pass and high-pass decimated versions of part of + the input signal, and part of the signal in the input 'lookahead buffer'. + + INPUTS: + in: a length FRAMESAMPLES array of input samples + prefiltdata: input data structure containing the filterbank states + and lookahead samples from the previous encoding + iteration. + OUTPUTS: + LP: a FRAMESAMPLES_HALF array of low-pass filtered samples that + have been phase equalized. The first QLOOKAHEAD samples are + based on the samples in the two prefiltdata->INLABUFx arrays + each of length QLOOKAHEAD. + The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based + on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input + array in[]. + HP: a FRAMESAMPLES_HALF array of high-pass filtered samples that + have been phase equalized. The first QLOOKAHEAD samples are + based on the samples in the two prefiltdata->INLABUFx arrays + each of length QLOOKAHEAD. + The remaining FRAMESAMPLES_HALF-QLOOKAHEAD samples are based + on the first FRAMESAMPLES_HALF-QLOOKAHEAD samples of the input + array in[]. + + LP_la: a FRAMESAMPLES_HALF array of low-pass filtered samples. + These samples are not phase equalized. They are computed + from the samples in the in[] array. + HP_la: a FRAMESAMPLES_HALF array of high-pass filtered samples + that are not phase equalized. They are computed from + the in[] vector. 
+ prefiltdata: this input data structure's filterbank state and + lookahead sample buffers are updated for the next + encoding iteration. +*/ +void WebRtcIsac_SplitAndFilterFloat(float* pin, + float* LP, + float* HP, + double* LP_la, + double* HP_la, + PreFiltBankstr* prefiltdata) { + int k, n; + float CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS]; + float ForTransform_CompositeAPFilterState[NUMBEROFCOMPOSITEAPSECTIONS]; + float ForTransform_CompositeAPFilterState2[NUMBEROFCOMPOSITEAPSECTIONS]; + float tempinoutvec[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float tempin_ch1[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float tempin_ch2[FRAMESAMPLES + MAX_AR_MODEL_ORDER]; + float in[FRAMESAMPLES]; + float ftmp; + + /* HPstcoeff_in = {a1, a2, b1 - b0 * a1, b2 - b0 * a2}; */ + static const float kHpStCoefInFloat[4] = { + -1.94895953203325f, 0.94984516000000f, -0.05101826139794f, + 0.05015484000000f}; + + /* The composite all-pass filter factors */ + static const float WebRtcIsac_kCompositeApFactorsFloat[4] = { + 0.03470000000000f, 0.15440000000000f, 0.38260000000000f, + 0.74400000000000f}; + + // The matrix for transforming the backward composite state to upper channel + // state. + static const float WebRtcIsac_kTransform1Float[8] = { + -0.00158678506084f, 0.00127157815343f, -0.00104805672709f, + 0.00084837248079f, 0.00134467983258f, -0.00107756549387f, + 0.00088814793277f, -0.00071893072525f}; + + // The matrix for transforming the backward composite state to lower channel + // state. 
+ static const float WebRtcIsac_kTransform2Float[8] = { + -0.00170686041697f, 0.00136780109829f, -0.00112736532350f, + 0.00091257055385f, 0.00103094281812f, -0.00082615076557f, + 0.00068092756088f, -0.00055119165484f}; + + /* High pass filter */ + + for (k = 0; k < FRAMESAMPLES; k++) { + in[k] = pin[k] + kHpStCoefInFloat[2] * prefiltdata->HPstates_float[0] + + kHpStCoefInFloat[3] * prefiltdata->HPstates_float[1]; + ftmp = pin[k] - kHpStCoefInFloat[0] * prefiltdata->HPstates_float[0] - + kHpStCoefInFloat[1] * prefiltdata->HPstates_float[1]; + prefiltdata->HPstates_float[1] = prefiltdata->HPstates_float[0]; + prefiltdata->HPstates_float[0] = ftmp; + } + + /* First Channel */ + + /*initial state of composite filter is zero */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + CompositeAPFilterState[k] = 0.0; + } + /* put every other sample of input into a temporary vector in reverse + * (backward) order*/ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempinoutvec[k] = in[FRAMESAMPLES - 1 - 2 * k]; + } + + /* now all-pass filter the backwards vector. Output values overwrite the + * input vector. */ + WebRtcIsac_AllPassFilter2Float( + tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF, + NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + /* save the backwards filtered output for later forward filtering, + but write it in forward order*/ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch1[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k]; + } + + /* save the backwards filter state becaue it will be transformed + later into a forward state */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + ForTransform_CompositeAPFilterState[k] = CompositeAPFilterState[k]; + } + + /* now backwards filter the samples in the lookahead buffer. The samples were + placed there in the encoding of the previous frame. 
The output samples + overwrite the input samples */ + WebRtcIsac_AllPassFilter2Float( + prefiltdata->INLABUF1_float, WebRtcIsac_kCompositeApFactorsFloat, + QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + /* save the output, but write it in forward order */ + /* write the lookahead samples for the next encoding iteration. Every other + sample at the end of the input frame is written in reverse order for the + lookahead length. Exported in the prefiltdata structure. */ + for (k = 0; k < QLOOKAHEAD; k++) { + tempin_ch1[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF1_float[k]; + prefiltdata->INLABUF1_float[k] = in[FRAMESAMPLES - 1 - 2 * k]; + } + + /* Second Channel. This is exactly like the first channel, except that the + even samples are now filtered instead (lower channel). */ + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + CompositeAPFilterState[k] = 0.0; + } + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempinoutvec[k] = in[FRAMESAMPLES - 2 - 2 * k]; + } + + WebRtcIsac_AllPassFilter2Float( + tempinoutvec, WebRtcIsac_kCompositeApFactorsFloat, FRAMESAMPLES_HALF, + NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch2[FRAMESAMPLES_HALF + QLOOKAHEAD - 1 - k] = tempinoutvec[k]; + } + + for (k = 0; k < NUMBEROFCOMPOSITEAPSECTIONS; k++) { + ForTransform_CompositeAPFilterState2[k] = CompositeAPFilterState[k]; + } + + WebRtcIsac_AllPassFilter2Float( + prefiltdata->INLABUF2_float, WebRtcIsac_kCompositeApFactorsFloat, + QLOOKAHEAD, NUMBEROFCOMPOSITEAPSECTIONS, CompositeAPFilterState); + + for (k = 0; k < QLOOKAHEAD; k++) { + tempin_ch2[QLOOKAHEAD - 1 - k] = prefiltdata->INLABUF2_float[k]; + prefiltdata->INLABUF2_float[k] = in[FRAMESAMPLES - 2 - 2 * k]; + } + + /* Transform filter states from backward to forward */ + /*At this point, each of the states of the backwards composite filters for the + two channels are transformed into forward filtering states for the + corresponding forward 
channel filters. Each channel's forward filtering + state from the previous + encoding iteration is added to the transformed state to get a proper forward + state */ + + /* So the existing NUMBEROFCOMPOSITEAPSECTIONS x 1 (4x1) state vector is + multiplied by a NUMBEROFCHANNELAPSECTIONSxNUMBEROFCOMPOSITEAPSECTIONS (2x4) + transform matrix to get the new state that is added to the previous 2x1 + input state */ + + for (k = 0; k < NUMBEROFCHANNELAPSECTIONS; k++) { /* k is row variable */ + for (n = 0; n < NUMBEROFCOMPOSITEAPSECTIONS; + n++) { /* n is column variable */ + prefiltdata->INSTAT1_float[k] += + ForTransform_CompositeAPFilterState[n] * + WebRtcIsac_kTransform1Float[k * NUMBEROFCHANNELAPSECTIONS + n]; + prefiltdata->INSTAT2_float[k] += + ForTransform_CompositeAPFilterState2[n] * + WebRtcIsac_kTransform2Float[k * NUMBEROFCHANNELAPSECTIONS + n]; + } + } + + /*obtain polyphase components by forward all-pass filtering through each + * channel */ + /* the backward filtered samples are now forward filtered with the + * corresponding channel filters */ + /* The all pass filtering automatically updates the filter states which are + exported in the prefiltdata structure */ + WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTAT1_float); + WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTAT2_float); + + /* Now Construct low-pass and high-pass signals as combinations of polyphase + * components */ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + LP[k] = 0.5f * (tempin_ch1[k] + tempin_ch2[k]); /* low pass signal*/ + HP[k] = 0.5f * (tempin_ch1[k] - tempin_ch2[k]); /* high pass signal*/ + } + + /* Lookahead LP and HP signals */ + /* now create low pass and high pass signals of the input vector. However, no + backwards filtering is performed, and hence no phase equalization is + involved. 
Also, the input contains some samples that are lookahead samples. + The high pass and low pass signals that are created are used outside this + function for analysis (not encoding) purposes */ + + /* set up input */ + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + tempin_ch1[k] = in[2 * k + 1]; + tempin_ch2[k] = in[2 * k]; + } + + /* the input filter states are passed in and updated by the all-pass filtering + routine and exported in the prefiltdata structure*/ + WebRtcIsac_AllPassFilter2Float(tempin_ch1, WebRtcIsac_kUpperApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTATLA1_float); + WebRtcIsac_AllPassFilter2Float(tempin_ch2, WebRtcIsac_kLowerApFactorsFloat, + FRAMESAMPLES_HALF, NUMBEROFCHANNELAPSECTIONS, + prefiltdata->INSTATLA2_float); + + for (k = 0; k < FRAMESAMPLES_HALF; k++) { + LP_la[k] = (float)(0.5f * (tempin_ch1[k] + tempin_ch2[k])); /*low pass */ + HP_la[k] = (double)(0.5f * (tempin_ch1[k] - tempin_ch2[k])); /* high pass */ + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h new file mode 100644 index 0000000000..1aecfc4046 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/isac_vad.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_ + +#include <stddef.h> + +#include "modules/audio_coding/codecs/isac/main/source/structs.h" + +void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata); +void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* state); +void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata); + +double WebRtcIsac_LevDurb(double* a, double* k, double* r, size_t order); + +/* The number of all-pass filter factors in an upper or lower channel*/ +#define NUMBEROFCHANNELAPSECTIONS 2 + +/* The upper channel all-pass filter factors */ +extern const float WebRtcIsac_kUpperApFactorsFloat[2]; + +/* The lower channel all-pass filter factors */ +extern const float WebRtcIsac_kLowerApFactorsFloat[2]; + +void WebRtcIsac_AllPassFilter2Float(float* InOut, + const float* APSectionFactors, + int lengthInOut, + int NumberOfSections, + float* FilterState); +void WebRtcIsac_SplitAndFilterFloat(float* in, + float* LP, + float* HP, + double* LP_la, + double* HP_la, + PreFiltBankstr* prefiltdata); + +#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ISAC_VAD_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h new file mode 100644 index 0000000000..fe9afa4ba2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/os_specific_inline.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ + +#include <math.h> + +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_POSIX) +#define WebRtcIsac_lrint lrint +#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32)) +static __inline long int WebRtcIsac_lrint(double x_dbl) { + long int x_int; + + __asm { + fld x_dbl + fistp x_int + } + ; + + return x_int; +} +#else // Do a slow but correct implementation of lrint + +static __inline long int WebRtcIsac_lrint(double x_dbl) { + long int x_int; + x_int = (long int)floor(x_dbl + 0.499999999999); + return x_int; +} + +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c new file mode 100644 index 0000000000..8a19ac1710 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c @@ -0,0 +1,695 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" + +#include <math.h> +#include <memory.h> +#include <string.h> +#ifdef WEBRTC_ANDROID +#include <stdlib.h> +#endif + +#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h" +#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h" +#include "rtc_base/system/ignore_warnings.h" + +static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, + 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; + +/* interpolation filter */ +__inline static void IntrepolFilter(double *data_ptr, double *intrp) +{ + *intrp = kInterpolWin[0] * data_ptr[-3]; + *intrp += kInterpolWin[1] * data_ptr[-2]; + *intrp += kInterpolWin[2] * data_ptr[-1]; + *intrp += kInterpolWin[3] * data_ptr[0]; + *intrp += kInterpolWin[4] * data_ptr[1]; + *intrp += kInterpolWin[5] * data_ptr[2]; + *intrp += kInterpolWin[6] * data_ptr[3]; + *intrp += kInterpolWin[7] * data_ptr[4]; +} + + +/* 2D parabolic interpolation */ +/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. 
*/ +__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) +{ + double c, b[2], A[2][2]; + double t1, t2, d; + double delta1, delta2; + + + // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; + // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 + + c = T[1][1]; + b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]); + b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]); + A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]); + t1 = 0.5 * (T[0][0] + T[2][2]) - c; + t2 = 0.5 * (T[2][0] + T[0][2]) - c; + d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2; + A[0][0] = -t1 - 0.5 * d; + A[1][1] = -t2 - 0.5 * d; + + /* deal with singularities or ill-conditioned cases */ + if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) { + *peak_val = T[1][1]; + return; + } + + /* Cholesky decomposition: replace A by upper-triangular factor */ + A[0][0] = sqrt(A[0][0]); + A[0][1] = A[0][1] / A[0][0]; + A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]); + + /* compute [x; y] = -0.5 * inv(A) * b */ + t1 = b[0] / A[0][0]; + t2 = (b[1] - t1 * A[0][1]) / A[1][1]; + delta2 = t2 / A[1][1]; + delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0]; + delta2 *= 0.5; + + /* limit norm */ + t1 = delta1 * delta1 + delta2 * delta2; + if (t1 > 1.0) { + delta1 /= t1; + delta2 /= t1; + } + + *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c; + + *x += delta1; + *y += delta2; +} + + +static void PCorr(const double *in, double *outcorr) +{ + double sum, ysum, prod; + const double *x, *inptr; + int k, n; + + //ysum = 1e-6; /* use this with float (i.s.o. double)! 
*/ + ysum = 1e-13; + sum = 0.0; + x = in + PITCH_MAX_LAG/2 + 2; + for (n = 0; n < PITCH_CORR_LEN2; n++) { + ysum += in[n] * in[n]; + sum += x[n] * in[n]; + } + + outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ + *outcorr = sum / sqrt(ysum); + + for (k = 1; k < PITCH_LAG_SPAN2; k++) { + ysum -= in[k-1] * in[k-1]; + ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1]; + sum = 0.0; + inptr = &in[k]; + prod = x[0] * inptr[0]; + for (n = 1; n < PITCH_CORR_LEN2; n++) { + sum += prod; + prod = x[n] * inptr[n]; + } + sum += prod; + outcorr--; + *outcorr = sum / sqrt(ysum); + } +} + +static void WebRtcIsac_AllpassFilterForDec(double* InOut, + const double* APSectionFactors, + size_t lengthInOut, + double* FilterState) { + // This performs all-pass filtering--a series of first order all-pass + // sections are used to filter the input in a cascade manner. + size_t n, j; + double temp; + for (j = 0; j < ALLPASSSECTIONS; j++) { + for (n = 0; n < lengthInOut; n += 2) { + temp = InOut[n]; // store input + InOut[n] = FilterState[j] + APSectionFactors[j] * temp; + FilterState[j] = -APSectionFactors[j] * InOut[n] + temp; + } + } +} + +static void WebRtcIsac_DecimateAllpass( + const double* in, + double* state_in, // array of size: 2*ALLPASSSECTIONS+1 + size_t N, // number of input samples + double* out) { // array of size N/2 + + static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826}; + static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744}; + + size_t n; + double data_vec[PITCH_FRAME_LEN]; + + /* copy input */ + memcpy(data_vec + 1, in, sizeof(double) * (N - 1)); + + data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state + state_in[2 * ALLPASSSECTIONS] = in[N - 1]; + + WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in); + WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N, + state_in + ALLPASSSECTIONS); + + for (n = 0; n < N / 2; n++) + out[n] = data_vec[2 * n] + data_vec[2 * n + 1]; +} + 
+RTC_PUSH_IGNORING_WFRAME_LARGER_THAN() + +static void WebRtcIsac_InitializePitch(const double* in, + const double old_lag, + const double old_gain, + PitchAnalysisStruct* State, + double* lags) { + double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2]; + double ratio, log_lag, gain_bias; + double bias; + double corrvec1[PITCH_LAG_SPAN2]; + double corrvec2[PITCH_LAG_SPAN2]; + int m, k; + // Allocating 10 extra entries at the begining of the CorrSurf + double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)]; + double* CorrSurf[2*PITCH_BW+3]; + double *CorrSurfPtr1, *CorrSurfPtr2; + double LagWin[3] = {0.2, 0.5, 0.98}; + int ind1, ind2, peaks_ind, peak, max_ind; + int peaks[PITCH_MAX_NUM_PEAKS]; + double adj, gain_tmp; + double corr, corr_max; + double intrp_a, intrp_b, intrp_c, intrp_d; + double peak_vals[PITCH_MAX_NUM_PEAKS]; + double lags1[PITCH_MAX_NUM_PEAKS]; + double lags2[PITCH_MAX_NUM_PEAKS]; + double T[3][3]; + int row; + + for(k = 0; k < 2*PITCH_BW+3; k++) + { + CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)]; + } + /* reset CorrSurf matrix */ + memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); + + //warnings -DH + max_ind = 0; + peak = 0; + + /* copy old values from state buffer */ + memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + + /* decimation; put result after the old values */ + WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, + &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); + + /* low-pass filtering */ + for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) + buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; + + /* copy end part back into state buffer */ + memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * 
(PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + + /* compute correlation for first and second half of the frame */ + PCorr(buf_dec, corrvec1); + PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2); + + /* bias towards pitch lag of previous frame */ + log_lag = log(0.5 * old_lag); + gain_bias = 4.0 * old_gain * old_gain; + if (gain_bias > 0.8) gain_bias = 0.8; + for (k = 0; k < PITCH_LAG_SPAN2; k++) + { + ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; + bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); + corrvec1[k] *= bias; + } + + /* taper correlation functions */ + for (k = 0; k < 3; k++) { + gain_tmp = LagWin[k]; + corrvec1[k] *= gain_tmp; + corrvec2[k] *= gain_tmp; + corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + } + + corr_max = 0.0; + /* fill middle row of correlation surface */ + ind1 = 0; + ind2 = 0; + CorrSurfPtr1 = &CorrSurf[PITCH_BW][2]; + for (k = 0; k < PITCH_LAG_SPAN2; k++) { + corr = corrvec1[ind1++] + corrvec2[ind2++]; + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + } + /* fill first and last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW; + CorrSurfPtr1 = &CorrSurf[0][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill second and next 
to last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW-1; + CorrSurfPtr1 = &CorrSurf[1][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill remainder of correlation surface */ + for (m = 2; m < PITCH_BW; m++) { + ind1 = 0; + ind2 = PITCH_BW - m; /* always larger than ind1 */ + CorrSurfPtr1 = &CorrSurf[m][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + } + + /* threshold value to qualify as a peak */ + corr_max *= 0.6; + + peaks_ind = 0; + /* find peaks */ + for (m = 1; m < PITCH_BW+1; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - 
(PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + + if (peaks_ind > 0) { + /* examine each peak */ + CorrSurfPtr1 = &CorrSurf[0][0]; + for (k = 0; k < peaks_ind; k++) { + peak = peaks[k]; + + /* compute four interpolated values around current peak */ + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a); + IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); + IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); + + /* determine maximum of the interpolated values */ + corr = CorrSurfPtr1[peak]; + corr_max = intrp_a; + if (intrp_b > corr_max) corr_max = intrp_b; + if (intrp_c > corr_max) corr_max = intrp_c; + if (intrp_d > corr_max) corr_max = intrp_d; + + /* determine where the peak sits and fill a 3x3 matrix around it */ + row = peak / (PITCH_LAG_SPAN2+4); + lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[k] = (double) (lags1[k] + PITCH_BW - row); + if ( corr > corr_max ) { + T[0][0] = CorrSurfPtr1[peak - 
(PITCH_LAG_SPAN2+5)]; + T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][1] = corr; + T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][0] = intrp_a; + T[0][1] = intrp_b; + T[2][1] = intrp_c; + T[1][2] = intrp_d; + } else { + if (intrp_a == corr_max) { + lags1[k] -= 0.5; + lags2[k] += 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); + IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); + T[1][1] = intrp_a; + T[0][2] = intrp_b; + T[2][2] = intrp_c; + T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; + T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][2] = corr; + } else if (intrp_b == corr_max) { + lags1[k] -= 0.5; + lags2[k] -= 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); + T[2][0] = intrp_a; + T[1][1] = intrp_b; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); + T[2][2] = intrp_d; + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[0][1] = CorrSurfPtr1[peak - 1]; + T[2][1] = corr; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + } else if (intrp_c == corr_max) { + lags1[k] += 0.5; + lags2[k] += 0.5; + T[0][0] = intrp_a; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); + T[1][1] = intrp_c; + T[0][2] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[0][1] = corr; + T[2][1] = CorrSurfPtr1[peak + 1]; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + } else { + lags1[k] += 0.5; + lags2[k] -= 0.5; + T[0][0] = intrp_b; + T[2][0] = intrp_c; + T[1][1] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); + IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); + T[1][0] = corr; + T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][1] = CorrSurfPtr1[peak + 
(PITCH_LAG_SPAN2+5)]; + T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; + } + } + + /* 2D parabolic interpolation gives more accurate lags and peak value */ + Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]); + } + + /* determine the highest peak, after applying a bias towards short lags */ + corr_max = 0.0; + for (k = 0; k < peaks_ind; k++) { + corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k])); + if (corr > corr_max) { + corr_max = corr; + peak = k; + } + } + + lags1[peak] *= 2.0; + lags2[peak] *= 2.0; + + if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; + if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; + if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; + if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[peak]; + lags[1] = lags1[peak]; + lags[2] = lags2[peak]; + lags[3] = lags2[peak]; + } + else + { + row = max_ind / (PITCH_LAG_SPAN2+4); + lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[0] = (double) (lags1[0] + PITCH_BW - row); + + if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; + if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; + if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; + if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[0]; + lags[1] = lags1[0]; + lags[2] = lags2[0]; + lags[3] = lags2[0]; + } +} + +RTC_POP_IGNORING_WFRAME_LARGER_THAN() + +/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order + * t = (0:4)'; + * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; + * [Q, dummy] = qr(A); + * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ +static const double kWeight[5][5] = { + { 0.29714285714286, 
-0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, + {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, + {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286}, + { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143}, + { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286} +}; + +/* second order high-pass filter */ +static void WebRtcIsac_Highpass(const double* in, + double* out, + double* state, + size_t N) { + /* create high-pass filter coefficients + * z = 0.998 * exp(j*2*pi*35/8000); + * p = 0.94 * exp(j*2*pi*140/8000); + * HP_b = [1, -2*real(z), abs(z)^2]; + * HP_a = [1, -2*real(p), abs(p)^2]; */ + static const double a_coef[2] = { 1.86864659625574, -0.88360000000000}; + static const double b_coef[2] = {-1.99524591718270, 0.99600400000000}; + + size_t k; + + for (k=0; k<N; k++) { + *out = *in + state[1]; + state[1] = state[0] + b_coef[0] * *in + a_coef[0] * *out; + state[0] = b_coef[1] * *in++ + a_coef[1] * *out++; + } +} + +RTC_PUSH_IGNORING_WFRAME_LARGER_THAN() + +void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */ + double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ + PitchAnalysisStruct *State, + double *lags, + double *gains) +{ + double HPin[PITCH_FRAME_LEN]; + double Weighted[PITCH_FRAME_LEN]; + double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD]; + double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD]; + double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead + double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD]; + double old_lag, old_gain; + double nrg_wht, tmp; + double Wnrg, Wfluct, Wgain; + double H[4][4]; + double grad[4]; + double dG[4]; + int k, m, n, iter; + + /* high pass filtering using second order pole-zero filter */ + WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN); + + /* copy from state into buffer
*/ + memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD); + + /* compute weighted and whitened signals */ + WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr)); + + /* copy from buffer into state */ + memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD); + + old_lag = State->PFstr_wght.oldlagp[0]; + old_gain = State->PFstr_wght.oldgainp[0]; + + /* initial pitch estimate */ + WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags); + + + /* Iterative optimization of lags - to be done */ + + /* compute energy of whitened signal */ + nrg_wht = 0.0; + for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++) + nrg_wht += Whitened[k] * Whitened[k]; + + + /* Iterative optimization of gains */ + + /* set weights for energy, gain fluctuation, and spectral gain penalty functions */ + Wnrg = 1.0 / nrg_wht; + Wgain = 0.005; + Wfluct = 3.0; + + /* set initial gains */ + for (k = 0; k < 4; k++) + gains[k] = PITCH_MAX_GAIN_06; + + /* two iterations should be enough */ + for (iter = 0; iter < 2; iter++) { + /* compute Jacobian of pre-filter output towards gains */ + WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains); + + /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */ + for (k = 0; k < 4; k++) { + tmp = 0.0; + for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) + tmp += out_G[n] * out_dG[k][n]; + grad[k] = tmp * Wnrg; + } + for (k = 0; k < 4; k++) { + for (m = 0; m <= k; m++) { + tmp = 0.0; + for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) + tmp += out_dG[m][n] * out_dG[k][n]; + H[k][m] = tmp * Wnrg; + } + } + + /* add gradient and Hessian (lower triangle) for dampening fast gain changes */ + for (k = 0; k < 4; k++) { + tmp = kWeight[k+1][0] * old_gain; + for (m = 0; m < 4; m++) + tmp += kWeight[k+1][m+1] * gains[m]; + grad[k] += tmp * Wfluct; + } + for (k = 0; k < 4; k++) { + for (m = 0; m <= k; m++) {
+ H[k][m] += kWeight[k+1][m+1] * Wfluct; + } + } + + /* add gradient and Hessian for dampening gain */ + for (k = 0; k < 3; k++) { + tmp = 1.0 / (1 - gains[k]); + grad[k] += tmp * tmp * Wgain; + H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain); + } + tmp = 1.0 / (1 - gains[3]); + grad[3] += 1.33 * (tmp * tmp * Wgain); + H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain); + + + /* compute Cholesky factorization of Hessian + * by overwriting the upper triangle; scale factors on diagonal + * (for non pc-platforms store the inverse of the diagonals separately to minimize divisions) */ + H[0][1] = H[1][0] / H[0][0]; + H[0][2] = H[2][0] / H[0][0]; + H[0][3] = H[3][0] / H[0][0]; + H[1][1] -= H[0][0] * H[0][1] * H[0][1]; + H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1]; + H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1]; + H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2]; + H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; + H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3]; + + /* Compute update as delta_gains = -inv(H) * grad */ + /* copy and negate */ + for (k = 0; k < 4; k++) + dG[k] = -grad[k]; + /* forward substitution */ + dG[1] -= dG[0] * H[0][1]; + dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2]; + dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3]; + /* scale */ + for (k = 0; k < 4; k++) + dG[k] /= H[k][k]; + /* back substitution */ + dG[2] -= dG[3] * H[2][3]; + dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2]; + dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1]; + + /* update gains and check range */ + for (k = 0; k < 4; k++) { + gains[k] += dG[k]; + if (gains[k] > PITCH_MAX_GAIN) + gains[k] = PITCH_MAX_GAIN; + else if (gains[k] < 0.0) + gains[k] = 0.0; + } + } + + /* update state for next frame */ + WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains); + + /* concatenate previous input's end and current input */ + memcpy(inbuf, State->inbuf,
sizeof(double) * QLOOKAHEAD); + memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN); + + /* lookahead pitch filtering for masking analysis */ + WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains); + + /* store last part of input */ + for (k = 0; k < QLOOKAHEAD; k++) + State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN]; +} + +RTC_POP_IGNORING_WFRAME_LARGER_THAN() diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h new file mode 100644 index 0000000000..4ab78c20ad --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * pitch_estimator.h + * + * Pitch functions + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ + +#include <stddef.h> + +#include "modules/audio_coding/codecs/isac/main/source/structs.h" + +void WebRtcIsac_PitchAnalysis( + const double* in, /* PITCH_FRAME_LEN samples */ + double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ + PitchAnalysisStruct* State, + double* lags, + double* gains); + +#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c new file mode 100644 index 0000000000..bf03dfff2e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.c @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include <memory.h> +#include <stdlib.h> + +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h" +#include "rtc_base/compile_assert_c.h" + +/* + * We are implementing the following filters; + * + * Pre-filtering: + * y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag); + * + * Post-filtering: + * y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag); + * + * Note that `lag` is a floating number so we perform an interpolation to + * obtain the correct `lag`. 
+ * + */ + +static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25, + -0.07}; + +/* interpolation coefficients; generated by design_pitch_filter.m */ +static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = { + {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125, + 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109, + 0.01754159521746}, + {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643, + 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084, + 0.01654127246314}, + {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787, + 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311, + 0.01295781500709}, + {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393, + 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166, + 0.00719783432422}, + {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001, + 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000, + -0.00000000000000}, + {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562, + 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377, + -0.00764851320885}, + {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650, + 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059, + -0.01463300534216}, + {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190, + 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865, + -0.01985640750433} +}; + +/* + * Enumerating the operation of the filter. + * iSAC has 4 different pitch-filter which are very similar in their structure. + * + * kPitchFilterPre : In this mode the filter is operating as pitch + * pre-filter. This is used at the encoder. + * kPitchFilterPost : In this mode the filter is operating as pitch + * post-filter. 
This is the inverse of pre-filter and used + * in the decoder. + * kPitchFilterPreLa : This is, in structure, similar to pre-filtering but + * utilizing 3 millisecond lookahead. It is used to + * obtain the signal for LPC analysis. + * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but + * differential changes in gain is considered. This is + * used to find the optimal gain. + */ +typedef enum { + kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain +} PitchFilterOperation; + +/* + * Structure with parameters used for pitch-filtering. + * buffer : a buffer where the sum of previous inputs and outputs + * are stored. + * damper_state : the state of the damping filter. The filter is defined by + * `kDampFilter`. + * interpol_coeff : pointer to a set of coefficients which are used to utilize + * fractional pitch by interpolation. + * gain : pitch-gain to be applied to the current segment of input. + * lag : pitch-lag for the current segment of input. + * lag_offset : the offset of lag w.r.t. current sample. + * sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC + * frame. + * mode : This specifies the usage of the filter. See + * 'PitchFilterOperation' for operational modes. + * num_samples : number of samples to be processed in each segment. + * index : index of the input and output sample. + * damper_state_dg : state of damping filter for different trial gains. + * gain_mult : differential changes to gain. + */ +typedef struct { + double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD]; + double damper_state[PITCH_DAMPORDER]; + const double *interpol_coeff; + double gain; + double lag; + int lag_offset; + + int sub_frame; + PitchFilterOperation mode; + int num_samples; + int index; + + double damper_state_dg[4][PITCH_DAMPORDER]; + double gain_mult[4]; +} PitchFilterParam; + +/********************************************************************** + * FilterSegment() + * Filter one segment, a quarter of a frame.
+ * + * Inputs + * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate. + * filter_param : pitch filter parameters. + * + * Outputs + * out_data : pointer to a buffer where the filtered signal is written to. + * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer + * where the output of different gain values (differential + * change to gain) is written. + */ +static void FilterSegment(const double* in_data, PitchFilterParam* parameters, + double* out_data, + double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { + int n; + int m; + int j; + double sum; + double sum2; + /* Index of `parameters->buffer` where the output is written to. */ + int pos = parameters->index + PITCH_BUFFSIZE; + /* Index of `parameters->buffer` where samples are read for fractional-lag + * computation. */ + int pos_lag = pos - parameters->lag_offset; + + for (n = 0; n < parameters->num_samples; ++n) { + /* Shift low pass filter states. */ + for (m = PITCH_DAMPORDER - 1; m > 0; --m) { + parameters->damper_state[m] = parameters->damper_state[m - 1]; + } + /* Filter to get fractional pitch. */ + sum = 0.0; + for (m = 0; m < PITCH_FRACORDER; ++m) { + sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m]; + } + /* Multiply with gain. */ + parameters->damper_state[0] = parameters->gain * sum; + + if (parameters->mode == kPitchFilterPreGain) { + int lag_index = parameters->index - parameters->lag_offset; + int m_tmp = (lag_index < 0) ? -lag_index : 0; + /* Update the damper state for the new sample. */ + for (m = PITCH_DAMPORDER - 1; m > 0; --m) { + for (j = 0; j < 4; ++j) { + parameters->damper_state_dg[j][m] = + parameters->damper_state_dg[j][m - 1]; + } + } + + for (j = 0; j < parameters->sub_frame + 1; ++j) { + /* Filter for fractional pitch. */ + sum2 = 0.0; + for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) { + /* `lag_index + m` is always larger than or equal to zero, see how + * m_tmp is computed. 
This is equivalent to assume samples outside + * `out_dg[j]` are zero. */ + sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m]; + } + /* Add the contribution of differential gain change. */ + parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum + + parameters->gain * sum2; + } + + /* Filter with damping filter, and store the results. */ + for (j = 0; j < parameters->sub_frame + 1; ++j) { + sum = 0.0; + for (m = 0; m < PITCH_DAMPORDER; ++m) { + sum -= parameters->damper_state_dg[j][m] * kDampFilter[m]; + } + out_dg[j][parameters->index] = sum; + } + } + /* Filter with damping filter. */ + sum = 0.0; + for (m = 0; m < PITCH_DAMPORDER; ++m) { + sum += parameters->damper_state[m] * kDampFilter[m]; + } + + /* Subtract from input and update buffer. */ + out_data[parameters->index] = in_data[parameters->index] - sum; + parameters->buffer[pos] = in_data[parameters->index] + + out_data[parameters->index]; + + ++parameters->index; + ++pos; + ++pos_lag; + } + return; +} + +/* Update filter parameters based on the pitch-gains and pitch-lags. */ +static void Update(PitchFilterParam* parameters) { + double fraction; + int fraction_index; + /* Compute integer lag-offset. */ + parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + + 0.5); + /* Find correct set of coefficients for computing fractional pitch. */ + fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY); + fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5); + parameters->interpol_coeff = kIntrpCoef[fraction_index]; + + if (parameters->mode == kPitchFilterPreGain) { + /* If in this mode make a differential change to pitch gain. 
*/ + parameters->gain_mult[parameters->sub_frame] += 0.2; + if (parameters->gain_mult[parameters->sub_frame] > 1.0) { + parameters->gain_mult[parameters->sub_frame] = 1.0; + } + if (parameters->sub_frame > 0) { + parameters->gain_mult[parameters->sub_frame - 1] -= 0.2; + } + } +} + +/****************************************************************************** + * FilterFrame() + * Filter a frame of 30 milliseconds, given pitch-lags and pitch-gains. + * + * Inputs + * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate. + * lags : pointer to pitch-lags, 4 lags per frame. + * gains : pointer to pitch-gains, 4 gains per frame. + * mode : defining the functionality of the filter. It takes the + * following values. + * kPitchFilterPre: Pitch pre-filter, used at encoder. + * kPitchFilterPost: Pitch post-filter, used at decoder. + * kPitchFilterPreLa: Pitch pre-filter with lookahead. + * kPitchFilterPreGain: Pitch pre-filter used to obtain optimal + * pitch-gains. + * + * Outputs + * out_data : pointer to a buffer where the filtered signal is written to. + * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer + * where the output of different gain values (differential + * change to gain) is written. + */ +static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, + double* lags, double* gains, PitchFilterOperation mode, + double* out_data, + double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { + PitchFilterParam filter_parameters; + double gain_delta, lag_delta; + double old_lag, old_gain; + int n; + int m; + const double kEnhancer = 1.3; + + /* Set up buffer and states. */ + filter_parameters.index = 0; + filter_parameters.lag_offset = 0; + filter_parameters.mode = mode; + /* Copy states to local variables.
*/ + memcpy(filter_parameters.buffer, filter_state->ubuf, + sizeof(filter_state->ubuf)); + RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >= + sizeof(filter_state->ubuf)); + memset(filter_parameters.buffer + + sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]), + 0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf)); + memcpy(filter_parameters.damper_state, filter_state->ystate, + sizeof(filter_state->ystate)); + + if (mode == kPitchFilterPreGain) { + /* Clear buffers. */ + memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult)); + memset(filter_parameters.damper_state_dg, 0, + sizeof(filter_parameters.damper_state_dg)); + for (n = 0; n < PITCH_SUBFRAMES; ++n) { + //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD)); + memset(out_dg[n], 0, sizeof(out_dg[n])); + } + } else if (mode == kPitchFilterPost) { + /* Make output more periodic. Negative sign is to change the structure + * of the filter. */ + for (n = 0; n < PITCH_SUBFRAMES; ++n) { + gains[n] *= -kEnhancer; + } + } + + old_lag = *filter_state->oldlagp; + old_gain = *filter_state->oldgainp; + + /* No interpolation if pitch lag step is big. */ + if ((lags[0] > (PITCH_UPSTEP * old_lag)) || + (lags[0] < (PITCH_DOWNSTEP * old_lag))) { + old_lag = lags[0]; + old_gain = gains[0]; + + if (mode == kPitchFilterPreGain) { + filter_parameters.gain_mult[0] = 1.0; + } + } + + filter_parameters.num_samples = PITCH_UPDATE; + for (m = 0; m < PITCH_SUBFRAMES; ++m) { + /* Set the sub-frame value. */ + filter_parameters.sub_frame = m; + /* Calculate interpolation steps for pitch-lag and pitch-gain. */ + lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME; + filter_parameters.lag = old_lag; + gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME; + filter_parameters.gain = old_gain; + /* Store for the next sub-frame. 
*/ + old_lag = lags[m]; + old_gain = gains[m]; + + for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) { + /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes, + * some parameters of filter need to be updated. */ + filter_parameters.gain += gain_delta; + filter_parameters.lag += lag_delta; + /* Update parameters according to new lag value. */ + Update(&filter_parameters); + /* Filter a segment of input. */ + FilterSegment(in_data, &filter_parameters, out_data, out_dg); + } + } + + if (mode != kPitchFilterPreGain) { + /* Export buffer and states. */ + memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN], + sizeof(filter_state->ubuf)); + memcpy(filter_state->ystate, filter_parameters.damper_state, + sizeof(filter_state->ystate)); + + /* Store for the next frame. */ + *filter_state->oldlagp = old_lag; + *filter_state->oldgainp = old_gain; + } + + if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) { + /* Filter the lookahead segment, this is treated as the last sub-frame. So + * set `filter_parameters` to last sub-frame.
*/ + filter_parameters.sub_frame = PITCH_SUBFRAMES - 1; + filter_parameters.num_samples = QLOOKAHEAD; + FilterSegment(in_data, &filter_parameters, out_data, out_dg); + } +} + +void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL); +} + +void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data, + NULL); +} + +void WebRtcIsac_PitchfilterPre_gains( + double* in_data, double* out_data, + double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state, + double* lags, double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data, + out_dg); +} + +void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data, + PitchFiltstr* pf_state, double* lags, + double* gains) { + FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL); +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h new file mode 100644 index 0000000000..9a232de87b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/pitch_filter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ + +#include "modules/audio_coding/codecs/isac/main/source/structs.h" + +void WebRtcIsac_PitchfilterPre(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPost(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPre_la(double* indat, + double* outdat, + PitchFiltstr* pfp, + double* lags, + double* gains); + +void WebRtcIsac_PitchfilterPre_gains( + double* indat, + double* outdat, + double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD], + PitchFiltstr* pfp, + double* lags, + double* gains); + +#endif // MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h new file mode 100644 index 0000000000..abce90c4f5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/settings.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * settings.h + * + * Declaration of #defines used in the iSAC codec + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ + +/* sampling frequency (Hz) */ +#define FS 16000 + +/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */ +#define INITIAL_FRAMESAMPLES 960 + +/* do not modify the following; this will have to be modified if we + * have a 20ms framesize option */ +/**********************************************************************/ +/* milliseconds */ +#define FRAMESIZE 30 +/* number of samples per frame processed in the encoder, 480 */ +#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */ +#define FRAMESAMPLES_HALF 240 +#define FRAMESAMPLES_QUARTER 120 +/**********************************************************************/ + +/* max number of samples per frame (= 60 ms frame) */ +#define MAX_FRAMESAMPLES 960 +#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2) +/* number of samples per 10ms frame */ +#define FRAMESAMPLES_10ms ((10 * FS) / 1000) +#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2) +/* number of samples in 30 ms frame */ +#define FRAMESAMPLES_30ms 480 +/* number of subframes */ +#define SUBFRAMES 6 +/* length of a subframe */ +#define UPDATE 80 +/* length of half a subframe (low/high band) */ +#define HALF_SUBFRAMELEN (UPDATE / 2) +/* samples of look ahead (in a half-band, so actually + * half the samples of look ahead @ FS) */ +#define QLOOKAHEAD 24 /* 3 ms */ +/* order of AR model in spectral entropy coder */ +#define AR_ORDER 6 +/* order of LP model in spectral entropy coder */ +#define LP_ORDER 0 + +/* window length (masking analysis) */ +#define WINLEN 256 +/* order of low-band pole filter used to approximate masking curve */ +#define ORDERLO 12 +/* order of hi-band pole filter used to approximate masking curve */ +#define ORDERHI 6 + +#define UB_LPC_ORDER 4 +#define UB_LPC_VEC_PER_FRAME 2 +#define
UB16_LPC_VEC_PER_FRAME 4 +#define UB_ACTIVE_SUBFRAMES 2 +#define UB_MAX_LPC_ORDER 6 +#define UB_INTERPOL_SEGMENTS 1 +#define UB16_INTERPOL_SEGMENTS 3 +#define LB_TOTAL_DELAY_SAMPLES 48 +enum ISACBandwidth { isac8kHz = 8, isac12kHz = 12, isac16kHz = 16 }; +enum ISACBand { + kIsacLowerBand = 0, + kIsacUpperBand12 = 1, + kIsacUpperBand16 = 2 +}; +enum IsacSamplingRate { kIsacWideband = 16, kIsacSuperWideband = 32 }; +#define UB_LPC_GAIN_DIM SUBFRAMES +#define FB_STATE_SIZE_WORD32 6 + +/* order for post_filter_bank */ +#define POSTQORDER 3 +/* order for pre-filterbank */ +#define QORDER 3 +/* another order */ +#define QORDER_ALL (POSTQORDER + QORDER - 1) +/* for decimator */ +#define ALLPASSSECTIONS 2 + +/* array size for byte stream in number of bytes. */ +/* The old maximum size still needed for the decoding */ +#define STREAM_SIZE_MAX 600 +#define STREAM_SIZE_MAX_30 200 /* 200 bytes=53.4 kbps @ 30 ms.framelength */ +#define STREAM_SIZE_MAX_60 400 /* 400 bytes=53.4 kbps @ 60 ms.framelength */ + +/* storage size for bit counts */ +#define BIT_COUNTER_SIZE 30 +/* maximum order of any AR model or filter */ +#define MAX_AR_MODEL_ORDER 12 // 50 + +/* For pitch analysis */ +#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */ +#define PITCH_MAX_LAG 140 /* 57 Hz */ +#define PITCH_MIN_LAG 20 /* 400 Hz */ +#define PITCH_MAX_GAIN 0.45 +#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */ +#define PITCH_MAX_GAIN_Q12 1843 +#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG / 2 - PITCH_MIN_LAG / 2 + 5) +#define PITCH_CORR_LEN2 60 /* 15 ms */ +#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN / 4) +#define PITCH_BW 11 /* half the band width of correlation surface */ +#define PITCH_SUBFRAMES 4 +#define PITCH_GRAN_PER_SUBFRAME 5 +#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN / PITCH_SUBFRAMES) +#define PITCH_UPDATE (PITCH_SUBFRAME_LEN / PITCH_GRAN_PER_SUBFRAME) +/* maximum number of peaks to be examined in correlation surface */ +#define PITCH_MAX_NUM_PEAKS 10 +#define PITCH_PEAK_DECAY 0.85 +/* For 
weighting filter */ +#define PITCH_WLPCORDER 6 +#define PITCH_WLPCWINLEN PITCH_FRAME_LEN +#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */ +#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN +/* For pitch filter */ +/* Extra 50 for fraction and LP filters */ +#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50) +#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN + PITCH_BUFFSIZE) +/* Max rel. step for interpolation */ +#define PITCH_UPSTEP 1.5 +/* Max rel. step for interpolation */ +#define PITCH_DOWNSTEP 0.67 +#define PITCH_FRACS 8 +#define PITCH_FRACORDER 9 +#define PITCH_DAMPORDER 5 +#define PITCH_FILTDELAY 1.5f +/* stepsize for quantization of the pitch Gain */ +#define PITCH_GAIN_STEPSIZE 0.125 + +/* Order of high pass filter */ +#define HPORDER 2 + +/* some mathematical constants */ +/* log2(exp) */ +#define LOG2EXP 1.44269504088896 +#define PI 3.14159265358979 + +/* Maximum number of iterations allowed to limit payload size */ +#define MAX_PAYLOAD_LIMIT_ITERATION 5 + +/* Redundant Coding */ +#define RCU_BOTTLENECK_BPS 16000 +#define RCU_TRANSCODING_SCALE 0.40f +#define RCU_TRANSCODING_SCALE_INVERSE 2.5f + +#define RCU_TRANSCODING_SCALE_UB 0.50f +#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f + +/* Define Error codes */ +/* 6000 General */ +#define ISAC_MEMORY_ALLOCATION_FAILED 6010 +#define ISAC_MODE_MISMATCH 6020 +#define ISAC_DISALLOWED_BOTTLENECK 6030 +#define ISAC_DISALLOWED_FRAME_LENGTH 6040 +#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050 + +/* 6200 Bandwidth estimator */ +#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240 +/* 6400 Encoder */ +#define ISAC_ENCODER_NOT_INITIATED 6410 +#define ISAC_DISALLOWED_CODING_MODE 6420 +#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430 +#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440 +#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450 +#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460 +/* 6600 Decoder */ +#define ISAC_DECODER_NOT_INITIATED 6610 +#define ISAC_EMPTY_PACKET 6620 +#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630 +#define 
ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640 +#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650 +#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660 +#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670 +#define ISAC_RANGE_ERROR_DECODE_LPC 6680 +#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690 +#define ISAC_LENGTH_MISMATCH 6730 +#define ISAC_RANGE_ERROR_DECODE_BANDWITH 6740 +#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750 +#define ISAC_DISALLOWED_LPC_MODEL 6760 +/* 6800 Call setup formats */ +#define ISAC_INCOMPATIBLE_FORMATS 6810 + +#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h new file mode 100644 index 0000000000..6861ca42bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/isac/main/source/structs.h @@ -0,0 +1,448 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * structs.h + * + * This header file contains all the structs used in the ISAC codec + * + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ +#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ + +#include "modules/audio_coding/codecs/isac/bandwidth_info.h" +#include "modules/audio_coding/codecs/isac/main/source/settings.h" +#include "modules/third_party/fft/fft.h" + +typedef struct Bitstreamstruct { + uint8_t stream[STREAM_SIZE_MAX]; + uint32_t W_upper; + uint32_t streamval; + uint32_t stream_index; + +} Bitstr; + +typedef struct { + double DataBufferLo[WINLEN]; + double DataBufferHi[WINLEN]; + + double CorrBufLo[ORDERLO + 1]; + double CorrBufHi[ORDERHI + 1]; + + float PreStateLoF[ORDERLO + 1]; + float PreStateLoG[ORDERLO + 1]; + float PreStateHiF[ORDERHI + 1]; + float PreStateHiG[ORDERHI + 1]; + float PostStateLoF[ORDERLO + 1]; + float PostStateLoG[ORDERLO + 1]; + float PostStateHiF[ORDERHI + 1]; + float PostStateHiG[ORDERHI + 1]; + + double OldEnergy; + +} MaskFiltstr; + +typedef struct { + // state vectors for each of the two analysis filters + double INSTAT1[2 * (QORDER - 1)]; + double INSTAT2[2 * (QORDER - 1)]; + double INSTATLA1[2 * (QORDER - 1)]; + double INSTATLA2[2 * (QORDER - 1)]; + double INLABUF1[QLOOKAHEAD]; + double INLABUF2[QLOOKAHEAD]; + + float INSTAT1_float[2 * (QORDER - 1)]; + float INSTAT2_float[2 * (QORDER - 1)]; + float INSTATLA1_float[2 * (QORDER - 1)]; + float INSTATLA2_float[2 * (QORDER - 1)]; + float INLABUF1_float[QLOOKAHEAD]; + float INLABUF2_float[QLOOKAHEAD]; + + /* High pass filter */ + double HPstates[HPORDER]; + float HPstates_float[HPORDER]; + +} PreFiltBankstr; + +typedef struct { + // state vectors for each of the two analysis filters + double STATE_0_LOWER[2 * POSTQORDER]; + double STATE_0_UPPER[2 * POSTQORDER]; + + /* High pass filter */ + double HPstates1[HPORDER]; + double HPstates2[HPORDER]; + + float STATE_0_LOWER_float[2 * POSTQORDER]; + float STATE_0_UPPER_float[2 * 
POSTQORDER]; + + float HPstates1_float[HPORDER]; + float HPstates2_float[HPORDER]; + +} PostFiltBankstr; + +typedef struct { + // data buffer for pitch filter + double ubuf[PITCH_BUFFSIZE]; + + // low pass state vector + double ystate[PITCH_DAMPORDER]; + + // old lag and gain + double oldlagp[1]; + double oldgainp[1]; + +} PitchFiltstr; + +typedef struct { + // data buffer + double buffer[PITCH_WLPCBUFLEN]; + + // state vectors + double istate[PITCH_WLPCORDER]; + double weostate[PITCH_WLPCORDER]; + double whostate[PITCH_WLPCORDER]; + + // LPC window -> should be a global array because constant + double window[PITCH_WLPCWINLEN]; + +} WeightFiltstr; + +typedef struct { + // for initial estimator + double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 - + PITCH_FRAME_LEN / 2 + 2]; + double decimator_state[2 * ALLPASSSECTIONS + 1]; + double hp_state[2]; + + double whitened_buf[QLOOKAHEAD]; + + double inbuf[QLOOKAHEAD]; + + PitchFiltstr PFstr_wght; + PitchFiltstr PFstr; + WeightFiltstr Wghtstr; + +} PitchAnalysisStruct; + +/* Have instance of struct together with other iSAC structs */ +typedef struct { + /* Previous frame length (in ms) */ + int32_t prev_frame_length; + + /* Previous RTP timestamp from received + packet (in samples relative beginning) */ + int32_t prev_rec_rtp_number; + + /* Send timestamp for previous packet (in ms using timeGetTime()) */ + uint32_t prev_rec_send_ts; + + /* Arrival time for previous packet (in ms using timeGetTime()) */ + uint32_t prev_rec_arr_ts; + + /* rate of previous packet, derived from RTP timestamps (in bits/s) */ + float prev_rec_rtp_rate; + + /* Time since the last update of the BN estimate (in ms) */ + uint32_t last_update_ts; + + /* Time since the last reduction (in ms) */ + uint32_t last_reduction_ts; + + /* How many times the estimate was updated in the beginning */ + int32_t count_tot_updates_rec; + + /* The estimated bottle neck rate from there to here (in bits/s) */ + int32_t rec_bw; + float rec_bw_inv; 
+ float rec_bw_avg; + float rec_bw_avg_Q; + + /* The estimated mean absolute jitter value, + as seen on this side (in ms) */ + float rec_jitter; + float rec_jitter_short_term; + float rec_jitter_short_term_abs; + float rec_max_delay; + float rec_max_delay_avg_Q; + + /* (assumed) bitrate for headers (bps) */ + float rec_header_rate; + + /* The estimated bottle neck rate from here to there (in bits/s) */ + float send_bw_avg; + + /* The estimated mean absolute jitter value, as seen on + the other side (in ms) */ + float send_max_delay_avg; + + // number of packets received since last update + int num_pkts_rec; + + int num_consec_rec_pkts_over_30k; + + // flag for marking that a high speed network has been + // detected downstream + int hsn_detect_rec; + + int num_consec_snt_pkts_over_30k; + + // flag for marking that a high speed network has + // been detected upstream + int hsn_detect_snd; + + uint32_t start_wait_period; + + int in_wait_period; + + int change_to_WB; + + uint32_t senderTimestamp; + uint32_t receiverTimestamp; + // enum IsacSamplingRate incomingStreamSampFreq; + uint16_t numConsecLatePkts; + float consecLatency; + int16_t inWaitLatePkts; + + IsacBandwidthInfo external_bw_info; +} BwEstimatorstr; + +typedef struct { + /* boolean, flags if previous packet exceeded B.N. */ + int PrevExceed; + /* ms */ + int ExceedAgo; + /* packets left to send in current burst */ + int BurstCounter; + /* packets */ + int InitCounter; + /* ms remaining in buffer when next packet will be sent */ + double StillBuffered; + +} RateModel; + +/* The following struct is used to store data from encoding, to make it + fast and easy to construct a new bitstream with a different Bandwidth + estimate. All values (except framelength and minBytes) are double size to + handle 60 ms of data. 
+*/ +typedef struct { + /* Used to keep track of if it is first or second part of 60 msec packet */ + int startIdx; + + /* Frame length in samples */ + int16_t framelength; + + /* Pitch Gain */ + int pitchGain_index[2]; + + /* Pitch Lag */ + double meanGain[2]; + int pitchIndex[PITCH_SUBFRAMES * 2]; + + /* LPC */ + int LPCindex_s[108 * 2]; /* KLT_ORDER_SHAPE = 108 */ + int LPCindex_g[12 * 2]; /* KLT_ORDER_GAIN = 12 */ + double LPCcoeffs_lo[(ORDERLO + 1) * SUBFRAMES * 2]; + double LPCcoeffs_hi[(ORDERHI + 1) * SUBFRAMES * 2]; + + /* Encode Spec */ + int16_t fre[FRAMESAMPLES]; + int16_t fim[FRAMESAMPLES]; + int16_t AvgPitchGain[2]; + + /* Used in adaptive mode only */ + int minBytes; + +} IsacSaveEncoderData; + +typedef struct { + int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME]; + double lpcGain[SUBFRAMES << 1]; + int lpcGainIndex[SUBFRAMES << 1]; + + Bitstr bitStreamObj; + + int16_t realFFT[FRAMESAMPLES_HALF]; + int16_t imagFFT[FRAMESAMPLES_HALF]; +} ISACUBSaveEncDataStruct; + +typedef struct { + Bitstr bitstr_obj; + MaskFiltstr maskfiltstr_obj; + PreFiltBankstr prefiltbankstr_obj; + PitchFiltstr pitchfiltstr_obj; + PitchAnalysisStruct pitchanalysisstr_obj; + FFTstr fftstr_obj; + IsacSaveEncoderData SaveEnc_obj; + + int buffer_index; + int16_t current_framesamples; + + float data_buffer_float[FRAMESAMPLES_30ms]; + + int frame_nb; + double bottleneck; + int16_t new_framelength; + double s2nr; + + /* Maximum allowed number of bits for a 30 msec packet */ + int16_t payloadLimitBytes30; + /* Maximum allowed number of bits for a 60 msec packet */ + int16_t payloadLimitBytes60; + /* Maximum allowed number of bits for both 30 and 60 msec packet */ + int16_t maxPayloadBytes; + /* Maximum allowed rate in bytes per 30 msec packet */ + int16_t maxRateInBytes; + + /*--- + If set to 1 iSAC will not adapt the frame-size, if used in + channel-adaptive mode. The initial value will be used for all rates. 
+ ---*/ + int16_t enforceFrameSize; + + /*----- + This records the BWE index the encoder injected into the bit-stream. + It will be used in RCU. The same BWE index of main payload will be in + the redundant payload. We can not retrieve it from BWE because it is + a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) and has to be + called only once per each encode. + -----*/ + int16_t lastBWIdx; +} ISACLBEncStruct; + +typedef struct { + Bitstr bitstr_obj; + MaskFiltstr maskfiltstr_obj; + PreFiltBankstr prefiltbankstr_obj; + FFTstr fftstr_obj; + ISACUBSaveEncDataStruct SaveEnc_obj; + + int buffer_index; + float data_buffer_float[MAX_FRAMESAMPLES + LB_TOTAL_DELAY_SAMPLES]; + double bottleneck; + /* Maximum allowed number of bits for a 30 msec packet */ + // int16_t payloadLimitBytes30; + /* Maximum allowed number of bits for both 30 and 60 msec packet */ + // int16_t maxPayloadBytes; + int16_t maxPayloadSizeBytes; + + double lastLPCVec[UB_LPC_ORDER]; + int16_t numBytesUsed; + int16_t lastJitterInfo; +} ISACUBEncStruct; + +typedef struct { + Bitstr bitstr_obj; + MaskFiltstr maskfiltstr_obj; + PostFiltBankstr postfiltbankstr_obj; + PitchFiltstr pitchfiltstr_obj; + FFTstr fftstr_obj; + +} ISACLBDecStruct; + +typedef struct { + Bitstr bitstr_obj; + MaskFiltstr maskfiltstr_obj; + PostFiltBankstr postfiltbankstr_obj; + FFTstr fftstr_obj; + +} ISACUBDecStruct; + +typedef struct { + ISACLBEncStruct ISACencLB_obj; + ISACLBDecStruct ISACdecLB_obj; +} ISACLBStruct; + +typedef struct { + ISACUBEncStruct ISACencUB_obj; + ISACUBDecStruct ISACdecUB_obj; +} ISACUBStruct; + +/* + This struct is used to take a snapshot of the entropy coder and LPC gains + right before encoding LPC gains. This allows us to go back to that state + if we like to limit the payload size. 
+*/ +typedef struct { + /* 6 lower-band & 6 upper-band */ + double loFiltGain[SUBFRAMES]; + double hiFiltGain[SUBFRAMES]; + /* Upper boundary of interval W */ + uint32_t W_upper; + uint32_t streamval; + /* Index to the current position in bytestream */ + uint32_t stream_index; + uint8_t stream[3]; +} transcode_obj; + +typedef struct { + // TODO(kwiberg): The size of these tables could be reduced by storing floats + // instead of doubles, and by making use of the identity cos(x) = + // sin(x+pi/2). They could also be made global constants that we fill in at + // compile time. + double costab1[FRAMESAMPLES_HALF]; + double sintab1[FRAMESAMPLES_HALF]; + double costab2[FRAMESAMPLES_QUARTER]; + double sintab2[FRAMESAMPLES_QUARTER]; +} TransformTables; + +typedef struct { + // lower-band codec instance + ISACLBStruct instLB; + // upper-band codec instance + ISACUBStruct instUB; + + // Bandwidth Estimator and model for the rate. + BwEstimatorstr bwestimator_obj; + RateModel rate_data_obj; + double MaxDelay; + + /* 0 = adaptive; 1 = instantaneous */ + int16_t codingMode; + + // overall bottleneck of the codec + int32_t bottleneck; + + // QMF Filter state + int32_t analysisFBState1[FB_STATE_SIZE_WORD32]; + int32_t analysisFBState2[FB_STATE_SIZE_WORD32]; + int32_t synthesisFBState1[FB_STATE_SIZE_WORD32]; + int32_t synthesisFBState2[FB_STATE_SIZE_WORD32]; + + // Error Code + int16_t errorCode; + + // bandwidth of the encoded audio 8, 12 or 16 kHz + enum ISACBandwidth bandwidthKHz; + // Sampling rate of audio, encoder and decoder, 8 or 16 kHz + enum IsacSamplingRate encoderSamplingRateKHz; + enum IsacSamplingRate decoderSamplingRateKHz; + // Flag to keep track of initializations, lower & upper-band + // encoder and decoder. + int16_t initFlag; + + // Flag to indicate signal bandwidth switch + int16_t resetFlag_8kHz; + + // Maximum allowed rate, measured in Bytes per 30 ms. + int16_t maxRateBytesPer30Ms; + // Maximum allowed payload-size, measured in Bytes. 
+ int16_t maxPayloadSizeBytes; + /* The expected sampling rate of the input signal. Valid values are 16000 + * and 32000. This is not the operation sampling rate of the codec. */ + uint16_t in_sample_rate_hz; + + // Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time. + TransformTables transform_tables; +} ISACMainStruct; + +#endif /* MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc new file mode 100644 index 0000000000..dacf325082 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" + +#include <algorithm> +#include <memory> +#include <utility> + +#include "rtc_base/checks.h" + +namespace webrtc { + +LegacyEncodedAudioFrame::LegacyEncodedAudioFrame(AudioDecoder* decoder, + rtc::Buffer&& payload) + : decoder_(decoder), payload_(std::move(payload)) {} + +LegacyEncodedAudioFrame::~LegacyEncodedAudioFrame() = default; + +size_t LegacyEncodedAudioFrame::Duration() const { + const int ret = decoder_->PacketDuration(payload_.data(), payload_.size()); + return (ret < 0) ? 
0 : static_cast<size_t>(ret); +} + +absl::optional<AudioDecoder::EncodedAudioFrame::DecodeResult> +LegacyEncodedAudioFrame::Decode(rtc::ArrayView<int16_t> decoded) const { + AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech; + const int ret = decoder_->Decode( + payload_.data(), payload_.size(), decoder_->SampleRateHz(), + decoded.size() * sizeof(int16_t), decoded.data(), &speech_type); + + if (ret < 0) + return absl::nullopt; + + return DecodeResult{static_cast<size_t>(ret), speech_type}; +} + +std::vector<AudioDecoder::ParseResult> LegacyEncodedAudioFrame::SplitBySamples( + AudioDecoder* decoder, + rtc::Buffer&& payload, + uint32_t timestamp, + size_t bytes_per_ms, + uint32_t timestamps_per_ms) { + RTC_DCHECK(payload.data()); + std::vector<AudioDecoder::ParseResult> results; + size_t split_size_bytes = payload.size(); + + // Find a "chunk size" >= 20 ms and < 40 ms. + const size_t min_chunk_size = bytes_per_ms * 20; + if (min_chunk_size >= payload.size()) { + std::unique_ptr<LegacyEncodedAudioFrame> frame( + new LegacyEncodedAudioFrame(decoder, std::move(payload))); + results.emplace_back(timestamp, 0, std::move(frame)); + } else { + // Reduce the split size by half as long as `split_size_bytes` is at least + // twice the minimum chunk size (so that the resulting size is at least as + // large as the minimum chunk size). 
+ while (split_size_bytes >= 2 * min_chunk_size) { + split_size_bytes /= 2; + } + + const uint32_t timestamps_per_chunk = static_cast<uint32_t>( + split_size_bytes * timestamps_per_ms / bytes_per_ms); + size_t byte_offset; + uint32_t timestamp_offset; + for (byte_offset = 0, timestamp_offset = 0; byte_offset < payload.size(); + byte_offset += split_size_bytes, + timestamp_offset += timestamps_per_chunk) { + split_size_bytes = + std::min(split_size_bytes, payload.size() - byte_offset); + rtc::Buffer new_payload(payload.data() + byte_offset, split_size_bytes); + std::unique_ptr<LegacyEncodedAudioFrame> frame( + new LegacyEncodedAudioFrame(decoder, std::move(new_payload))); + results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame)); + } + } + + return results; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h new file mode 100644 index 0000000000..21da1367ed --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_ +#define MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class LegacyEncodedAudioFrame final : public AudioDecoder::EncodedAudioFrame { + public: + LegacyEncodedAudioFrame(AudioDecoder* decoder, rtc::Buffer&& payload); + ~LegacyEncodedAudioFrame() override; + + static std::vector<AudioDecoder::ParseResult> SplitBySamples( + AudioDecoder* decoder, + rtc::Buffer&& payload, + uint32_t timestamp, + size_t bytes_per_ms, + uint32_t timestamps_per_ms); + + size_t Duration() const override; + + absl::optional<DecodeResult> Decode( + rtc::ArrayView<int16_t> decoded) const override; + + // For testing: + const rtc::Buffer& payload() const { return payload_; } + + private: + AudioDecoder* const decoder_; + const rtc::Buffer payload_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_LEGACY_ENCODED_AUDIO_FRAME_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc new file mode 100644 index 0000000000..f81aeeea80 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/legacy_encoded_audio_frame_unittest.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" + +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { + +enum class NetEqDecoder { + kDecoderPCMu, + kDecoderPCMa, + kDecoderPCMu_2ch, + kDecoderPCMa_2ch, + kDecoderPCM16B, + kDecoderPCM16Bwb, + kDecoderPCM16Bswb32kHz, + kDecoderPCM16Bswb48kHz, + kDecoderPCM16B_2ch, + kDecoderPCM16Bwb_2ch, + kDecoderPCM16Bswb32kHz_2ch, + kDecoderPCM16Bswb48kHz_2ch, + kDecoderPCM16B_5ch, + kDecoderG722, +}; + +class SplitBySamplesTest : public ::testing::TestWithParam<NetEqDecoder> { + protected: + virtual void SetUp() { + decoder_type_ = GetParam(); + switch (decoder_type_) { + case NetEqDecoder::kDecoderPCMu: + case NetEqDecoder::kDecoderPCMa: + bytes_per_ms_ = 8; + samples_per_ms_ = 8; + break; + case NetEqDecoder::kDecoderPCMu_2ch: + case NetEqDecoder::kDecoderPCMa_2ch: + bytes_per_ms_ = 2 * 8; + samples_per_ms_ = 8; + break; + case NetEqDecoder::kDecoderG722: + bytes_per_ms_ = 8; + samples_per_ms_ = 16; + break; + case NetEqDecoder::kDecoderPCM16B: + bytes_per_ms_ = 16; + samples_per_ms_ = 8; + break; + case NetEqDecoder::kDecoderPCM16Bwb: + bytes_per_ms_ = 32; + samples_per_ms_ = 16; + break; + case NetEqDecoder::kDecoderPCM16Bswb32kHz: + bytes_per_ms_ = 64; + samples_per_ms_ = 32; + break; + case NetEqDecoder::kDecoderPCM16Bswb48kHz: + bytes_per_ms_ = 96; + samples_per_ms_ = 48; + break; + case NetEqDecoder::kDecoderPCM16B_2ch: + bytes_per_ms_ = 2 * 16; + samples_per_ms_ = 8; + break; + case NetEqDecoder::kDecoderPCM16Bwb_2ch: + bytes_per_ms_ = 2 * 32; + samples_per_ms_ = 16; + break; + case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch: + bytes_per_ms_ = 2 * 64; + samples_per_ms_ = 32; + break; + case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch: + bytes_per_ms_ = 2 * 96; + samples_per_ms_ = 48; + break; + case NetEqDecoder::kDecoderPCM16B_5ch: + bytes_per_ms_ = 5 * 16; + samples_per_ms_ = 8; + break; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + } + size_t 
bytes_per_ms_; + int samples_per_ms_; + NetEqDecoder decoder_type_; +}; + +// Test splitting sample-based payloads. +TEST_P(SplitBySamplesTest, PayloadSizes) { + constexpr uint32_t kBaseTimestamp = 0x12345678; + struct ExpectedSplit { + size_t payload_size_ms; + size_t num_frames; + // For simplicity. We only expect up to two packets per split. + size_t frame_sizes[2]; + }; + // The payloads are expected to be split as follows: + // 10 ms -> 10 ms + // 20 ms -> 20 ms + // 30 ms -> 30 ms + // 40 ms -> 20 + 20 ms + // 50 ms -> 25 + 25 ms + // 60 ms -> 30 + 30 ms + ExpectedSplit expected_splits[] = {{10, 1, {10}}, {20, 1, {20}}, + {30, 1, {30}}, {40, 2, {20, 20}}, + {50, 2, {25, 25}}, {60, 2, {30, 30}}}; + + for (const auto& expected_split : expected_splits) { + // The payload values are set to steadily increase (modulo 256), so that the + // resulting frames can be checked and we can be reasonably certain no + // sample was missed or repeated. + const auto generate_payload = [](size_t num_bytes) { + rtc::Buffer payload(num_bytes); + uint8_t value = 0; + // Allow wrap-around of value in counter below. 
+ for (size_t i = 0; i != payload.size(); ++i, ++value) { + payload[i] = value; + } + return payload; + }; + + const auto results = LegacyEncodedAudioFrame::SplitBySamples( + nullptr, + generate_payload(expected_split.payload_size_ms * bytes_per_ms_), + kBaseTimestamp, bytes_per_ms_, samples_per_ms_); + + EXPECT_EQ(expected_split.num_frames, results.size()); + uint32_t expected_timestamp = kBaseTimestamp; + uint8_t value = 0; + for (size_t i = 0; i != expected_split.num_frames; ++i) { + const auto& result = results[i]; + const LegacyEncodedAudioFrame* frame = + static_cast<const LegacyEncodedAudioFrame*>(result.frame.get()); + const size_t length_bytes = expected_split.frame_sizes[i] * bytes_per_ms_; + EXPECT_EQ(length_bytes, frame->payload().size()); + EXPECT_EQ(expected_timestamp, result.timestamp); + const rtc::Buffer& payload = frame->payload(); + // Allow wrap-around of value in counter below. + for (size_t i = 0; i != payload.size(); ++i, ++value) { + ASSERT_EQ(value, payload[i]); + } + + expected_timestamp += rtc::checked_cast<uint32_t>( + expected_split.frame_sizes[i] * samples_per_ms_); + } + } +} + +INSTANTIATE_TEST_SUITE_P( + LegacyEncodedAudioFrame, + SplitBySamplesTest, + ::testing::Values(NetEqDecoder::kDecoderPCMu, + NetEqDecoder::kDecoderPCMa, + NetEqDecoder::kDecoderPCMu_2ch, + NetEqDecoder::kDecoderPCMa_2ch, + NetEqDecoder::kDecoderG722, + NetEqDecoder::kDecoderPCM16B, + NetEqDecoder::kDecoderPCM16Bwb, + NetEqDecoder::kDecoderPCM16Bswb32kHz, + NetEqDecoder::kDecoderPCM16Bswb48kHz, + NetEqDecoder::kDecoderPCM16B_2ch, + NetEqDecoder::kDecoderPCM16Bwb_2ch, + NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch, + NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch, + NetEqDecoder::kDecoderPCM16B_5ch)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS b/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS new file mode 100644 index 0000000000..c2530726ad --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_coding/codecs/opus/DEPS @@ -0,0 +1,5 @@ +specific_include_rules = { + "opus_inst\.h": [ + "+third_party/opus", + ], +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc new file mode 100644 index 0000000000..03c02186d0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" + +#include "absl/strings/string_view.h" + +namespace webrtc { + +absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format, + absl::string_view param) { + auto it = format.parameters.find(std::string(param)); + if (it == format.parameters.end()) + return absl::nullopt; + + return it->second; +} + +// Parses a comma-separated string "1,2,0,6" into a std::vector<unsigned char>. +template <> +absl::optional<std::vector<unsigned char>> GetFormatParameter( + const SdpAudioFormat& format, + absl::string_view param) { + std::vector<unsigned char> result; + const std::string comma_separated_list = + GetFormatParameter(format, param).value_or(""); + size_t pos = 0; + while (pos < comma_separated_list.size()) { + const size_t next_comma = comma_separated_list.find(',', pos); + const size_t distance_to_next_comma = next_comma == std::string::npos + ? 
std::string::npos + : (next_comma - pos); + auto substring_with_number = + comma_separated_list.substr(pos, distance_to_next_comma); + auto conv = rtc::StringToNumber<int>(substring_with_number); + if (!conv.has_value()) { + return absl::nullopt; + } + result.push_back(*conv); + pos += substring_with_number.size() + 1; + } + return result; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h new file mode 100644 index 0000000000..5ebb51b577 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_coder_opus_common.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_ + +#include <string> +#include <utility> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/audio_format.h" +#include "rtc_base/string_to_number.h" + +namespace webrtc { + +absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format, + absl::string_view param); + +template <typename T> +absl::optional<T> GetFormatParameter(const SdpAudioFormat& format, + absl::string_view param) { + return rtc::StringToNumber<T>(GetFormatParameter(format, param).value_or("")); +} + +template <> +absl::optional<std::vector<unsigned char>> GetFormatParameter( + const SdpAudioFormat& format, + absl::string_view param); + +class OpusFrame : public AudioDecoder::EncodedAudioFrame { + public: + OpusFrame(AudioDecoder* decoder, + rtc::Buffer&& payload, + bool is_primary_payload) + : decoder_(decoder), + payload_(std::move(payload)), + is_primary_payload_(is_primary_payload) {} + + size_t Duration() const override { + int ret; + if (is_primary_payload_) { + ret = decoder_->PacketDuration(payload_.data(), payload_.size()); + } else { + ret = decoder_->PacketDurationRedundant(payload_.data(), payload_.size()); + } + return (ret < 0) ? 
0 : static_cast<size_t>(ret); + } + + bool IsDtxPacket() const override { return payload_.size() <= 2; } + + absl::optional<DecodeResult> Decode( + rtc::ArrayView<int16_t> decoded) const override { + AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech; + int ret; + if (is_primary_payload_) { + ret = decoder_->Decode( + payload_.data(), payload_.size(), decoder_->SampleRateHz(), + decoded.size() * sizeof(int16_t), decoded.data(), &speech_type); + } else { + ret = decoder_->DecodeRedundant( + payload_.data(), payload_.size(), decoder_->SampleRateHz(), + decoded.size() * sizeof(int16_t), decoded.data(), &speech_type); + } + + if (ret < 0) + return absl::nullopt; + + return DecodeResult{static_cast<size_t>(ret), speech_type}; + } + + private: + AudioDecoder* const decoder_; + const rtc::Buffer payload_; + const bool is_primary_payload_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc new file mode 100644 index 0000000000..285ea89959 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h" + +#include <algorithm> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "absl/memory/memory.h" +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "rtc_base/string_to_number.h" + +namespace webrtc { + +std::unique_ptr<AudioDecoderMultiChannelOpusImpl> +AudioDecoderMultiChannelOpusImpl::MakeAudioDecoder( + AudioDecoderMultiChannelOpusConfig config) { + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return nullptr; + } + // Fill the pointer with a working decoder through the C interface. This + // allocates memory. + OpusDecInst* dec_state = nullptr; + const int error = WebRtcOpus_MultistreamDecoderCreate( + &dec_state, config.num_channels, config.num_streams, + config.coupled_streams, config.channel_mapping.data()); + if (error != 0) { + return nullptr; + } + + // Pass the ownership to DecoderImpl. Not using 'make_unique' because the + // c-tor is private. 
+ return std::unique_ptr<AudioDecoderMultiChannelOpusImpl>( + new AudioDecoderMultiChannelOpusImpl(dec_state, config)); +} + +AudioDecoderMultiChannelOpusImpl::AudioDecoderMultiChannelOpusImpl( + OpusDecInst* dec_state, + AudioDecoderMultiChannelOpusConfig config) + : dec_state_(dec_state), config_(config) { + RTC_DCHECK(dec_state); + WebRtcOpus_DecoderInit(dec_state_); +} + +AudioDecoderMultiChannelOpusImpl::~AudioDecoderMultiChannelOpusImpl() { + WebRtcOpus_DecoderFree(dec_state_); +} + +absl::optional<AudioDecoderMultiChannelOpusConfig> +AudioDecoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) { + AudioDecoderMultiChannelOpusConfig config; + config.num_channels = format.num_channels; + auto num_streams = GetFormatParameter<int>(format, "num_streams"); + if (!num_streams.has_value()) { + return absl::nullopt; + } + config.num_streams = *num_streams; + + auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams"); + if (!coupled_streams.has_value()) { + return absl::nullopt; + } + config.coupled_streams = *coupled_streams; + + auto channel_mapping = + GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping"); + if (!channel_mapping.has_value()) { + return absl::nullopt; + } + config.channel_mapping = *channel_mapping; + if (!config.IsOk()) { + return absl::nullopt; + } + return config; +} + +std::vector<AudioDecoder::ParseResult> +AudioDecoderMultiChannelOpusImpl::ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) { + std::vector<ParseResult> results; + + if (PacketHasFec(payload.data(), payload.size())) { + const int duration = + PacketDurationRedundant(payload.data(), payload.size()); + RTC_DCHECK_GE(duration, 0); + rtc::Buffer payload_copy(payload.data(), payload.size()); + std::unique_ptr<EncodedAudioFrame> fec_frame( + new OpusFrame(this, std::move(payload_copy), false)); + results.emplace_back(timestamp - duration, 1, std::move(fec_frame)); + } + std::unique_ptr<EncodedAudioFrame> frame( + new 
OpusFrame(this, std::move(payload), true)); + results.emplace_back(timestamp, 0, std::move(frame)); + return results; +} + +int AudioDecoderMultiChannelOpusImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz, 48000); + int16_t temp_type = 1; // Default is speech. + int ret = + WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type); + if (ret > 0) + ret *= static_cast<int>( + config_.num_channels); // Return total number of samples. + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderMultiChannelOpusImpl::DecodeRedundantInternal( + const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + if (!PacketHasFec(encoded, encoded_len)) { + // This packet is a RED packet. + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); + } + + RTC_DCHECK_EQ(sample_rate_hz, 48000); + int16_t temp_type = 1; // Default is speech. + int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded, + &temp_type); + if (ret > 0) + ret *= static_cast<int>( + config_.num_channels); // Return total number of samples. + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +void AudioDecoderMultiChannelOpusImpl::Reset() { + WebRtcOpus_DecoderInit(dec_state_); +} + +int AudioDecoderMultiChannelOpusImpl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len); +} + +int AudioDecoderMultiChannelOpusImpl::PacketDurationRedundant( + const uint8_t* encoded, + size_t encoded_len) const { + if (!PacketHasFec(encoded, encoded_len)) { + // This packet is a RED packet. 
+ return PacketDuration(encoded, encoded_len); + } + + return WebRtcOpus_FecDurationEst(encoded, encoded_len, 48000); +} + +bool AudioDecoderMultiChannelOpusImpl::PacketHasFec(const uint8_t* encoded, + size_t encoded_len) const { + int fec; + fec = WebRtcOpus_PacketHasFec(encoded, encoded_len); + return (fec == 1); +} + +int AudioDecoderMultiChannelOpusImpl::SampleRateHz() const { + return 48000; +} + +size_t AudioDecoderMultiChannelOpusImpl::Channels() const { + return config_.num_channels; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h new file mode 100644 index 0000000000..2ff47a8a53 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_ + +#include <stddef.h> + +#include <memory> +#include <vector> + +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/audio_format.h" +#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class AudioDecoderMultiChannelOpusImpl final : public AudioDecoder { + public: + static std::unique_ptr<AudioDecoderMultiChannelOpusImpl> MakeAudioDecoder( + AudioDecoderMultiChannelOpusConfig config); + + ~AudioDecoderMultiChannelOpusImpl() override; + + AudioDecoderMultiChannelOpusImpl(const AudioDecoderMultiChannelOpusImpl&) = + delete; + AudioDecoderMultiChannelOpusImpl& operator=( + const AudioDecoderMultiChannelOpusImpl&) = delete; + + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + void Reset() override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; + bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + static absl::optional<AudioDecoderMultiChannelOpusConfig> SdpToConfig( + const SdpAudioFormat& format); + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + int DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + AudioDecoderMultiChannelOpusImpl(OpusDecInst* dec_state, + AudioDecoderMultiChannelOpusConfig config); + + OpusDecInst* dec_state_; + const 
AudioDecoderMultiChannelOpusConfig config_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc new file mode 100644 index 0000000000..57e2107f3c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_unittest.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h" + +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +using ::testing::NiceMock; +using ::testing::Return; + +TEST(AudioDecoderMultiOpusTest, GetFormatParameter) { + const SdpAudioFormat sdp_format("multiopus", 48000, 4, + {{"channel_mapping", "0,1,2,3"}, + {"coupled_streams", "2"}, + {"num_streams", "2"}}); + + EXPECT_EQ(GetFormatParameter(sdp_format, "channel_mapping"), + absl::optional<std::string>("0,1,2,3")); + + EXPECT_EQ(GetFormatParameter<int>(sdp_format, "coupled_streams"), + absl::optional<int>(2)); + + EXPECT_EQ(GetFormatParameter(sdp_format, "missing"), absl::nullopt); + + EXPECT_EQ(GetFormatParameter<int>(sdp_format, "channel_mapping"), + absl::nullopt); +} + +TEST(AudioDecoderMultiOpusTest, InvalidChannelMappings) { + { + // Can't use channel 3 if there are only 2 channels. 
+ const SdpAudioFormat sdp_format("multiopus", 48000, 2, + {{"channel_mapping", "3,0"}, + {"coupled_streams", "1"}, + {"num_streams", "2"}}); + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + EXPECT_FALSE(decoder_config.has_value()); + } + { + // The mapping is too long. There are only 5 channels, but 6 elements in the + // mapping. + const SdpAudioFormat sdp_format("multiopus", 48000, 5, + {{"channel_mapping", "0,1,2,3,4,5"}, + {"coupled_streams", "0"}, + {"num_streams", "2"}}); + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + EXPECT_FALSE(decoder_config.has_value()); + } + { + // The mapping doesn't parse correctly. + const SdpAudioFormat sdp_format( + "multiopus", 48000, 5, + {{"channel_mapping", "0,1,two,3,4"}, {"coupled_streams", "0"}}); + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + EXPECT_FALSE(decoder_config.has_value()); + } +} + +TEST(AudioDecoderMultiOpusTest, ValidSdpToConfigProducesCorrectConfig) { + const SdpAudioFormat sdp_format("multiopus", 48000, 4, + {{"channel_mapping", "3,1,2,0"}, + {"coupled_streams", "2"}, + {"num_streams", "2"}}); + + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + + ASSERT_TRUE(decoder_config.has_value()); + EXPECT_TRUE(decoder_config->IsOk()); + EXPECT_EQ(decoder_config->coupled_streams, 2); + EXPECT_THAT(decoder_config->channel_mapping, + ::testing::ContainerEq(std::vector<unsigned char>({3, 1, 2, 0}))); +} + +TEST(AudioDecoderMultiOpusTest, InvalidSdpToConfigDoesNotProduceConfig) { + { + const SdpAudioFormat sdp_format("multiopus", 48000, 4, + {{"channel_mapping", "0,1,2,3"}, + {"coupled_stream", "2"}, + {"num_streams", "2"}}); + + const 
absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + + EXPECT_FALSE(decoder_config.has_value()); + } + + { + const SdpAudioFormat sdp_format("multiopus", 48000, 4, + {{"channel_mapping", "0,1,2 3"}, + {"coupled_streams", "2"}, + {"num_streams", "2"}}); + + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + + EXPECT_FALSE(decoder_config.has_value()); + } +} + +TEST(AudioDecoderMultiOpusTest, CodecsCanBeCreated) { + const SdpAudioFormat sdp_format("multiopus", 48000, 4, + {{"channel_mapping", "0,1,2,3"}, + {"coupled_streams", "2"}, + {"num_streams", "2"}}); + + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format); + + ASSERT_TRUE(decoder_config.has_value()); + + const std::unique_ptr<AudioDecoder> opus_decoder = + AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config); + + EXPECT_TRUE(opus_decoder); +} + +TEST(AudioDecoderMultiOpusTest, AdvertisedCodecsCanBeCreated) { + std::vector<AudioCodecSpec> specs; + AudioDecoderMultiChannelOpus::AppendSupportedDecoders(&specs); + + EXPECT_FALSE(specs.empty()); + + for (const AudioCodecSpec& spec : specs) { + const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config = + AudioDecoderMultiChannelOpus::SdpToConfig(spec.format); + ASSERT_TRUE(decoder_config.has_value()); + + const std::unique_ptr<AudioDecoder> opus_decoder = + AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config); + + EXPECT_TRUE(opus_decoder); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc new file mode 100644 index 0000000000..cff9685548 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.cc @@ -0,0 
+1,128 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h" + +#include <memory> +#include <utility> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels, + int sample_rate_hz) + : channels_{num_channels}, sample_rate_hz_{sample_rate_hz} { + RTC_DCHECK(num_channels == 1 || num_channels == 2); + RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 48000); + const int error = + WebRtcOpus_DecoderCreate(&dec_state_, channels_, sample_rate_hz_); + RTC_DCHECK(error == 0); + WebRtcOpus_DecoderInit(dec_state_); +} + +AudioDecoderOpusImpl::~AudioDecoderOpusImpl() { + WebRtcOpus_DecoderFree(dec_state_); +} + +std::vector<AudioDecoder::ParseResult> AudioDecoderOpusImpl::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + std::vector<ParseResult> results; + + if (PacketHasFec(payload.data(), payload.size())) { + const int duration = + PacketDurationRedundant(payload.data(), payload.size()); + RTC_DCHECK_GE(duration, 0); + rtc::Buffer payload_copy(payload.data(), payload.size()); + std::unique_ptr<EncodedAudioFrame> fec_frame( + new OpusFrame(this, std::move(payload_copy), false)); + results.emplace_back(timestamp - duration, 1, std::move(fec_frame)); + } + std::unique_ptr<EncodedAudioFrame> frame( + new OpusFrame(this, std::move(payload), true)); + results.emplace_back(timestamp, 0, std::move(frame)); + return results; +} + +int 
AudioDecoderOpusImpl::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_); + int16_t temp_type = 1; // Default is speech. + int ret = + WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type); + if (ret > 0) + ret *= static_cast<int>(channels_); // Return total number of samples. + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderOpusImpl::DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + if (!PacketHasFec(encoded, encoded_len)) { + // This packet is a RED packet. + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); + } + + RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_); + int16_t temp_type = 1; // Default is speech. + int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded, + &temp_type); + if (ret > 0) + ret *= static_cast<int>(channels_); // Return total number of samples. + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +void AudioDecoderOpusImpl::Reset() { + WebRtcOpus_DecoderInit(dec_state_); +} + +int AudioDecoderOpusImpl::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len); +} + +int AudioDecoderOpusImpl::PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const { + if (!PacketHasFec(encoded, encoded_len)) { + // This packet is a RED packet. 
+ return PacketDuration(encoded, encoded_len); + } + + return WebRtcOpus_FecDurationEst(encoded, encoded_len, sample_rate_hz_); +} + +bool AudioDecoderOpusImpl::PacketHasFec(const uint8_t* encoded, + size_t encoded_len) const { + int fec; + fec = WebRtcOpus_PacketHasFec(encoded, encoded_len); + return (fec == 1); +} + +int AudioDecoderOpusImpl::SampleRateHz() const { + return sample_rate_hz_; +} + +size_t AudioDecoderOpusImpl::Channels() const { + return channels_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h new file mode 100644 index 0000000000..e8fd0440bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "api/audio_codecs/audio_decoder.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class AudioDecoderOpusImpl final : public AudioDecoder { + public: + explicit AudioDecoderOpusImpl(size_t num_channels, + int sample_rate_hz = 48000); + ~AudioDecoderOpusImpl() override; + + AudioDecoderOpusImpl(const AudioDecoderOpusImpl&) = delete; + AudioDecoderOpusImpl& operator=(const AudioDecoderOpusImpl&) = delete; + + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + void Reset() override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const override; + bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + int DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + OpusDecInst* dec_state_; + const size_t channels_; + const int sample_rate_hz_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc new file mode 100644 index 0000000000..38a11c123d --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.cc @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * LEFT TO DO: + * - WRITE TESTS for the stuff in this file. + * - Check the creation, maybe make it safer by returning an empty optional or + * unique_ptr. --- It looks OK, but RecreateEncoderInstance can perhaps crash + * on a valid config. Can run it in the fuzzer for some time. Should prbl also + * fuzz the config. + */ + +#include "modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h" + +#include <algorithm> +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/match.h" +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/string_to_number.h" + +namespace webrtc { + +namespace { + +// Recommended bitrates for one channel: +// 8-12 kb/s for NB speech, +// 16-20 kb/s for WB speech, +// 28-40 kb/s for FB speech, +// 48-64 kb/s for FB mono music, and +// 64-128 kb/s for FB stereo music. +// The current implementation multiplies these values by the number of channels. 
+constexpr int kOpusBitrateNbBps = 12000; +constexpr int kOpusBitrateWbBps = 20000; +constexpr int kOpusBitrateFbBps = 32000; + +constexpr int kDefaultMaxPlaybackRate = 48000; +// These two lists must be sorted from low to high +#if WEBRTC_OPUS_SUPPORT_120MS_PTIME +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120}; +#else +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60}; +#endif + +int GetBitrateBps(const AudioEncoderMultiChannelOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + return config.bitrate_bps; +} +int GetMaxPlaybackRate(const SdpAudioFormat& format) { + const auto param = GetFormatParameter<int>(format, "maxplaybackrate"); + if (param && *param >= 8000) { + return std::min(*param, kDefaultMaxPlaybackRate); + } + return kDefaultMaxPlaybackRate; +} + +int GetFrameSizeMs(const SdpAudioFormat& format) { + const auto ptime = GetFormatParameter<int>(format, "ptime"); + if (ptime.has_value()) { + // Pick the next highest supported frame length from + // kOpusSupportedFrameLengths. + for (const int supported_frame_length : kOpusSupportedFrameLengths) { + if (supported_frame_length >= *ptime) { + return supported_frame_length; + } + } + // If none was found, return the largest supported frame length. 
+ return *(std::end(kOpusSupportedFrameLengths) - 1); + } + + return AudioEncoderOpusConfig::kDefaultFrameSizeMs; +} + +int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) { + const int bitrate = [&] { + if (max_playback_rate <= 8000) { + return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels); + } else if (max_playback_rate <= 16000) { + return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels); + } else { + return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels); + } + }(); + RTC_DCHECK_GE(bitrate, AudioEncoderMultiChannelOpusConfig::kMinBitrateBps); + return bitrate; +} + +// Get the maxaveragebitrate parameter in string-form, so we can properly figure +// out how invalid it is and accurately log invalid values. +int CalculateBitrate(int max_playback_rate_hz, + size_t num_channels, + absl::optional<std::string> bitrate_param) { + const int default_bitrate = + CalculateDefaultBitrate(max_playback_rate_hz, num_channels); + + if (bitrate_param) { + const auto bitrate = rtc::StringToNumber<int>(*bitrate_param); + if (bitrate) { + const int chosen_bitrate = + std::max(AudioEncoderOpusConfig::kMinBitrateBps, + std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps)); + if (bitrate != chosen_bitrate) { + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate + << " clamped to " << chosen_bitrate; + } + return chosen_bitrate; + } + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param + << "\" replaced by default bitrate " << default_bitrate; + } + + return default_bitrate; +} + +} // namespace + +std::unique_ptr<AudioEncoder> +AudioEncoderMultiChannelOpusImpl::MakeAudioEncoder( + const AudioEncoderMultiChannelOpusConfig& config, + int payload_type) { + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return nullptr; + } + return std::make_unique<AudioEncoderMultiChannelOpusImpl>(config, + payload_type); +} + +AudioEncoderMultiChannelOpusImpl::AudioEncoderMultiChannelOpusImpl( + const 
AudioEncoderMultiChannelOpusConfig& config, + int payload_type) + : payload_type_(payload_type), inst_(nullptr) { + RTC_DCHECK(0 <= payload_type && payload_type <= 127); + + RTC_CHECK(RecreateEncoderInstance(config)); +} + +AudioEncoderMultiChannelOpusImpl::~AudioEncoderMultiChannelOpusImpl() { + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); +} + +size_t AudioEncoderMultiChannelOpusImpl::SufficientOutputBufferSize() const { + // Calculate the number of bytes we expect the encoder to produce, + // then multiply by two to give a wide margin for error. + const size_t bytes_per_millisecond = + static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1); + const size_t approx_encoded_bytes = + Num10msFramesPerPacket() * 10 * bytes_per_millisecond; + return 2 * approx_encoded_bytes; +} + +void AudioEncoderMultiChannelOpusImpl::Reset() { + RTC_CHECK(RecreateEncoderInstance(config_)); +} + +absl::optional<std::pair<TimeDelta, TimeDelta>> +AudioEncoderMultiChannelOpusImpl::GetFrameLengthRange() const { + return {{TimeDelta::Millis(config_.frame_size_ms), + TimeDelta::Millis(config_.frame_size_ms)}}; +} + +// If the given config is OK, recreate the Opus encoder instance with those +// settings, save the config, and return true. Otherwise, do nothing and return +// false. +bool AudioEncoderMultiChannelOpusImpl::RecreateEncoderInstance( + const AudioEncoderMultiChannelOpusConfig& config) { + if (!config.IsOk()) + return false; + config_ = config; + if (inst_) + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); + input_buffer_.clear(); + input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame()); + RTC_CHECK_EQ( + 0, WebRtcOpus_MultistreamEncoderCreate( + &inst_, config.num_channels, + config.application == + AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip + ? 
0 + : 1, + config.num_streams, config.coupled_streams, + config.channel_mapping.data())); + const int bitrate = GetBitrateBps(config); + RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate)); + RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps."; + if (config.fec_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus enable FEC"; + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus disable FEC"; + } + RTC_CHECK_EQ( + 0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz)); + RTC_LOG(LS_VERBOSE) << "Set Opus playback rate to " + << config.max_playback_rate_hz << " hz."; + + // Use the DEFAULT complexity. + RTC_CHECK_EQ( + 0, WebRtcOpus_SetComplexity(inst_, AudioEncoderOpusConfig().complexity)); + RTC_LOG(LS_VERBOSE) << "Set Opus coding complexity to " + << AudioEncoderOpusConfig().complexity; + + if (config.dtx_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus enable DTX"; + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus disable DTX"; + } + + if (config.cbr_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus enable CBR"; + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_)); + RTC_LOG(LS_VERBOSE) << "Opus disable CBR"; + } + num_channels_to_encode_ = NumChannels(); + next_frame_length_ms_ = config_.frame_size_ms; + RTC_LOG(LS_VERBOSE) << "Set Opus frame length to " << config_.frame_size_ms + << " ms"; + return true; +} + +absl::optional<AudioEncoderMultiChannelOpusConfig> +AudioEncoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) { + if (!absl::EqualsIgnoreCase(format.name, "multiopus") || + format.clockrate_hz != 48000) { + return absl::nullopt; + } + + AudioEncoderMultiChannelOpusConfig config; + config.num_channels = format.num_channels; + config.frame_size_ms = GetFrameSizeMs(format); + config.max_playback_rate_hz = 
GetMaxPlaybackRate(format); + config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1"); + config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1"); + config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1"); + config.bitrate_bps = + CalculateBitrate(config.max_playback_rate_hz, config.num_channels, + GetFormatParameter(format, "maxaveragebitrate")); + config.application = + config.num_channels == 1 + ? AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip + : AudioEncoderMultiChannelOpusConfig::ApplicationMode::kAudio; + + config.supported_frame_lengths_ms.clear(); + std::copy(std::begin(kOpusSupportedFrameLengths), + std::end(kOpusSupportedFrameLengths), + std::back_inserter(config.supported_frame_lengths_ms)); + + auto num_streams = GetFormatParameter<int>(format, "num_streams"); + if (!num_streams.has_value()) { + return absl::nullopt; + } + config.num_streams = *num_streams; + + auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams"); + if (!coupled_streams.has_value()) { + return absl::nullopt; + } + config.coupled_streams = *coupled_streams; + + auto channel_mapping = + GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping"); + if (!channel_mapping.has_value()) { + return absl::nullopt; + } + config.channel_mapping = *channel_mapping; + + if (!config.IsOk()) { + return absl::nullopt; + } + return config; +} + +AudioCodecInfo AudioEncoderMultiChannelOpusImpl::QueryAudioEncoder( + const AudioEncoderMultiChannelOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + AudioCodecInfo info(48000, config.num_channels, config.bitrate_bps, + AudioEncoderOpusConfig::kMinBitrateBps, + AudioEncoderOpusConfig::kMaxBitrateBps); + info.allow_comfort_noise = false; + info.supports_network_adaption = false; + return info; +} + +size_t AudioEncoderMultiChannelOpusImpl::Num10msFramesPerPacket() const { + return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10)); +} +size_t 
AudioEncoderMultiChannelOpusImpl::SamplesPer10msFrame() const { + return rtc::CheckedDivExact(48000, 100) * config_.num_channels; +} +int AudioEncoderMultiChannelOpusImpl::SampleRateHz() const { + return 48000; +} +size_t AudioEncoderMultiChannelOpusImpl::NumChannels() const { + return config_.num_channels; +} +size_t AudioEncoderMultiChannelOpusImpl::Num10MsFramesInNextPacket() const { + return Num10msFramesPerPacket(); +} +size_t AudioEncoderMultiChannelOpusImpl::Max10MsFramesInAPacket() const { + return Num10msFramesPerPacket(); +} +int AudioEncoderMultiChannelOpusImpl::GetTargetBitrate() const { + return GetBitrateBps(config_); +} + +AudioEncoder::EncodedInfo AudioEncoderMultiChannelOpusImpl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + if (input_buffer_.empty()) + first_timestamp_in_buffer_ = rtp_timestamp; + + input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend()); + if (input_buffer_.size() < + (Num10msFramesPerPacket() * SamplesPer10msFrame())) { + return EncodedInfo(); + } + RTC_CHECK_EQ(input_buffer_.size(), + Num10msFramesPerPacket() * SamplesPer10msFrame()); + + const size_t max_encoded_bytes = SufficientOutputBufferSize(); + EncodedInfo info; + info.encoded_bytes = encoded->AppendData( + max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) { + int status = WebRtcOpus_Encode( + inst_, &input_buffer_[0], + rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels), + rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data()); + + RTC_CHECK_GE(status, 0); // Fails only if fed invalid data. + + return static_cast<size_t>(status); + }); + input_buffer_.clear(); + + // Will use new packet size for next encoding. + config_.frame_size_ms = next_frame_length_ms_; + + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.send_even_if_empty = true; // Allows Opus to send empty packets. 
+ + info.speech = true; + info.encoder_type = CodecType::kOther; + + return info; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h new file mode 100644 index 0000000000..8a7210515c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ + +#include <memory> +#include <utility> +#include <vector> + +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/audio_format.h" +#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus_config.h" +#include "api/units/time_delta.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" + +namespace webrtc { + +class RtcEventLog; + +class AudioEncoderMultiChannelOpusImpl final : public AudioEncoder { + public: + AudioEncoderMultiChannelOpusImpl( + const AudioEncoderMultiChannelOpusConfig& config, + int payload_type); + ~AudioEncoderMultiChannelOpusImpl() override; + + AudioEncoderMultiChannelOpusImpl(const AudioEncoderMultiChannelOpusImpl&) = + delete; + AudioEncoderMultiChannelOpusImpl& operator=( + const AudioEncoderMultiChannelOpusImpl&) = delete; + + // Static interface for use by BuiltinAudioEncoderFactory. 
+ static constexpr const char* GetPayloadName() { return "multiopus"; } + static absl::optional<AudioCodecInfo> QueryAudioEncoder( + const SdpAudioFormat& format); + + int SampleRateHz() const override; + size_t NumChannels() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + + void Reset() override; + absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange() + const override; + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) override; + + private: + static absl::optional<AudioEncoderMultiChannelOpusConfig> SdpToConfig( + const SdpAudioFormat& format); + static AudioCodecInfo QueryAudioEncoder( + const AudioEncoderMultiChannelOpusConfig& config); + static std::unique_ptr<AudioEncoder> MakeAudioEncoder( + const AudioEncoderMultiChannelOpusConfig&, + int payload_type); + + size_t Num10msFramesPerPacket() const; + size_t SamplesPer10msFrame() const; + size_t SufficientOutputBufferSize() const; + bool RecreateEncoderInstance( + const AudioEncoderMultiChannelOpusConfig& config); + void SetFrameLength(int frame_length_ms); + void SetNumChannelsToEncode(size_t num_channels_to_encode); + void SetProjectedPacketLossRate(float fraction); + + AudioEncoderMultiChannelOpusConfig config_; + const int payload_type_; + std::vector<int16_t> input_buffer_; + OpusEncInst* inst_; + uint32_t first_timestamp_in_buffer_; + size_t num_channels_to_encode_; + int next_frame_length_ms_; + + friend struct AudioEncoderMultiChannelOpus; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc new file mode 100644 index 
0000000000..92f6f2c169 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_unittest.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h" + +#include "test/gmock.h" + +namespace webrtc { +using ::testing::NiceMock; +using ::testing::Return; + +namespace { +constexpr int kOpusPayloadType = 120; +} // namespace + +TEST(AudioEncoderMultiOpusTest, CheckConfigValidity) { + { + const SdpAudioFormat sdp_format("multiopus", 48000, 2, + {{"channel_mapping", "3,0"}, + {"coupled_streams", "1"}, + {"num_streams", "2"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // Maps input channel 0 to coded channel 3, which doesn't exist. + EXPECT_FALSE(encoder_config.has_value()); + } + + { + const SdpAudioFormat sdp_format("multiopus", 48000, 2, + {{"channel_mapping", "0"}, + {"coupled_streams", "1"}, + {"num_streams", "2"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // The mapping is too short. + EXPECT_FALSE(encoder_config.has_value()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,0,0"}, + {"coupled_streams", "0"}, + {"num_streams", "1"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // Coded channel 0 comes from both input channels 0, 1 and 2. 
+ EXPECT_FALSE(encoder_config.has_value()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "1"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + // This is fine, because channels 1, 2 are set to be ignored. + EXPECT_TRUE(encoder_config->IsOk()); + } + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "2"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + + // This is NOT fine, because channels nothing says how coded channel 1 + // should be coded. + EXPECT_FALSE(encoder_config.has_value()); + } +} + +TEST(AudioEncoderMultiOpusTest, ConfigValuesAreParsedCorrectly) { + SdpAudioFormat sdp_format({"multiopus", + 48000, + 6, + {{"minptime", "10"}, + {"useinbandfec", "1"}, + {"channel_mapping", "0,4,1,2,3,5"}, + {"num_streams", "4"}, + {"coupled_streams", "2"}}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + EXPECT_EQ(encoder_config->coupled_streams, 2); + EXPECT_EQ(encoder_config->num_streams, 4); + EXPECT_THAT( + encoder_config->channel_mapping, + testing::ContainerEq(std::vector<unsigned char>({0, 4, 1, 2, 3, 5}))); +} + +TEST(AudioEncoderMultiOpusTest, CreateFromValidConfig) { + { + const SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "0,255,255"}, + {"coupled_streams", "0"}, + {"num_streams", "2"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_FALSE(encoder_config.has_value()); + } + { + const 
SdpAudioFormat sdp_format("multiopus", 48000, 3, + {{"channel_mapping", "1,255,0"}, + {"coupled_streams", "1"}, + {"num_streams", "1"}}); + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format); + ASSERT_TRUE(encoder_config.has_value()); + + EXPECT_THAT(encoder_config->channel_mapping, + testing::ContainerEq(std::vector<unsigned char>({1, 255, 0}))); + + EXPECT_TRUE(encoder_config->IsOk()); + + const std::unique_ptr<AudioEncoder> opus_encoder = + AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config, + kOpusPayloadType); + + // Creating an encoder from a valid config should work. + EXPECT_TRUE(opus_encoder); + } +} + +TEST(AudioEncoderMultiOpusTest, AdvertisedCodecsCanBeCreated) { + std::vector<AudioCodecSpec> specs; + AudioEncoderMultiChannelOpus::AppendSupportedEncoders(&specs); + + EXPECT_FALSE(specs.empty()); + + for (const AudioCodecSpec& spec : specs) { + const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config = + AudioEncoderMultiChannelOpus::SdpToConfig(spec.format); + ASSERT_TRUE(encoder_config.has_value()); + + const std::unique_ptr<AudioEncoder> opus_encoder = + AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config, + kOpusPayloadType); + + EXPECT_TRUE(opus_encoder); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc new file mode 100644 index 0000000000..17e0e33b1d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.cc @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h" + +#include <algorithm> +#include <iterator> +#include <memory> +#include <string> +#include <utility> + +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h" +#include "modules/audio_coding/audio_network_adaptor/controller_manager.h" +#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/exp_filter.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/string_encode.h" +#include "rtc_base/string_to_number.h" +#include "rtc_base/time_utils.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +// Codec parameters for Opus. +// draft-spittka-payload-rtp-opus-03 + +// Recommended bitrates: +// 8-12 kb/s for NB speech, +// 16-20 kb/s for WB speech, +// 28-40 kb/s for FB speech, +// 48-64 kb/s for FB mono music, and +// 64-128 kb/s for FB stereo music. +// The current implementation applies the following values to mono signals, +// and multiplies them by 2 for stereo. 
+constexpr int kOpusBitrateNbBps = 12000; +constexpr int kOpusBitrateWbBps = 20000; +constexpr int kOpusBitrateFbBps = 32000; + +constexpr int kRtpTimestampRateHz = 48000; +constexpr int kDefaultMaxPlaybackRate = 48000; + +// These two lists must be sorted from low to high +#if WEBRTC_OPUS_SUPPORT_120MS_PTIME +constexpr int kANASupportedFrameLengths[] = {20, 40, 60, 120}; +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120}; +#else +constexpr int kANASupportedFrameLengths[] = {20, 40, 60}; +constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60}; +#endif + +// PacketLossFractionSmoother uses an exponential filter with a time constant +// of -1.0 / ln(0.9999) = 10000 ms. +constexpr float kAlphaForPacketLossFractionSmoother = 0.9999f; +constexpr float kMaxPacketLossFraction = 0.2f; + +int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) { + const int bitrate = [&] { + if (max_playback_rate <= 8000) { + return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels); + } else if (max_playback_rate <= 16000) { + return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels); + } else { + return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels); + } + }(); + RTC_DCHECK_GE(bitrate, AudioEncoderOpusConfig::kMinBitrateBps); + RTC_DCHECK_LE(bitrate, AudioEncoderOpusConfig::kMaxBitrateBps); + return bitrate; +} + +// Get the maxaveragebitrate parameter in string-form, so we can properly figure +// out how invalid it is and accurately log invalid values. 
+int CalculateBitrate(int max_playback_rate_hz, + size_t num_channels, + absl::optional<std::string> bitrate_param) { + const int default_bitrate = + CalculateDefaultBitrate(max_playback_rate_hz, num_channels); + + if (bitrate_param) { + const auto bitrate = rtc::StringToNumber<int>(*bitrate_param); + if (bitrate) { + const int chosen_bitrate = + std::max(AudioEncoderOpusConfig::kMinBitrateBps, + std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps)); + if (bitrate != chosen_bitrate) { + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate + << " clamped to " << chosen_bitrate; + } + return chosen_bitrate; + } + RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param + << "\" replaced by default bitrate " << default_bitrate; + } + + return default_bitrate; +} + +int GetChannelCount(const SdpAudioFormat& format) { + const auto param = GetFormatParameter(format, "stereo"); + if (param == "1") { + return 2; + } else { + return 1; + } +} + +int GetMaxPlaybackRate(const SdpAudioFormat& format) { + const auto param = GetFormatParameter<int>(format, "maxplaybackrate"); + if (param && *param >= 8000) { + return std::min(*param, kDefaultMaxPlaybackRate); + } + return kDefaultMaxPlaybackRate; +} + +int GetFrameSizeMs(const SdpAudioFormat& format) { + const auto ptime = GetFormatParameter<int>(format, "ptime"); + if (ptime) { + // Pick the next highest supported frame length from + // kOpusSupportedFrameLengths. + for (const int supported_frame_length : kOpusSupportedFrameLengths) { + if (supported_frame_length >= *ptime) { + return supported_frame_length; + } + } + // If none was found, return the largest supported frame length. 
+ return *(std::end(kOpusSupportedFrameLengths) - 1); + } + + return AudioEncoderOpusConfig::kDefaultFrameSizeMs; +} + +void FindSupportedFrameLengths(int min_frame_length_ms, + int max_frame_length_ms, + std::vector<int>* out) { + out->clear(); + std::copy_if(std::begin(kANASupportedFrameLengths), + std::end(kANASupportedFrameLengths), std::back_inserter(*out), + [&](int frame_length_ms) { + return frame_length_ms >= min_frame_length_ms && + frame_length_ms <= max_frame_length_ms; + }); + RTC_DCHECK(std::is_sorted(out->begin(), out->end())); +} + +int GetBitrateBps(const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + return *config.bitrate_bps; +} + +std::vector<float> GetBitrateMultipliers() { + constexpr char kBitrateMultipliersName[] = + "WebRTC-Audio-OpusBitrateMultipliers"; + const bool use_bitrate_multipliers = + webrtc::field_trial::IsEnabled(kBitrateMultipliersName); + if (use_bitrate_multipliers) { + const std::string field_trial_string = + webrtc::field_trial::FindFullName(kBitrateMultipliersName); + std::vector<std::string> pieces; + rtc::tokenize(field_trial_string, '-', &pieces); + if (pieces.size() < 2 || pieces[0] != "Enabled") { + RTC_LOG(LS_WARNING) << "Invalid parameters for " + << kBitrateMultipliersName + << ", not using custom values."; + return std::vector<float>(); + } + std::vector<float> multipliers(pieces.size() - 1); + for (size_t i = 1; i < pieces.size(); i++) { + if (!rtc::FromString(pieces[i], &multipliers[i - 1])) { + RTC_LOG(LS_WARNING) + << "Invalid parameters for " << kBitrateMultipliersName + << ", not using custom values."; + return std::vector<float>(); + } + } + RTC_LOG(LS_INFO) << "Using custom bitrate multipliers: " + << field_trial_string; + return multipliers; + } + return std::vector<float>(); +} + +int GetMultipliedBitrate(int bitrate, const std::vector<float>& multipliers) { + // The multipliers are valid from 5 kbps. 
+ const size_t bitrate_kbps = static_cast<size_t>(bitrate / 1000); + if (bitrate_kbps < 5 || bitrate_kbps >= multipliers.size() + 5) { + return bitrate; + } + return static_cast<int>(multipliers[bitrate_kbps - 5] * bitrate); +} +} // namespace + +void AudioEncoderOpusImpl::AppendSupportedEncoders( + std::vector<AudioCodecSpec>* specs) { + const SdpAudioFormat fmt = {"opus", + kRtpTimestampRateHz, + 2, + {{"minptime", "10"}, {"useinbandfec", "1"}}}; + const AudioCodecInfo info = QueryAudioEncoder(*SdpToConfig(fmt)); + specs->push_back({fmt, info}); +} + +AudioCodecInfo AudioEncoderOpusImpl::QueryAudioEncoder( + const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + AudioCodecInfo info(config.sample_rate_hz, config.num_channels, + *config.bitrate_bps, + AudioEncoderOpusConfig::kMinBitrateBps, + AudioEncoderOpusConfig::kMaxBitrateBps); + info.allow_comfort_noise = false; + info.supports_network_adaption = true; + return info; +} + +std::unique_ptr<AudioEncoder> AudioEncoderOpusImpl::MakeAudioEncoder( + const AudioEncoderOpusConfig& config, + int payload_type) { + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return nullptr; + } + return std::make_unique<AudioEncoderOpusImpl>(config, payload_type); +} + +absl::optional<AudioEncoderOpusConfig> AudioEncoderOpusImpl::SdpToConfig( + const SdpAudioFormat& format) { + if (!absl::EqualsIgnoreCase(format.name, "opus") || + format.clockrate_hz != kRtpTimestampRateHz) { + return absl::nullopt; + } + + AudioEncoderOpusConfig config; + config.num_channels = GetChannelCount(format); + config.frame_size_ms = GetFrameSizeMs(format); + config.max_playback_rate_hz = GetMaxPlaybackRate(format); + config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1"); + config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1"); + config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1"); + config.bitrate_bps = + CalculateBitrate(config.max_playback_rate_hz, config.num_channels, + 
GetFormatParameter(format, "maxaveragebitrate")); + config.application = config.num_channels == 1 + ? AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + + constexpr int kMinANAFrameLength = kANASupportedFrameLengths[0]; + constexpr int kMaxANAFrameLength = + kANASupportedFrameLengths[arraysize(kANASupportedFrameLengths) - 1]; + + // For now, minptime and maxptime are only used with ANA. If ptime is outside + // of this range, it will get adjusted once ANA takes hold. Ideally, we'd know + // if ANA was to be used when setting up the config, and adjust accordingly. + const int min_frame_length_ms = + GetFormatParameter<int>(format, "minptime").value_or(kMinANAFrameLength); + const int max_frame_length_ms = + GetFormatParameter<int>(format, "maxptime").value_or(kMaxANAFrameLength); + + FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms, + &config.supported_frame_lengths_ms); + if (!config.IsOk()) { + RTC_DCHECK_NOTREACHED(); + return absl::nullopt; + } + return config; +} + +absl::optional<int> AudioEncoderOpusImpl::GetNewComplexity( + const AudioEncoderOpusConfig& config) { + RTC_DCHECK(config.IsOk()); + const int bitrate_bps = GetBitrateBps(config); + if (bitrate_bps >= config.complexity_threshold_bps - + config.complexity_threshold_window_bps && + bitrate_bps <= config.complexity_threshold_bps + + config.complexity_threshold_window_bps) { + // Within the hysteresis window; make no change. + return absl::nullopt; + } else { + return bitrate_bps <= config.complexity_threshold_bps + ? 
config.low_rate_complexity + : config.complexity; + } +} + +absl::optional<int> AudioEncoderOpusImpl::GetNewBandwidth( + const AudioEncoderOpusConfig& config, + OpusEncInst* inst) { + constexpr int kMinWidebandBitrate = 8000; + constexpr int kMaxNarrowbandBitrate = 9000; + constexpr int kAutomaticThreshold = 11000; + RTC_DCHECK(config.IsOk()); + const int bitrate = GetBitrateBps(config); + if (bitrate > kAutomaticThreshold) { + return absl::optional<int>(OPUS_AUTO); + } + const int bandwidth = WebRtcOpus_GetBandwidth(inst); + RTC_DCHECK_GE(bandwidth, 0); + if (bitrate > kMaxNarrowbandBitrate && bandwidth < OPUS_BANDWIDTH_WIDEBAND) { + return absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND); + } else if (bitrate < kMinWidebandBitrate && + bandwidth > OPUS_BANDWIDTH_NARROWBAND) { + return absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND); + } + return absl::optional<int>(); +} + +class AudioEncoderOpusImpl::PacketLossFractionSmoother { + public: + explicit PacketLossFractionSmoother() + : last_sample_time_ms_(rtc::TimeMillis()), + smoother_(kAlphaForPacketLossFractionSmoother) {} + + // Gets the smoothed packet loss fraction. + float GetAverage() const { + float value = smoother_.filtered(); + return (value == rtc::ExpFilter::kValueUndefined) ? 0.0f : value; + } + + // Add new observation to the packet loss fraction smoother. + void AddSample(float packet_loss_fraction) { + int64_t now_ms = rtc::TimeMillis(); + smoother_.Apply(static_cast<float>(now_ms - last_sample_time_ms_), + packet_loss_fraction); + last_sample_time_ms_ = now_ms; + } + + private: + int64_t last_sample_time_ms_; + + // An exponential filter is used to smooth the packet loss fraction. 
+ rtc::ExpFilter smoother_; +}; + +AudioEncoderOpusImpl::AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, + int payload_type) + : AudioEncoderOpusImpl( + config, + payload_type, + [this](absl::string_view config_string, RtcEventLog* event_log) { + return DefaultAudioNetworkAdaptorCreator(config_string, event_log); + }, + // We choose 5sec as initial time constant due to empirical data. + std::make_unique<SmoothingFilterImpl>(5000)) {} + +AudioEncoderOpusImpl::AudioEncoderOpusImpl( + const AudioEncoderOpusConfig& config, + int payload_type, + const AudioNetworkAdaptorCreator& audio_network_adaptor_creator, + std::unique_ptr<SmoothingFilter> bitrate_smoother) + : payload_type_(payload_type), + use_stable_target_for_adaptation_(!webrtc::field_trial::IsDisabled( + "WebRTC-Audio-StableTargetAdaptation")), + adjust_bandwidth_( + webrtc::field_trial::IsEnabled("WebRTC-AdjustOpusBandwidth")), + bitrate_changed_(true), + bitrate_multipliers_(GetBitrateMultipliers()), + packet_loss_rate_(0.0), + inst_(nullptr), + packet_loss_fraction_smoother_(new PacketLossFractionSmoother()), + audio_network_adaptor_creator_(audio_network_adaptor_creator), + bitrate_smoother_(std::move(bitrate_smoother)), + consecutive_dtx_frames_(0) { + RTC_DCHECK(0 <= payload_type && payload_type <= 127); + + // Sanity check of the redundant payload type field that we want to get rid + // of. 
See https://bugs.chromium.org/p/webrtc/issues/detail?id=7847 + RTC_CHECK(config.payload_type == -1 || config.payload_type == payload_type); + + RTC_CHECK(RecreateEncoderInstance(config)); + SetProjectedPacketLossRate(packet_loss_rate_); +} + +AudioEncoderOpusImpl::AudioEncoderOpusImpl(int payload_type, + const SdpAudioFormat& format) + : AudioEncoderOpusImpl(*SdpToConfig(format), payload_type) {} + +AudioEncoderOpusImpl::~AudioEncoderOpusImpl() { + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); +} + +int AudioEncoderOpusImpl::SampleRateHz() const { + return config_.sample_rate_hz; +} + +size_t AudioEncoderOpusImpl::NumChannels() const { + return config_.num_channels; +} + +int AudioEncoderOpusImpl::RtpTimestampRateHz() const { + return kRtpTimestampRateHz; +} + +size_t AudioEncoderOpusImpl::Num10MsFramesInNextPacket() const { + return Num10msFramesPerPacket(); +} + +size_t AudioEncoderOpusImpl::Max10MsFramesInAPacket() const { + return Num10msFramesPerPacket(); +} + +int AudioEncoderOpusImpl::GetTargetBitrate() const { + return GetBitrateBps(config_); +} + +void AudioEncoderOpusImpl::Reset() { + RTC_CHECK(RecreateEncoderInstance(config_)); +} + +bool AudioEncoderOpusImpl::SetFec(bool enable) { + if (enable) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_)); + } + config_.fec_enabled = enable; + return true; +} + +bool AudioEncoderOpusImpl::SetDtx(bool enable) { + if (enable) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_)); + } + config_.dtx_enabled = enable; + return true; +} + +bool AudioEncoderOpusImpl::GetDtx() const { + return config_.dtx_enabled; +} + +bool AudioEncoderOpusImpl::SetApplication(Application application) { + auto conf = config_; + switch (application) { + case Application::kSpeech: + conf.application = AudioEncoderOpusConfig::ApplicationMode::kVoip; + break; + case Application::kAudio: + conf.application = 
AudioEncoderOpusConfig::ApplicationMode::kAudio; + break; + } + return RecreateEncoderInstance(conf); +} + +void AudioEncoderOpusImpl::SetMaxPlaybackRate(int frequency_hz) { + auto conf = config_; + conf.max_playback_rate_hz = frequency_hz; + RTC_CHECK(RecreateEncoderInstance(conf)); +} + +bool AudioEncoderOpusImpl::EnableAudioNetworkAdaptor( + const std::string& config_string, + RtcEventLog* event_log) { + audio_network_adaptor_ = + audio_network_adaptor_creator_(config_string, event_log); + return audio_network_adaptor_.get() != nullptr; +} + +void AudioEncoderOpusImpl::DisableAudioNetworkAdaptor() { + audio_network_adaptor_.reset(nullptr); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetUplinkPacketLossFraction( + uplink_packet_loss_fraction); + ApplyAudioNetworkAdaptor(); + } + packet_loss_fraction_smoother_->AddSample(uplink_packet_loss_fraction); + float average_fraction_loss = packet_loss_fraction_smoother_->GetAverage(); + SetProjectedPacketLossRate(average_fraction_loss); +} + +void AudioEncoderOpusImpl::OnReceivedTargetAudioBitrate( + int target_audio_bitrate_bps) { + SetTargetBitrate(target_audio_bitrate_bps); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms, + absl::optional<int64_t> stable_target_bitrate_bps) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetTargetAudioBitrate(target_audio_bitrate_bps); + if (use_stable_target_for_adaptation_) { + if (stable_target_bitrate_bps) + audio_network_adaptor_->SetUplinkBandwidth(*stable_target_bitrate_bps); + } else { + // We give smoothed bitrate allocation to audio network adaptor as + // the uplink bandwidth. + // The BWE spikes should not affect the bitrate smoother more than 25%. + // To simplify the calculations we use a step response as input signal. 
+ // The step response of an exponential filter is + // u(t) = 1 - e^(-t / time_constant). + // In order to limit the affect of a BWE spike within 25% of its value + // before + // the next BWE update, we would choose a time constant that fulfills + // 1 - e^(-bwe_period_ms / time_constant) < 0.25 + // Then 4 * bwe_period_ms is a good choice. + if (bwe_period_ms) + bitrate_smoother_->SetTimeConstantMs(*bwe_period_ms * 4); + bitrate_smoother_->AddSample(target_audio_bitrate_bps); + } + + ApplyAudioNetworkAdaptor(); + } else { + if (!overhead_bytes_per_packet_) { + RTC_LOG(LS_INFO) + << "AudioEncoderOpusImpl: Overhead unknown, target audio bitrate " + << target_audio_bitrate_bps << " bps is ignored."; + return; + } + const int overhead_bps = static_cast<int>( + *overhead_bytes_per_packet_ * 8 * 100 / Num10MsFramesInNextPacket()); + SetTargetBitrate( + std::min(AudioEncoderOpusConfig::kMaxBitrateBps, + std::max(AudioEncoderOpusConfig::kMinBitrateBps, + target_audio_bitrate_bps - overhead_bps))); + } +} +void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) { + OnReceivedUplinkBandwidth(target_audio_bitrate_bps, bwe_period_ms, + absl::nullopt); +} + +void AudioEncoderOpusImpl::OnReceivedUplinkAllocation( + BitrateAllocationUpdate update) { + OnReceivedUplinkBandwidth(update.target_bitrate.bps(), update.bwe_period.ms(), + update.stable_target_bitrate.bps()); +} + +void AudioEncoderOpusImpl::OnReceivedRtt(int rtt_ms) { + if (!audio_network_adaptor_) + return; + audio_network_adaptor_->SetRtt(rtt_ms); + ApplyAudioNetworkAdaptor(); +} + +void AudioEncoderOpusImpl::OnReceivedOverhead( + size_t overhead_bytes_per_packet) { + if (audio_network_adaptor_) { + audio_network_adaptor_->SetOverhead(overhead_bytes_per_packet); + ApplyAudioNetworkAdaptor(); + } else { + overhead_bytes_per_packet_ = overhead_bytes_per_packet; + } +} + +void AudioEncoderOpusImpl::SetReceiverFrameLengthRange( + int 
min_frame_length_ms, + int max_frame_length_ms) { + // Ensure that `SetReceiverFrameLengthRange` is called before + // `EnableAudioNetworkAdaptor`, otherwise we need to recreate + // `audio_network_adaptor_`, which is not a needed use case. + RTC_DCHECK(!audio_network_adaptor_); + FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms, + &config_.supported_frame_lengths_ms); +} + +AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl( + uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + MaybeUpdateUplinkBandwidth(); + + if (input_buffer_.empty()) + first_timestamp_in_buffer_ = rtp_timestamp; + + input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend()); + if (input_buffer_.size() < + (Num10msFramesPerPacket() * SamplesPer10msFrame())) { + return EncodedInfo(); + } + RTC_CHECK_EQ(input_buffer_.size(), + Num10msFramesPerPacket() * SamplesPer10msFrame()); + + const size_t max_encoded_bytes = SufficientOutputBufferSize(); + EncodedInfo info; + info.encoded_bytes = encoded->AppendData( + max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) { + int status = WebRtcOpus_Encode( + inst_, &input_buffer_[0], + rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels), + rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data()); + + RTC_CHECK_GE(status, 0); // Fails only if fed invalid data. + + return static_cast<size_t>(status); + }); + input_buffer_.clear(); + + bool dtx_frame = (info.encoded_bytes <= 2); + + // Will use new packet size for next encoding. + config_.frame_size_ms = next_frame_length_ms_; + + if (adjust_bandwidth_ && bitrate_changed_) { + const auto bandwidth = GetNewBandwidth(config_, inst_); + if (bandwidth) { + RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *bandwidth)); + } + bitrate_changed_ = false; + } + + info.encoded_timestamp = first_timestamp_in_buffer_; + info.payload_type = payload_type_; + info.send_even_if_empty = true; // Allows Opus to send empty packets. 
+ // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame + // coding the background noise. Avoid flagging this frame as speech + // (even though there is a probability of the frame being speech). + info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20); + info.encoder_type = CodecType::kOpus; + + // Increase or reset DTX counter. + consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0); + + return info; +} + +size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const { + return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10)); +} + +size_t AudioEncoderOpusImpl::SamplesPer10msFrame() const { + return rtc::CheckedDivExact(config_.sample_rate_hz, 100) * + config_.num_channels; +} + +size_t AudioEncoderOpusImpl::SufficientOutputBufferSize() const { + // Calculate the number of bytes we expect the encoder to produce, + // then multiply by two to give a wide margin for error. + const size_t bytes_per_millisecond = + static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1); + const size_t approx_encoded_bytes = + Num10msFramesPerPacket() * 10 * bytes_per_millisecond; + return 2 * approx_encoded_bytes; +} + +// If the given config is OK, recreate the Opus encoder instance with those +// settings, save the config, and return true. Otherwise, do nothing and return +// false. +bool AudioEncoderOpusImpl::RecreateEncoderInstance( + const AudioEncoderOpusConfig& config) { + if (!config.IsOk()) + return false; + config_ = config; + if (inst_) + RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_)); + input_buffer_.clear(); + input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame()); + RTC_CHECK_EQ(0, WebRtcOpus_EncoderCreate( + &inst_, config.num_channels, + config.application == + AudioEncoderOpusConfig::ApplicationMode::kVoip + ? 
0 + : 1, + config.sample_rate_hz)); + const int bitrate = GetBitrateBps(config); + RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate)); + RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps."; + if (config.fec_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_)); + } + RTC_CHECK_EQ( + 0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz)); + // Use the default complexity if the start bitrate is within the hysteresis + // window. + complexity_ = GetNewComplexity(config).value_or(config.complexity); + RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_)); + bitrate_changed_ = true; + if (config.dtx_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_)); + } + RTC_CHECK_EQ(0, + WebRtcOpus_SetPacketLossRate( + inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5))); + if (config.cbr_enabled) { + RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_)); + } else { + RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_)); + } + num_channels_to_encode_ = NumChannels(); + next_frame_length_ms_ = config_.frame_size_ms; + return true; +} + +void AudioEncoderOpusImpl::SetFrameLength(int frame_length_ms) { + if (next_frame_length_ms_ != frame_length_ms) { + RTC_LOG(LS_VERBOSE) << "Update Opus frame length " + << "from " << next_frame_length_ms_ << " ms " + << "to " << frame_length_ms << " ms."; + } + next_frame_length_ms_ = frame_length_ms; +} + +void AudioEncoderOpusImpl::SetNumChannelsToEncode( + size_t num_channels_to_encode) { + RTC_DCHECK_GT(num_channels_to_encode, 0); + RTC_DCHECK_LE(num_channels_to_encode, config_.num_channels); + + if (num_channels_to_encode_ == num_channels_to_encode) + return; + + RTC_CHECK_EQ(0, WebRtcOpus_SetForceChannels(inst_, num_channels_to_encode)); + num_channels_to_encode_ = num_channels_to_encode; +} + +void AudioEncoderOpusImpl::SetProjectedPacketLossRate(float fraction) { + fraction = 
std::min(std::max(fraction, 0.0f), kMaxPacketLossFraction); + if (packet_loss_rate_ != fraction) { + packet_loss_rate_ = fraction; + RTC_CHECK_EQ( + 0, WebRtcOpus_SetPacketLossRate( + inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5))); + } +} + +void AudioEncoderOpusImpl::SetTargetBitrate(int bits_per_second) { + const int new_bitrate = rtc::SafeClamp<int>( + bits_per_second, AudioEncoderOpusConfig::kMinBitrateBps, + AudioEncoderOpusConfig::kMaxBitrateBps); + if (config_.bitrate_bps && *config_.bitrate_bps != new_bitrate) { + config_.bitrate_bps = new_bitrate; + RTC_DCHECK(config_.IsOk()); + const int bitrate = GetBitrateBps(config_); + RTC_CHECK_EQ( + 0, WebRtcOpus_SetBitRate( + inst_, GetMultipliedBitrate(bitrate, bitrate_multipliers_))); + RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps."; + bitrate_changed_ = true; + } + + const auto new_complexity = GetNewComplexity(config_); + if (new_complexity && complexity_ != *new_complexity) { + complexity_ = *new_complexity; + RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_)); + } +} + +void AudioEncoderOpusImpl::ApplyAudioNetworkAdaptor() { + auto config = audio_network_adaptor_->GetEncoderRuntimeConfig(); + + if (config.bitrate_bps) + SetTargetBitrate(*config.bitrate_bps); + if (config.frame_length_ms) + SetFrameLength(*config.frame_length_ms); + if (config.enable_dtx) + SetDtx(*config.enable_dtx); + if (config.num_channels) + SetNumChannelsToEncode(*config.num_channels); +} + +std::unique_ptr<AudioNetworkAdaptor> +AudioEncoderOpusImpl::DefaultAudioNetworkAdaptorCreator( + absl::string_view config_string, + RtcEventLog* event_log) const { + AudioNetworkAdaptorImpl::Config config; + config.event_log = event_log; + return std::unique_ptr<AudioNetworkAdaptor>(new AudioNetworkAdaptorImpl( + config, ControllerManagerImpl::Create( + config_string, NumChannels(), supported_frame_lengths_ms(), + AudioEncoderOpusConfig::kMinBitrateBps, + num_channels_to_encode_, next_frame_length_ms_, + 
GetTargetBitrate(), config_.fec_enabled, GetDtx()))); +} + +void AudioEncoderOpusImpl::MaybeUpdateUplinkBandwidth() { + if (audio_network_adaptor_ && !use_stable_target_for_adaptation_) { + int64_t now_ms = rtc::TimeMillis(); + if (!bitrate_smoother_last_update_time_ || + now_ms - *bitrate_smoother_last_update_time_ >= + config_.uplink_bandwidth_update_interval_ms) { + absl::optional<float> smoothed_bitrate = bitrate_smoother_->GetAverage(); + if (smoothed_bitrate) + audio_network_adaptor_->SetUplinkBandwidth(*smoothed_bitrate); + bitrate_smoother_last_update_time_ = now_ms; + } + } +} + +ANAStats AudioEncoderOpusImpl::GetANAStats() const { + if (audio_network_adaptor_) { + return audio_network_adaptor_->GetStats(); + } + return ANAStats(); +} + +absl::optional<std::pair<TimeDelta, TimeDelta> > +AudioEncoderOpusImpl::GetFrameLengthRange() const { + if (audio_network_adaptor_) { + if (config_.supported_frame_lengths_ms.empty()) { + return absl::nullopt; + } + return {{TimeDelta::Millis(config_.supported_frame_lengths_ms.front()), + TimeDelta::Millis(config_.supported_frame_lengths_ms.back())}}; + } else { + return {{TimeDelta::Millis(config_.frame_size_ms), + TimeDelta::Millis(config_.frame_size_ms)}}; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h new file mode 100644 index 0000000000..8c5c235016 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ + +#include <functional> +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/audio_codecs/audio_format.h" +#include "api/audio_codecs/opus/audio_encoder_opus_config.h" +#include "common_audio/smoothing_filter.h" +#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" + +namespace webrtc { + +class RtcEventLog; + +class AudioEncoderOpusImpl final : public AudioEncoder { + public: + // Returns empty if the current bitrate falls within the hysteresis window, + // defined by complexity_threshold_bps +/- complexity_threshold_window_bps. + // Otherwise, returns the current complexity depending on whether the + // current bitrate is above or below complexity_threshold_bps. + static absl::optional<int> GetNewComplexity( + const AudioEncoderOpusConfig& config); + + // Returns OPUS_AUTO if the current bitrate is above wideband threshold. + // Returns empty if it is below, but bandwidth coincides with the desired one. + // Otherwise returns the desired bandwidth. + static absl::optional<int> GetNewBandwidth( + const AudioEncoderOpusConfig& config, + OpusEncInst* inst); + + using AudioNetworkAdaptorCreator = + std::function<std::unique_ptr<AudioNetworkAdaptor>(absl::string_view, + RtcEventLog*)>; + + AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, int payload_type); + + // Dependency injection for testing. 
+ AudioEncoderOpusImpl( + const AudioEncoderOpusConfig& config, + int payload_type, + const AudioNetworkAdaptorCreator& audio_network_adaptor_creator, + std::unique_ptr<SmoothingFilter> bitrate_smoother); + + AudioEncoderOpusImpl(int payload_type, const SdpAudioFormat& format); + ~AudioEncoderOpusImpl() override; + + AudioEncoderOpusImpl(const AudioEncoderOpusImpl&) = delete; + AudioEncoderOpusImpl& operator=(const AudioEncoderOpusImpl&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + int RtpTimestampRateHz() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + + void Reset() override; + bool SetFec(bool enable) override; + + // Set Opus DTX. Once enabled, Opus stops transmission, when it detects + // voice being inactive. During that, it still sends 2 packets (one for + // content, one for signaling) about every 400 ms. + bool SetDtx(bool enable) override; + bool GetDtx() const override; + + bool SetApplication(Application application) override; + void SetMaxPlaybackRate(int frequency_hz) override; + bool EnableAudioNetworkAdaptor(const std::string& config_string, + RtcEventLog* event_log) override; + void DisableAudioNetworkAdaptor() override; + void OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) override; + void OnReceivedTargetAudioBitrate(int target_audio_bitrate_bps) override; + void OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) override; + void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override; + void OnReceivedRtt(int rtt_ms) override; + void OnReceivedOverhead(size_t overhead_bytes_per_packet) override; + void SetReceiverFrameLengthRange(int min_frame_length_ms, + int max_frame_length_ms) override; + ANAStats GetANAStats() const override; + absl::optional<std::pair<TimeDelta, TimeDelta> > GetFrameLengthRange() + 
const override; + rtc::ArrayView<const int> supported_frame_lengths_ms() const { + return config_.supported_frame_lengths_ms; + } + + // Getters for testing. + float packet_loss_rate() const { return packet_loss_rate_; } + AudioEncoderOpusConfig::ApplicationMode application() const { + return config_.application; + } + bool fec_enabled() const { return config_.fec_enabled; } + size_t num_channels_to_encode() const { return num_channels_to_encode_; } + int next_frame_length_ms() const { return next_frame_length_ms_; } + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) override; + + private: + class PacketLossFractionSmoother; + + static absl::optional<AudioEncoderOpusConfig> SdpToConfig( + const SdpAudioFormat& format); + static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs); + static AudioCodecInfo QueryAudioEncoder(const AudioEncoderOpusConfig& config); + static std::unique_ptr<AudioEncoder> MakeAudioEncoder( + const AudioEncoderOpusConfig&, + int payload_type); + + size_t Num10msFramesPerPacket() const; + size_t SamplesPer10msFrame() const; + size_t SufficientOutputBufferSize() const; + bool RecreateEncoderInstance(const AudioEncoderOpusConfig& config); + void SetFrameLength(int frame_length_ms); + void SetNumChannelsToEncode(size_t num_channels_to_encode); + void SetProjectedPacketLossRate(float fraction); + + void OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms, + absl::optional<int64_t> link_capacity_allocation); + + // TODO(minyue): remove "override" when we can deprecate + // `AudioEncoder::SetTargetBitrate`. 
+ void SetTargetBitrate(int target_bps) override; + + void ApplyAudioNetworkAdaptor(); + std::unique_ptr<AudioNetworkAdaptor> DefaultAudioNetworkAdaptorCreator( + absl::string_view config_string, + RtcEventLog* event_log) const; + + void MaybeUpdateUplinkBandwidth(); + + AudioEncoderOpusConfig config_; + const int payload_type_; + const bool use_stable_target_for_adaptation_; + const bool adjust_bandwidth_; + bool bitrate_changed_; + // A multiplier for bitrates at 5 kbps and higher. The target bitrate + // will be multiplied by these multipliers, each multiplier is applied to a + // 1 kbps range. + std::vector<float> bitrate_multipliers_; + float packet_loss_rate_; + std::vector<int16_t> input_buffer_; + OpusEncInst* inst_; + uint32_t first_timestamp_in_buffer_; + size_t num_channels_to_encode_; + int next_frame_length_ms_; + int complexity_; + std::unique_ptr<PacketLossFractionSmoother> packet_loss_fraction_smoother_; + const AudioNetworkAdaptorCreator audio_network_adaptor_creator_; + std::unique_ptr<AudioNetworkAdaptor> audio_network_adaptor_; + absl::optional<size_t> overhead_bytes_per_packet_; + const std::unique_ptr<SmoothingFilter> bitrate_smoother_; + absl::optional<int64_t> bitrate_smoother_last_update_time_; + int consecutive_dtx_frames_; + + friend struct AudioEncoderOpus; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc new file mode 100644 index 0000000000..a2ebe43bbe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc @@ -0,0 +1,914 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/opus/audio_encoder_opus.h" + +#include <array> +#include <memory> +#include <utility> + +#include "absl/strings/string_view.h" +#include "common_audio/mocks/mock_smoothing_filter.h" +#include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h" +#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "rtc_base/checks.h" +#include "rtc_base/fake_clock.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +using ::testing::NiceMock; +using ::testing::Return; + +namespace { + +constexpr int kDefaultOpusPayloadType = 105; +constexpr int kDefaultOpusRate = 32000; +constexpr int kDefaultOpusPacSize = 960; +constexpr int64_t kInitialTimeUs = 12345678; + +AudioEncoderOpusConfig CreateConfigWithParameters( + const SdpAudioFormat::Parameters& params) { + const SdpAudioFormat format("opus", 48000, 2, params); + return *AudioEncoderOpus::SdpToConfig(format); +} + +struct AudioEncoderOpusStates { + MockAudioNetworkAdaptor* mock_audio_network_adaptor; + MockSmoothingFilter* mock_bitrate_smoother; + std::unique_ptr<AudioEncoderOpusImpl> encoder; + std::unique_ptr<rtc::ScopedFakeClock> fake_clock; + AudioEncoderOpusConfig config; +}; + +std::unique_ptr<AudioEncoderOpusStates> CreateCodec(int sample_rate_hz, + size_t num_channels) { + std::unique_ptr<AudioEncoderOpusStates> states = + std::make_unique<AudioEncoderOpusStates>(); + states->mock_audio_network_adaptor = nullptr; + states->fake_clock.reset(new rtc::ScopedFakeClock()); + states->fake_clock->SetTime(Timestamp::Micros(kInitialTimeUs)); + + 
MockAudioNetworkAdaptor** mock_ptr = &states->mock_audio_network_adaptor; + AudioEncoderOpusImpl::AudioNetworkAdaptorCreator creator = + [mock_ptr](absl::string_view, RtcEventLog* event_log) { + std::unique_ptr<MockAudioNetworkAdaptor> adaptor( + new NiceMock<MockAudioNetworkAdaptor>()); + EXPECT_CALL(*adaptor, Die()); + *mock_ptr = adaptor.get(); + return adaptor; + }; + + AudioEncoderOpusConfig config; + config.frame_size_ms = rtc::CheckedDivExact(kDefaultOpusPacSize, 48); + config.sample_rate_hz = sample_rate_hz; + config.num_channels = num_channels; + config.bitrate_bps = kDefaultOpusRate; + config.application = num_channels == 1 + ? AudioEncoderOpusConfig::ApplicationMode::kVoip + : AudioEncoderOpusConfig::ApplicationMode::kAudio; + config.supported_frame_lengths_ms.push_back(config.frame_size_ms); + states->config = config; + + std::unique_ptr<MockSmoothingFilter> bitrate_smoother( + new MockSmoothingFilter()); + states->mock_bitrate_smoother = bitrate_smoother.get(); + + states->encoder.reset( + new AudioEncoderOpusImpl(states->config, kDefaultOpusPayloadType, creator, + std::move(bitrate_smoother))); + return states; +} + +AudioEncoderRuntimeConfig CreateEncoderRuntimeConfig() { + constexpr int kBitrate = 40000; + constexpr int kFrameLength = 60; + constexpr bool kEnableDtx = false; + constexpr size_t kNumChannels = 1; + AudioEncoderRuntimeConfig config; + config.bitrate_bps = kBitrate; + config.frame_length_ms = kFrameLength; + config.enable_dtx = kEnableDtx; + config.num_channels = kNumChannels; + return config; +} + +void CheckEncoderRuntimeConfig(const AudioEncoderOpusImpl* encoder, + const AudioEncoderRuntimeConfig& config) { + EXPECT_EQ(*config.bitrate_bps, encoder->GetTargetBitrate()); + EXPECT_EQ(*config.frame_length_ms, encoder->next_frame_length_ms()); + EXPECT_EQ(*config.enable_dtx, encoder->GetDtx()); + EXPECT_EQ(*config.num_channels, encoder->num_channels_to_encode()); +} + +// Create 10ms audio data blocks for a total packet size of 
"packet_size_ms". +std::unique_ptr<test::AudioLoop> Create10msAudioBlocks( + const std::unique_ptr<AudioEncoderOpusImpl>& encoder, + int packet_size_ms) { + const std::string file_name = + test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + + std::unique_ptr<test::AudioLoop> speech_data(new test::AudioLoop()); + int audio_samples_per_ms = + rtc::CheckedDivExact(encoder->SampleRateHz(), 1000); + if (!speech_data->Init( + file_name, + packet_size_ms * audio_samples_per_ms * + encoder->num_channels_to_encode(), + 10 * audio_samples_per_ms * encoder->num_channels_to_encode())) + return nullptr; + return speech_data; +} + +} // namespace + +class AudioEncoderOpusTest : public ::testing::TestWithParam<int> { + protected: + int sample_rate_hz_{GetParam()}; +}; +INSTANTIATE_TEST_SUITE_P(Param, + AudioEncoderOpusTest, + ::testing::Values(16000, 48000)); + +TEST_P(AudioEncoderOpusTest, DefaultApplicationModeMono) { + auto states = CreateCodec(sample_rate_hz_, 1); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, DefaultApplicationModeStereo) { + auto states = CreateCodec(sample_rate_hz_, 2); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ChangeApplicationMode) { + auto states = CreateCodec(sample_rate_hz_, 2); + EXPECT_TRUE( + states->encoder->SetApplication(AudioEncoder::Application::kSpeech)); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ResetWontChangeApplicationMode) { + auto states = CreateCodec(sample_rate_hz_, 2); + + // Trigger a reset. + states->encoder->Reset(); + // Verify that the mode is still kAudio. + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio, + states->encoder->application()); + + // Now change to kVoip. 
+ EXPECT_TRUE( + states->encoder->SetApplication(AudioEncoder::Application::kSpeech)); + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); + + // Trigger a reset again. + states->encoder->Reset(); + // Verify that the mode is still kVoip. + EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip, + states->encoder->application()); +} + +TEST_P(AudioEncoderOpusTest, ToggleDtx) { + auto states = CreateCodec(sample_rate_hz_, 2); + // Enable DTX + EXPECT_TRUE(states->encoder->SetDtx(true)); + EXPECT_TRUE(states->encoder->GetDtx()); + // Turn off DTX. + EXPECT_TRUE(states->encoder->SetDtx(false)); + EXPECT_FALSE(states->encoder->GetDtx()); +} + +TEST_P(AudioEncoderOpusTest, + OnReceivedUplinkBandwidthWithoutAudioNetworkAdaptor) { + auto states = CreateCodec(sample_rate_hz_, 1); + // Constants are replicated from audio_encoder_opus.cc. + const int kMinBitrateBps = 6000; + const int kMaxBitrateBps = 510000; + const int kOverheadBytesPerPacket = 64; + states->encoder->OnReceivedOverhead(kOverheadBytesPerPacket); + const int kOverheadBps = 8 * kOverheadBytesPerPacket * + rtc::CheckedDivExact(48000, kDefaultOpusPacSize); + // Set a too low bitrate. + states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps - 1, + absl::nullopt); + EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate()); + // Set a too high bitrate. + states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps + 1, + absl::nullopt); + EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate()); + // Set the minimum rate. + states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps, + absl::nullopt); + EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate()); + // Set the maximum rate. + states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps, + absl::nullopt); + EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate()); + // Set rates from kMinBitrateBps up to 32000 bps. 
+ for (int rate = kMinBitrateBps + kOverheadBps; rate <= 32000 + kOverheadBps; + rate += 1000) { + states->encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt); + EXPECT_EQ(rate - kOverheadBps, states->encoder->GetTargetBitrate()); + } +} + +TEST_P(AudioEncoderOpusTest, SetReceiverFrameLengthRange) { + auto states = CreateCodec(sample_rate_hz_, 2); + // Before calling to `SetReceiverFrameLengthRange`, + // `supported_frame_lengths_ms` should contain only the frame length being + // used. + using ::testing::ElementsAre; + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(states->encoder->next_frame_length_ms())); + states->encoder->SetReceiverFrameLengthRange(0, 12345); + states->encoder->SetReceiverFrameLengthRange(21, 60); + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(40, 60)); + states->encoder->SetReceiverFrameLengthRange(20, 59); + EXPECT_THAT(states->encoder->supported_frame_lengths_ms(), + ElementsAre(20, 40)); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkPacketLossFraction) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any packet loss fraction is fine. 
+ constexpr float kUplinkPacketLoss = 0.1f; + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetUplinkPacketLossFraction(kUplinkPacketLoss)); + states->encoder->OnReceivedUplinkPacketLossFraction(kUplinkPacketLoss); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkBandwidth) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Audio-StableTargetAdaptation/Disabled/"); + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any target audio bitrate is fine. + constexpr int kTargetAudioBitrate = 30000; + constexpr int64_t kProbingIntervalMs = 3000; + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetTargetAudioBitrate(kTargetAudioBitrate)); + EXPECT_CALL(*states->mock_bitrate_smoother, + SetTimeConstantMs(kProbingIntervalMs * 4)); + EXPECT_CALL(*states->mock_bitrate_smoother, AddSample(kTargetAudioBitrate)); + states->encoder->OnReceivedUplinkBandwidth(kTargetAudioBitrate, + kProbingIntervalMs); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + InvokeAudioNetworkAdaptorOnReceivedUplinkAllocation) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + BitrateAllocationUpdate update; + update.target_bitrate = DataRate::BitsPerSec(30000); + update.stable_target_bitrate = DataRate::BitsPerSec(20000); + update.bwe_period = TimeDelta::Millis(200); + EXPECT_CALL(*states->mock_audio_network_adaptor, + SetTargetAudioBitrate(update.target_bitrate.bps())); + 
EXPECT_CALL(*states->mock_audio_network_adaptor, + SetUplinkBandwidth(update.stable_target_bitrate.bps())); + states->encoder->OnReceivedUplinkAllocation(update); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedRtt) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any rtt is fine. + constexpr int kRtt = 30; + EXPECT_CALL(*states->mock_audio_network_adaptor, SetRtt(kRtt)); + states->encoder->OnReceivedRtt(kRtt); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedOverhead) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)); + + // Since using mock audio network adaptor, any overhead is fine. + constexpr size_t kOverhead = 64; + EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead)); + states->encoder->OnReceivedOverhead(kOverhead); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, + PacketLossFractionSmoothedOnSetUplinkPacketLossFraction) { + auto states = CreateCodec(sample_rate_hz_, 2); + + // The values are carefully chosen so that if no smoothing is made, the test + // will fail. + constexpr float kPacketLossFraction_1 = 0.02f; + constexpr float kPacketLossFraction_2 = 0.198f; + // `kSecondSampleTimeMs` is chosen to ease the calculation since + // 0.9999 ^ 6931 = 0.5. + constexpr int64_t kSecondSampleTimeMs = 6931; + + // First time, no filtering. 
+ states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_1); + EXPECT_FLOAT_EQ(0.02f, states->encoder->packet_loss_rate()); + + states->fake_clock->AdvanceTime(TimeDelta::Millis(kSecondSampleTimeMs)); + states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_2); + + // Now the output of packet loss fraction smoother should be + // (0.02 + 0.198) / 2 = 0.109. + EXPECT_NEAR(0.109f, states->encoder->packet_loss_rate(), 0.001); +} + +TEST_P(AudioEncoderOpusTest, PacketLossRateUpperBounded) { + auto states = CreateCodec(sample_rate_hz_, 2); + + states->encoder->OnReceivedUplinkPacketLossFraction(0.5); + EXPECT_FLOAT_EQ(0.2f, states->encoder->packet_loss_rate()); +} + +TEST_P(AudioEncoderOpusTest, DoNotInvokeSetTargetBitrateIfOverheadUnknown) { + auto states = CreateCodec(sample_rate_hz_, 2); + + states->encoder->OnReceivedUplinkBandwidth(kDefaultOpusRate * 2, + absl::nullopt); + + // Since `OnReceivedOverhead` has not been called, the codec bitrate should + // not change. + EXPECT_EQ(kDefaultOpusRate, states->encoder->GetTargetBitrate()); +} + +// Verifies that the complexity adaptation in the config works as intended. +TEST(AudioEncoderOpusTest, ConfigComplexityAdaptation) { + AudioEncoderOpusConfig config; + config.low_rate_complexity = 8; + config.complexity = 6; + + // Bitrate within hysteresis window. Expect empty output. + config.bitrate_bps = 12500; + EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate below hysteresis window. Expect higher complexity. + config.bitrate_bps = 10999; + EXPECT_EQ(8, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate within hysteresis window. Expect empty output. + config.bitrate_bps = 12500; + EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config)); + + // Bitrate above hysteresis window. Expect lower complexity. 
+ config.bitrate_bps = 14001; + EXPECT_EQ(6, AudioEncoderOpusImpl::GetNewComplexity(config)); +} + +// Verifies that the bandwidth adaptation in the config works as intended. +TEST_P(AudioEncoderOpusTest, ConfigBandwidthAdaptation) { + AudioEncoderOpusConfig config; + const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000); + const std::vector<int16_t> silence( + opus_rate_khz * config.frame_size_ms * config.num_channels, 0); + constexpr size_t kMaxBytes = 1000; + uint8_t bitstream[kMaxBytes]; + + OpusEncInst* inst; + EXPECT_EQ(0, WebRtcOpus_EncoderCreate( + &inst, config.num_channels, + config.application == + AudioEncoderOpusConfig::ApplicationMode::kVoip + ? 0 + : 1, + sample_rate_hz_)); + + // Bitrate below minimum wideband. Expect narrowband. + config.bitrate_bps = absl::optional<int>(7999); + auto bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst); + EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND), bandwidth); + WebRtcOpus_SetBandwidth(inst, *bandwidth); + // It is necessary to encode here because Opus has some logic in the encoder + // that goes from the user-set bandwidth to the used and returned one. + WebRtcOpus_Encode(inst, silence.data(), + rtc::CheckedDivExact(silence.size(), config.num_channels), + kMaxBytes, bitstream); + + // Bitrate not yet above maximum narrowband. Expect empty. + config.bitrate_bps = absl::optional<int>(9000); + bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst); + EXPECT_EQ(absl::optional<int>(), bandwidth); + + // Bitrate above maximum narrowband. Expect wideband. + config.bitrate_bps = absl::optional<int>(9001); + bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst); + EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND), bandwidth); + WebRtcOpus_SetBandwidth(inst, *bandwidth); + // It is necessary to encode here because Opus has some logic in the encoder + // that goes from the user-set bandwidth to the used and returned one. 
+ WebRtcOpus_Encode(inst, silence.data(), + rtc::CheckedDivExact(silence.size(), config.num_channels), + kMaxBytes, bitstream); + + // Bitrate not yet below minimum wideband. Expect empty. + config.bitrate_bps = absl::optional<int>(8000); + bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst); + EXPECT_EQ(absl::optional<int>(), bandwidth); + + // Bitrate above automatic threshold. Expect automatic. + config.bitrate_bps = absl::optional<int>(12001); + bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst); + EXPECT_EQ(absl::optional<int>(OPUS_AUTO), bandwidth); + + EXPECT_EQ(0, WebRtcOpus_EncoderFree(inst)); +} + +TEST_P(AudioEncoderOpusTest, EmptyConfigDoesNotAffectEncoderSettings) { + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + + auto config = CreateEncoderRuntimeConfig(); + AudioEncoderRuntimeConfig empty_config; + + EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig()) + .WillOnce(Return(config)) + .WillOnce(Return(empty_config)); + + constexpr size_t kOverhead = 64; + EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead)) + .Times(2); + states->encoder->OnReceivedOverhead(kOverhead); + states->encoder->OnReceivedOverhead(kOverhead); + + CheckEncoderRuntimeConfig(states->encoder.get(), config); +} + +TEST_P(AudioEncoderOpusTest, UpdateUplinkBandwidthInAudioNetworkAdaptor) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Audio-StableTargetAdaptation/Disabled/"); + auto states = CreateCodec(sample_rate_hz_, 2); + states->encoder->EnableAudioNetworkAdaptor("", nullptr); + const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000); + const std::vector<int16_t> audio(opus_rate_khz * 10 * 2, 0); + rtc::Buffer encoded; + EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage()) + .WillOnce(Return(50000)); + EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(50000)); + states->encoder->Encode( + 0, 
rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded); + + // Repeat update uplink bandwidth tests. + for (int i = 0; i < 5; i++) { + // Don't update till it is time to update again. + states->fake_clock->AdvanceTime(TimeDelta::Millis( + states->config.uplink_bandwidth_update_interval_ms - 1)); + states->encoder->Encode( + 0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded); + + // Update when it is time to update. + EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage()) + .WillOnce(Return(40000)); + EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(40000)); + states->fake_clock->AdvanceTime(TimeDelta::Millis(1)); + states->encoder->Encode( + 0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded); + } +} + +TEST_P(AudioEncoderOpusTest, EncodeAtMinBitrate) { + auto states = CreateCodec(sample_rate_hz_, 1); + constexpr int kNumPacketsToEncode = 2; + auto audio_frames = + Create10msAudioBlocks(states->encoder, kNumPacketsToEncode * 20); + ASSERT_TRUE(audio_frames) << "Create10msAudioBlocks failed"; + rtc::Buffer encoded; + uint32_t rtp_timestamp = 12345; // Just a number not important to this test. + + states->encoder->OnReceivedUplinkBandwidth(0, absl::nullopt); + for (int packet_index = 0; packet_index < kNumPacketsToEncode; + packet_index++) { + // Make sure we are not encoding before we have enough data for + // a 20ms packet. + for (int index = 0; index < 1; index++) { + states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(), + &encoded); + EXPECT_EQ(0u, encoded.size()); + } + + // Should encode now. 
+ states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(), + &encoded); + EXPECT_GT(encoded.size(), 0u); + encoded.Clear(); + } +} + +TEST(AudioEncoderOpusTest, TestConfigDefaults) { + const auto config_opt = AudioEncoderOpus::SdpToConfig({"opus", 48000, 2}); + ASSERT_TRUE(config_opt); + EXPECT_EQ(48000, config_opt->max_playback_rate_hz); + EXPECT_EQ(1u, config_opt->num_channels); + EXPECT_FALSE(config_opt->fec_enabled); + EXPECT_FALSE(config_opt->dtx_enabled); + EXPECT_EQ(20, config_opt->frame_size_ms); +} + +TEST(AudioEncoderOpusTest, TestConfigFromParams) { + const auto config1 = CreateConfigWithParameters({{"stereo", "0"}}); + EXPECT_EQ(1U, config1.num_channels); + + const auto config2 = CreateConfigWithParameters({{"stereo", "1"}}); + EXPECT_EQ(2U, config2.num_channels); + + const auto config3 = CreateConfigWithParameters({{"useinbandfec", "0"}}); + EXPECT_FALSE(config3.fec_enabled); + + const auto config4 = CreateConfigWithParameters({{"useinbandfec", "1"}}); + EXPECT_TRUE(config4.fec_enabled); + + const auto config5 = CreateConfigWithParameters({{"usedtx", "0"}}); + EXPECT_FALSE(config5.dtx_enabled); + + const auto config6 = CreateConfigWithParameters({{"usedtx", "1"}}); + EXPECT_TRUE(config6.dtx_enabled); + + const auto config7 = CreateConfigWithParameters({{"cbr", "0"}}); + EXPECT_FALSE(config7.cbr_enabled); + + const auto config8 = CreateConfigWithParameters({{"cbr", "1"}}); + EXPECT_TRUE(config8.cbr_enabled); + + const auto config9 = + CreateConfigWithParameters({{"maxplaybackrate", "12345"}}); + EXPECT_EQ(12345, config9.max_playback_rate_hz); + + const auto config10 = + CreateConfigWithParameters({{"maxaveragebitrate", "96000"}}); + EXPECT_EQ(96000, config10.bitrate_bps); + + const auto config11 = CreateConfigWithParameters({{"maxptime", "40"}}); + for (int frame_length : config11.supported_frame_lengths_ms) { + EXPECT_LE(frame_length, 40); + } + + const auto config12 = CreateConfigWithParameters({{"minptime", "40"}}); + for (int 
frame_length : config12.supported_frame_lengths_ms) { + EXPECT_GE(frame_length, 40); + } + + const auto config13 = CreateConfigWithParameters({{"ptime", "40"}}); + EXPECT_EQ(40, config13.frame_size_ms); + + constexpr int kMinSupportedFrameLength = 10; + constexpr int kMaxSupportedFrameLength = + WEBRTC_OPUS_SUPPORT_120MS_PTIME ? 120 : 60; + + const auto config14 = CreateConfigWithParameters({{"ptime", "1"}}); + EXPECT_EQ(kMinSupportedFrameLength, config14.frame_size_ms); + + const auto config15 = CreateConfigWithParameters({{"ptime", "2000"}}); + EXPECT_EQ(kMaxSupportedFrameLength, config15.frame_size_ms); +} + +TEST(AudioEncoderOpusTest, TestConfigFromInvalidParams) { + const webrtc::SdpAudioFormat format("opus", 48000, 2); + const auto default_config = *AudioEncoderOpus::SdpToConfig(format); +#if WEBRTC_OPUS_SUPPORT_120MS_PTIME + const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60, 120}); +#else + const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60}); +#endif + + AudioEncoderOpusConfig config; + config = CreateConfigWithParameters({{"stereo", "invalid"}}); + EXPECT_EQ(default_config.num_channels, config.num_channels); + + config = CreateConfigWithParameters({{"useinbandfec", "invalid"}}); + EXPECT_EQ(default_config.fec_enabled, config.fec_enabled); + + config = CreateConfigWithParameters({{"usedtx", "invalid"}}); + EXPECT_EQ(default_config.dtx_enabled, config.dtx_enabled); + + config = CreateConfigWithParameters({{"cbr", "invalid"}}); + EXPECT_EQ(default_config.cbr_enabled, config.cbr_enabled); + + config = CreateConfigWithParameters({{"maxplaybackrate", "0"}}); + EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz); + + config = CreateConfigWithParameters({{"maxplaybackrate", "-23"}}); + EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz); + + config = CreateConfigWithParameters({{"maxplaybackrate", "not a number!"}}); + EXPECT_EQ(default_config.max_playback_rate_hz, 
config.max_playback_rate_hz); + + config = CreateConfigWithParameters({{"maxaveragebitrate", "0"}}); + EXPECT_EQ(6000, config.bitrate_bps); + + config = CreateConfigWithParameters({{"maxaveragebitrate", "-1000"}}); + EXPECT_EQ(6000, config.bitrate_bps); + + config = CreateConfigWithParameters({{"maxaveragebitrate", "1024000"}}); + EXPECT_EQ(510000, config.bitrate_bps); + + config = CreateConfigWithParameters({{"maxaveragebitrate", "not a number!"}}); + EXPECT_EQ(default_config.bitrate_bps, config.bitrate_bps); + + config = CreateConfigWithParameters({{"maxptime", "invalid"}}); + EXPECT_EQ(default_supported_frame_lengths_ms, + config.supported_frame_lengths_ms); + + config = CreateConfigWithParameters({{"minptime", "invalid"}}); + EXPECT_EQ(default_supported_frame_lengths_ms, + config.supported_frame_lengths_ms); + + config = CreateConfigWithParameters({{"ptime", "invalid"}}); + EXPECT_EQ(default_supported_frame_lengths_ms, + config.supported_frame_lengths_ms); +} + +TEST(AudioEncoderOpusTest, GetFrameLenghtRange) { + AudioEncoderOpusConfig config = + CreateConfigWithParameters({{"maxptime", "10"}, {"ptime", "10"}}); + std::unique_ptr<AudioEncoder> encoder = + AudioEncoderOpus::MakeAudioEncoder(config, kDefaultOpusPayloadType); + auto ptime = webrtc::TimeDelta::Millis(10); + absl::optional<std::pair<webrtc::TimeDelta, webrtc::TimeDelta>> range = { + {ptime, ptime}}; + EXPECT_EQ(encoder->GetFrameLengthRange(), range); +} + +// Test that bitrate will be overridden by the "maxaveragebitrate" parameter. +// Also test that the "maxaveragebitrate" can't be set to values outside the +// range of 6000 and 510000 +TEST(AudioEncoderOpusTest, SetSendCodecOpusMaxAverageBitrate) { + // Ignore if less than 6000. + const auto config1 = AudioEncoderOpus::SdpToConfig( + {"opus", 48000, 2, {{"maxaveragebitrate", "5999"}}}); + EXPECT_EQ(6000, config1->bitrate_bps); + + // Ignore if larger than 510000. 
+ const auto config2 = AudioEncoderOpus::SdpToConfig( + {"opus", 48000, 2, {{"maxaveragebitrate", "510001"}}}); + EXPECT_EQ(510000, config2->bitrate_bps); + + const auto config3 = AudioEncoderOpus::SdpToConfig( + {"opus", 48000, 2, {{"maxaveragebitrate", "200000"}}}); + EXPECT_EQ(200000, config3->bitrate_bps); +} + +// Test maxplaybackrate <= 8000 triggers Opus narrow band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateNb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "8000"}}); + EXPECT_EQ(8000, config.max_playback_rate_hz); + EXPECT_EQ(12000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "8000"}, {"stereo", "1"}}); + EXPECT_EQ(8000, config.max_playback_rate_hz); + EXPECT_EQ(24000, config.bitrate_bps); +} + +// Test 8000 < maxplaybackrate <= 12000 triggers Opus medium band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateMb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "8001"}}); + EXPECT_EQ(8001, config.max_playback_rate_hz); + EXPECT_EQ(20000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "8001"}, {"stereo", "1"}}); + EXPECT_EQ(8001, config.max_playback_rate_hz); + EXPECT_EQ(40000, config.bitrate_bps); +} + +// Test 12000 < maxplaybackrate <= 16000 triggers Opus wide band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateWb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "12001"}}); + EXPECT_EQ(12001, config.max_playback_rate_hz); + EXPECT_EQ(20000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "12001"}, {"stereo", "1"}}); + EXPECT_EQ(12001, config.max_playback_rate_hz); + EXPECT_EQ(40000, config.bitrate_bps); +} + +// Test 16000 < maxplaybackrate <= 24000 triggers Opus super wide band mode. 
+TEST(AudioEncoderOpusTest, SetMaxPlaybackRateSwb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "16001"}}); + EXPECT_EQ(16001, config.max_playback_rate_hz); + EXPECT_EQ(32000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "16001"}, {"stereo", "1"}}); + EXPECT_EQ(16001, config.max_playback_rate_hz); + EXPECT_EQ(64000, config.bitrate_bps); +} + +// Test 24000 < maxplaybackrate triggers Opus full band mode. +TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) { + auto config = CreateConfigWithParameters({{"maxplaybackrate", "24001"}}); + EXPECT_EQ(24001, config.max_playback_rate_hz); + EXPECT_EQ(32000, config.bitrate_bps); + + config = CreateConfigWithParameters( + {{"maxplaybackrate", "24001"}, {"stereo", "1"}}); + EXPECT_EQ(24001, config.max_playback_rate_hz); + EXPECT_EQ(64000, config.bitrate_bps); +} + +TEST_P(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) { + // Create encoder with DTX enabled. + AudioEncoderOpusConfig config; + config.dtx_enabled = true; + config.sample_rate_hz = sample_rate_hz_; + constexpr int payload_type = 17; + const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + + // Open file containing speech and silence. + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + test::AudioLoop audio_loop; + // Use the file as if it were sampled at our desired input rate. + const size_t max_loop_length_samples = + sample_rate_hz_ * 10; // Max 10 second loop. + const size_t input_block_size_samples = + 10 * sample_rate_hz_ / 1000; // 10 ms. + EXPECT_TRUE(audio_loop.Init(kInputFileName, max_loop_length_samples, + input_block_size_samples)); + + // Encode. 
+ AudioEncoder::EncodedInfo info; + rtc::Buffer encoded(500); + int nonspeech_frames = 0; + int max_nonspeech_frames = 0; + int dtx_frames = 0; + int max_dtx_frames = 0; + uint32_t rtp_timestamp = 0u; + for (size_t i = 0; i < 500; ++i) { + encoded.Clear(); + + // Every second call to the encoder will generate an Opus packet. + for (int j = 0; j < 2; j++) { + info = + encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded); + rtp_timestamp += input_block_size_samples; + } + + // Bookkeeping of number of DTX frames. + if (info.encoded_bytes <= 2) { + ++dtx_frames; + } else { + if (dtx_frames > max_dtx_frames) + max_dtx_frames = dtx_frames; + dtx_frames = 0; + } + + // Bookkeeping of number of non-speech frames. + if (info.speech == 0) { + ++nonspeech_frames; + } else { + if (nonspeech_frames > max_nonspeech_frames) + max_nonspeech_frames = nonspeech_frames; + nonspeech_frames = 0; + } + } + + // Maximum number of consecutive non-speech packets should exceed 15. + EXPECT_GT(max_nonspeech_frames, 15); +} + +TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx/Enabled/"); + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/testfile16kHz", "pcm"); + constexpr int kSampleRateHz = 16000; + AudioEncoderOpusConfig config; + config.dtx_enabled = true; + config.sample_rate_hz = kSampleRateHz; + constexpr int payload_type = 17; + const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + test::AudioLoop audio_loop; + constexpr size_t kMaxLoopLengthSaples = kSampleRateHz * 11.6f; + constexpr size_t kInputBlockSizeSamples = kSampleRateHz / 100; + EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSaples, + kInputBlockSizeSamples)); + AudioEncoder::EncodedInfo info; + rtc::Buffer encoded(500); + // Encode the audio file and store the last part that corresponds to silence. 
+ constexpr size_t kSilenceDurationSamples = kSampleRateHz * 0.2f; + std::array<int16_t, kSilenceDurationSamples> silence; + uint32_t rtp_timestamp = 0; + bool last_packet_dtx_frame = false; + bool opus_entered_dtx = false; + bool silence_filled = false; + size_t timestamp_start_silence = 0; + while (!silence_filled && rtp_timestamp < kMaxLoopLengthSaples) { + encoded.Clear(); + // Every second call to the encoder will generate an Opus packet. + for (int j = 0; j < 2; j++) { + auto next_frame = audio_loop.GetNextBlock(); + info = encoder->Encode(rtp_timestamp, next_frame, &encoded); + if (opus_entered_dtx) { + size_t silence_frame_start = rtp_timestamp - timestamp_start_silence; + silence_filled = silence_frame_start >= kSilenceDurationSamples; + if (!silence_filled) { + std::copy(next_frame.begin(), next_frame.end(), + silence.begin() + silence_frame_start); + } + } + rtp_timestamp += kInputBlockSizeSamples; + } + EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame); + last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2 + : last_packet_dtx_frame; + if (info.encoded_bytes <= 2 && !opus_entered_dtx) { + timestamp_start_silence = rtp_timestamp; + } + opus_entered_dtx = info.encoded_bytes <= 2; + } + + EXPECT_TRUE(silence_filled); + // The copied 200 ms of silence is used for creating 6 bursts that are fed to + // the encoder, the first three ones with a larger energy and the last three + // with a lower energy. This test verifies that the encoder just sends refresh + // DTX packets during the last bursts. + int number_non_empty_packets_during_increase = 0; + int number_non_empty_packets_during_decrease = 0; + for (size_t burst = 0; burst < 6; ++burst) { + uint32_t rtp_timestamp_start = rtp_timestamp; + const bool increase_noise = burst < 3; + const float gain = increase_noise ? 
1.4f : 0.0f; + while (rtp_timestamp < rtp_timestamp_start + kSilenceDurationSamples) { + encoded.Clear(); + // Every second call to the encoder will generate an Opus packet. + for (int j = 0; j < 2; j++) { + std::array<int16_t, kInputBlockSizeSamples> silence_frame; + size_t silence_frame_start = rtp_timestamp - rtp_timestamp_start; + std::transform( + silence.begin() + silence_frame_start, + silence.begin() + silence_frame_start + kInputBlockSizeSamples, + silence_frame.begin(), [gain](float s) { return gain * s; }); + info = encoder->Encode(rtp_timestamp, silence_frame, &encoded); + rtp_timestamp += kInputBlockSizeSamples; + } + EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame); + last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2 + : last_packet_dtx_frame; + // Tracking the number of non empty packets. + if (increase_noise && info.encoded_bytes > 2) { + number_non_empty_packets_during_increase++; + } + if (!increase_noise && info.encoded_bytes > 2) { + number_non_empty_packets_during_decrease++; + } + } + } + // Check that the refresh DTX packets are just sent during the decrease energy + // region. + EXPECT_EQ(number_non_empty_packets_during_increase, 0); + EXPECT_GT(number_non_empty_packets_during_decrease, 0); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc new file mode 100644 index 0000000000..38b60c6187 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_bandwidth_unittest.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/audio_codecs/opus/audio_decoder_opus.h" +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/window_generator.h" +#include "modules/audio_coding/codecs/opus/test/lapped_transform.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "test/field_trial.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace { + +constexpr size_t kNumChannels = 1u; +constexpr int kSampleRateHz = 48000; +constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 50; // 50 seconds. +constexpr size_t kInputBlockSizeSamples = 10 * kSampleRateHz / 1000; // 10 ms +constexpr size_t kOutputBlockSizeSamples = 20 * kSampleRateHz / 1000; // 20 ms +constexpr size_t kFftSize = 1024; +constexpr size_t kNarrowbandSize = 4000 * kFftSize / kSampleRateHz; +constexpr float kKbdAlpha = 1.5f; + +class PowerRatioEstimator : public LappedTransform::Callback { + public: + PowerRatioEstimator() : low_pow_(0.f), high_pow_(0.f) { + WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); + transform_.reset(new LappedTransform(kNumChannels, 0u, + kInputBlockSizeSamples, window_, + kFftSize, kFftSize / 2, this)); + } + + void ProcessBlock(float* data) { transform_->ProcessChunk(&data, nullptr); } + + float PowerRatio() { return high_pow_ / low_pow_; } + + protected: + void ProcessAudioBlock(const std::complex<float>* const* input, + size_t num_input_channels, + size_t num_freq_bins, + size_t num_output_channels, + std::complex<float>* const* output) override { + float low_pow = 0.f; + float high_pow = 0.f; + for (size_t i = 0u; i < num_input_channels; ++i) { + for (size_t j = 0u; j < kNarrowbandSize; ++j) { + float low_mag = std::abs(input[i][j]); + low_pow += low_mag * low_mag; + float high_mag = std::abs(input[i][j + kNarrowbandSize]); + high_pow 
+= high_mag * high_mag; + } + } + low_pow_ += low_pow / (num_input_channels * kFftSize); + high_pow_ += high_pow / (num_input_channels * kFftSize); + } + + private: + std::unique_ptr<LappedTransform> transform_; + float window_[kFftSize]; + float low_pow_; + float high_pow_; +}; + +float EncodedPowerRatio(AudioEncoder* encoder, + AudioDecoder* decoder, + test::AudioLoop* audio_loop) { + // Encode and decode. + uint32_t rtp_timestamp = 0u; + constexpr size_t kBufferSize = 500; + rtc::Buffer encoded(kBufferSize); + std::vector<int16_t> decoded(kOutputBlockSizeSamples); + std::vector<float> decoded_float(kOutputBlockSizeSamples); + AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech; + PowerRatioEstimator power_ratio_estimator; + for (size_t i = 0; i < 1000; ++i) { + encoded.Clear(); + AudioEncoder::EncodedInfo encoder_info = + encoder->Encode(rtp_timestamp, audio_loop->GetNextBlock(), &encoded); + rtp_timestamp += kInputBlockSizeSamples; + if (encoded.size() > 0) { + int decoder_info = decoder->Decode( + encoded.data(), encoded.size(), kSampleRateHz, + decoded.size() * sizeof(decoded[0]), decoded.data(), &speech_type); + if (decoder_info > 0) { + S16ToFloat(decoded.data(), decoded.size(), decoded_float.data()); + power_ratio_estimator.ProcessBlock(decoded_float.data()); + } + } + } + return power_ratio_estimator.PowerRatio(); +} + +} // namespace + +// TODO(ivoc): Remove this test, WebRTC-AdjustOpusBandwidth is obsolete. +TEST(BandwidthAdaptationTest, BandwidthAdaptationTest) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-AdjustOpusBandwidth/Enabled/"); + + constexpr float kMaxNarrowbandRatio = 0.0035f; + constexpr float kMinWidebandRatio = 0.01f; + + // Create encoder. + AudioEncoderOpusConfig enc_config; + enc_config.bitrate_bps = absl::optional<int>(7999); + enc_config.num_channels = kNumChannels; + constexpr int payload_type = 17; + auto encoder = AudioEncoderOpus::MakeAudioEncoder(enc_config, payload_type); + + // Create decoder. 
+ AudioDecoderOpus::Config dec_config; + dec_config.num_channels = kNumChannels; + auto decoder = AudioDecoderOpus::MakeAudioDecoder(dec_config); + + // Open speech file. + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm"); + test::AudioLoop audio_loop; + EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz()); + ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples, + kInputBlockSizeSamples)); + + EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop), + kMaxNarrowbandRatio); + + encoder->OnReceivedTargetAudioBitrate(9000); + EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop), + kMaxNarrowbandRatio); + + encoder->OnReceivedTargetAudioBitrate(9001); + EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop), + kMinWidebandRatio); + + encoder->OnReceivedTargetAudioBitrate(8000); + EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop), + kMinWidebandRatio); + + encoder->OnReceivedTargetAudioBitrate(12001); + EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop), + kMinWidebandRatio); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc new file mode 100644 index 0000000000..e8c131092c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_complexity_unittest.cc @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/audio_codecs/opus/audio_encoder_opus.h" +#include "api/test/metrics/global_metrics_logger_and_exporter.h" +#include "api/test/metrics/metric.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "rtc_base/time_utils.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace { + +using ::webrtc::test::GetGlobalMetricsLogger; +using ::webrtc::test::ImprovementDirection; +using ::webrtc::test::Unit; + +// Builds an Opus encoder from `config`, feeds it 10000 blocks of 10 ms taken +// from a test::AudioLoop over a speech file, and returns the elapsed time in +// milliseconds as reported by rtc::TimeMillis(). +int64_t RunComplexityTest(const AudioEncoderOpusConfig& config) { + // Create encoder. + constexpr int payload_type = 17; + const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type); + // Open speech file. + const std::string kInputFileName = + webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm"); + test::AudioLoop audio_loop; + constexpr int kSampleRateHz = 48000; + EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz()); + constexpr size_t kMaxLoopLengthSamples = + kSampleRateHz * 10; // 10 second loop. + constexpr size_t kInputBlockSizeSamples = + 10 * kSampleRateHz / 1000; // 10 ms. + EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples, + kInputBlockSizeSamples)); + // Encode. + const int64_t start_time_ms = rtc::TimeMillis(); + AudioEncoder::EncodedInfo info; + rtc::Buffer encoded(500); + uint32_t rtp_timestamp = 0u; + for (size_t i = 0; i < 10000; ++i) { + encoded.Clear(); + info = encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded); + rtp_timestamp += kInputBlockSizeSamples; + } + return rtc::TimeMillis() - start_time_ms; +} + +// This test encodes an audio file using Opus twice with different bitrates +// (~11 kbps and 15.5 kbps). The runtime for each is measured, and the ratio +// between the two is calculated and tracked. This test explicitly sets the +// low_rate_complexity to 9. 
When running on desktop platforms, this is the same +// as the regular complexity, and the expectation is that the resulting ratio +// should be less than 100% (since the encoder runs faster at lower bitrates, +// given a fixed complexity setting). On the other hand, when running on +// mobiles, the regular complexity is 5, and we expect the resulting ratio to +// be higher, since we have explicitly asked for a higher complexity setting at +// the lower rate. +TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_On) { + // Create config. + AudioEncoderOpusConfig config; + // The limit -- including the hysteresis window -- at which the complexity + // should be increased. + config.bitrate_bps = 11000 - 1; + config.low_rate_complexity = 9; + int64_t runtime_10999bps = RunComplexityTest(config); + + config.bitrate_bps = 15500; + int64_t runtime_15500bps = RunComplexityTest(config); + + GetGlobalMetricsLogger()->LogSingleValueMetric( + "opus_encoding_complexity_ratio", "adaptation_on", + 100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent, + ImprovementDirection::kNeitherIsBetter); +} + +// This test is identical to the one above, but without the complexity +// adaptation enabled (neither on desktop, nor on mobile). The expectation is +// that the resulting ratio is less than 100% at all times. +TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_Off) { + // Create config. + AudioEncoderOpusConfig config; + // The limit -- including the hysteresis window -- at which the complexity + // should be increased (but not in this test since complexity adaptation is + // disabled). 
+ config.bitrate_bps = 11000 - 1; + int64_t runtime_10999bps = RunComplexityTest(config); + + config.bitrate_bps = 15500; + int64_t runtime_15500bps = RunComplexityTest(config); + + GetGlobalMetricsLogger()->LogSingleValueMetric( + "opus_encoding_complexity_ratio", "adaptation_off", + 100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent, + ImprovementDirection::kNeitherIsBetter); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc new file mode 100644 index 0000000000..815f26e31c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_fec_test.cc @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <memory> + +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +using std::get; +using std::string; +using std::tuple; +using ::testing::TestWithParam; + +namespace webrtc { + +// Define coding parameter as <channels, bit_rate, filename, extension>. 
+typedef tuple<size_t, int, string, string> coding_param; +typedef struct mode mode; + +struct mode { + bool fec; + uint8_t target_packet_loss_rate; +}; + +const int kOpusBlockDurationMs = 20; +const int kOpusSamplingKhz = 48; + +class OpusFecTest : public TestWithParam<coding_param> { + protected: + OpusFecTest(); + + void SetUp() override; + void TearDown() override; + + virtual void EncodeABlock(); + + virtual void DecodeABlock(bool lost_previous, bool lost_current); + + int block_duration_ms_; + int sampling_khz_; + size_t block_length_sample_; + + size_t channels_; + int bit_rate_; + + size_t data_pointer_; + size_t loop_length_samples_; + size_t max_bytes_; + size_t encoded_bytes_; + + WebRtcOpusEncInst* opus_encoder_; + WebRtcOpusDecInst* opus_decoder_; + + string in_filename_; + + std::unique_ptr<int16_t[]> in_data_; + std::unique_ptr<int16_t[]> out_data_; + std::unique_ptr<uint8_t[]> bit_stream_; +}; + +void OpusFecTest::SetUp() { + channels_ = get<0>(GetParam()); + bit_rate_ = get<1>(GetParam()); + printf("Coding %zu channel signal at %d bps.\n", channels_, bit_rate_); + + in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam())); + + FILE* fp = fopen(in_filename_.c_str(), "rb"); + ASSERT_FALSE(fp == NULL); + + // Obtain file size. + fseek(fp, 0, SEEK_END); + loop_length_samples_ = ftell(fp) / sizeof(int16_t); + rewind(fp); + + // Allocate memory to contain the whole file. + in_data_.reset( + new int16_t[loop_length_samples_ + block_length_sample_ * channels_]); + + // Copy the file into the buffer. + ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp), + loop_length_samples_); + fclose(fp); + + // The audio will be used in a looped manner. To ease the acquisition of an + // audio frame that crosses the end of the excerpt, we add an extra block + // length of samples to the end of the array, starting over again from the + // beginning of the array. 
Audio frames cross the end of the excerpt always + // appear as a continuum of memory. + memcpy(&in_data_[loop_length_samples_], &in_data_[0], + block_length_sample_ * channels_ * sizeof(int16_t)); + + // Maximum number of bytes in output bitstream. + max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t); + + out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]); + bit_stream_.reset(new uint8_t[max_bytes_]); + + // If channels_ == 1, use Opus VOIP mode, otherwise, audio mode. + int app = channels_ == 1 ? 0 : 1; + + // Create encoder memory. + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000)); + // Set bitrate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_)); +} + +void OpusFecTest::TearDown() { + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +OpusFecTest::OpusFecTest() + : block_duration_ms_(kOpusBlockDurationMs), + sampling_khz_(kOpusSamplingKhz), + block_length_sample_( + static_cast<size_t>(block_duration_ms_ * sampling_khz_)), + data_pointer_(0), + max_bytes_(0), + encoded_bytes_(0), + opus_encoder_(NULL), + opus_decoder_(NULL) {} + +void OpusFecTest::EncodeABlock() { + int value = + WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_], + block_length_sample_, max_bytes_, &bit_stream_[0]); + EXPECT_GT(value, 0); + + encoded_bytes_ = static_cast<size_t>(value); +} + +void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) { + int16_t audio_type; + int value_1 = 0, value_2 = 0; + + if (lost_previous) { + // Decode previous frame. + if (!lost_current && + WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_) == 1) { + value_1 = + WebRtcOpus_DecodeFec(opus_decoder_, &bit_stream_[0], encoded_bytes_, + &out_data_[0], &audio_type); + } else { + // Call decoder PLC. 
+ while (value_1 < static_cast<int>(block_length_sample_)) { + // `value_1` counts samples per channel while `out_data_` is interleaved, + // so the write offset must be scaled by the channel count. + int ret = WebRtcOpus_Decode(opus_decoder_, NULL, 0, + &out_data_[value_1 * channels_], &audio_type); + // Abort on failure: a non-positive `ret` would stall or rewind the loop. + ASSERT_EQ(ret, sampling_khz_ * 10); // Should return 10 ms of samples. + value_1 += ret; + } + } + EXPECT_EQ(static_cast<int>(block_length_sample_), value_1); + } + + if (!lost_current) { + // Decode current frame. + value_2 = WebRtcOpus_Decode(opus_decoder_, &bit_stream_[0], encoded_bytes_, + &out_data_[value_1 * channels_], &audio_type); + EXPECT_EQ(static_cast<int>(block_length_sample_), value_2); + } +} + +TEST_P(OpusFecTest, RandomPacketLossTest) { + const int kDurationMs = 200000; + int time_now_ms, fec_frames; + int actual_packet_loss_rate; + bool lost_current, lost_previous; + mode mode_set[3] = {{true, 0}, {false, 0}, {true, 50}}; + + lost_current = false; + for (int i = 0; i < 3; i++) { + if (mode_set[i].fec) { + EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate( + opus_encoder_, mode_set[i].target_packet_loss_rate)); + printf("FEC is ON, target at packet loss rate %d percent.\n", + mode_set[i].target_packet_loss_rate); + } else { + EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_)); + printf("FEC is OFF.\n"); + } + // In this test, we let the target packet loss rate match the actual rate. + actual_packet_loss_rate = mode_set[i].target_packet_loss_rate; + // Run every mode a certain time. + time_now_ms = 0; + fec_frames = 0; + while (time_now_ms < kDurationMs) { + // Encode & decode. + EncodeABlock(); + + // Check if payload has FEC. + int fec = WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_); + + // If FEC is disabled or the target packet loss rate is set to 0, there + // should be no FEC in the bit stream. 
+ if (!mode_set[i].fec || mode_set[i].target_packet_loss_rate == 0) { + EXPECT_EQ(fec, 0); + } else if (fec == 1) { + fec_frames++; + } + + lost_previous = lost_current; + lost_current = rand() < actual_packet_loss_rate * (RAND_MAX / 100); + DecodeABlock(lost_previous, lost_current); + + time_now_ms += block_duration_ms_; + + // `data_pointer_` is incremented and wrapped across + // `loop_length_samples_`. + data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) % + loop_length_samples_; + } + if (mode_set[i].fec) { + printf("%.2f percent frames has FEC.\n", + static_cast<float>(fec_frames) * block_duration_ms_ / 2000); + } + } +} + +const coding_param param_set[] = { + std::make_tuple(1, + 64000, + string("audio_coding/testfile32kHz"), + string("pcm")), + std::make_tuple(1, + 32000, + string("audio_coding/testfile32kHz"), + string("pcm")), + std::make_tuple(2, + 64000, + string("audio_coding/teststereo32kHz"), + string("pcm"))}; + +// 64 kbps, stereo +INSTANTIATE_TEST_SUITE_P(AllTest, OpusFecTest, ::testing::ValuesIn(param_set)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h new file mode 100644 index 0000000000..92c5c354a7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_inst.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_ + +#include <stddef.h> + +#include "rtc_base/ignore_wundef.h" + +RTC_PUSH_IGNORING_WUNDEF() +#include "third_party/opus/src/include/opus.h" +#include "third_party/opus/src/include/opus_multistream.h" +RTC_POP_IGNORING_WUNDEF() + +struct WebRtcOpusEncInst { + OpusEncoder* encoder; + OpusMSEncoder* multistream_encoder; + size_t channels; + int in_dtx_mode; + bool avoid_noise_pumping_during_dtx; + int sample_rate_hz; + float smooth_energy_non_active_frames; +}; + +struct WebRtcOpusDecInst { + OpusDecoder* decoder; + OpusMSDecoder* multistream_decoder; + int prev_decoded_samples; + bool plc_use_prev_decoded_samples; + size_t channels; + int in_dtx_mode; + int sample_rate_hz; +}; + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc new file mode 100644 index 0000000000..67d8619b34 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.cc @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/opus_interface.h" + +#include <cstdlib> + +#include <numeric> + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +enum { +#if WEBRTC_OPUS_SUPPORT_120MS_PTIME + /* Maximum supported frame size in WebRTC is 120 ms. 
*/ + kWebRtcOpusMaxEncodeFrameSizeMs = 120, +#else + /* Maximum supported frame size in WebRTC is 60 ms. */ + kWebRtcOpusMaxEncodeFrameSizeMs = 60, +#endif + + /* The format allows up to 120 ms frames. Since we don't control the other + * side, we must allow for packets of that size. NetEq is currently limited + * to 60 ms on the receive side. */ + kWebRtcOpusMaxDecodeFrameSizeMs = 120, + + // Duration of audio that each call to packet loss concealment covers. + kWebRtcOpusPlcFrameSizeMs = 10, +}; + +constexpr char kPlcUsePrevDecodedSamplesFieldTrial[] = + "WebRTC-Audio-OpusPlcUsePrevDecodedSamples"; + +constexpr char kAvoidNoisePumpingDuringDtxFieldTrial[] = + "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx"; + +constexpr char kSetSignalVoiceWithDtxFieldTrial[] = + "WebRTC-Audio-OpusSetSignalVoiceWithDtx"; + +static int FrameSizePerChannel(int frame_size_ms, int sample_rate_hz) { + RTC_DCHECK_GT(frame_size_ms, 0); + RTC_DCHECK_EQ(frame_size_ms % 10, 0); + RTC_DCHECK_GT(sample_rate_hz, 0); + RTC_DCHECK_EQ(sample_rate_hz % 1000, 0); + return frame_size_ms * (sample_rate_hz / 1000); +} + +// Maximum sample count per channel. +static int MaxFrameSizePerChannel(int sample_rate_hz) { + return FrameSizePerChannel(kWebRtcOpusMaxDecodeFrameSizeMs, sample_rate_hz); +} + +// Default sample count per channel. +static int DefaultFrameSizePerChannel(int sample_rate_hz) { + return FrameSizePerChannel(20, sample_rate_hz); +} + +// Returns true if the `encoded` payload corresponds to a refresh DTX packet +// whose energy is larger than the expected for non activity packets. 
+static bool WebRtcOpus_IsHighEnergyRefreshDtxPacket( + OpusEncInst* inst, + rtc::ArrayView<const int16_t> frame, + rtc::ArrayView<const uint8_t> encoded) { + if (encoded.size() <= 2) { + return false; + } + int number_frames = + frame.size() / DefaultFrameSizePerChannel(inst->sample_rate_hz); + if (number_frames > 0 && + WebRtcOpus_PacketHasVoiceActivity(encoded.data(), encoded.size()) == 0) { + const float average_frame_energy = + std::accumulate(frame.begin(), frame.end(), 0.0f, + [](float a, int32_t b) { return a + b * b; }) / + number_frames; + if (WebRtcOpus_GetInDtx(inst) == 1 && + average_frame_energy >= inst->smooth_energy_non_active_frames * 0.5f) { + // This is a refresh DTX packet as the encoder is in DTX and has + // produced a payload > 2 bytes. This refresh packet has a higher energy + // than the smooth energy of non activity frames (with a 3 dB negative + // margin) and, therefore, it is flagged as a high energy refresh DTX + // packet. + return true; + } + // The average energy is tracked in a similar way as the modeling of the + // comfort noise in the Silk decoder in Opus + // (third_party/opus/src/silk/CNG.c). 
+ if (average_frame_energy < inst->smooth_energy_non_active_frames * 0.5f) { + inst->smooth_energy_non_active_frames = average_frame_energy; + } else { + inst->smooth_energy_non_active_frames += + (average_frame_energy - inst->smooth_energy_non_active_frames) * + 0.25f; + } + } + return false; +} + +int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, + size_t channels, + int32_t application, + int sample_rate_hz) { + int opus_app; + if (!inst) + return -1; + + switch (application) { + case 0: + opus_app = OPUS_APPLICATION_VOIP; + break; + case 1: + opus_app = OPUS_APPLICATION_AUDIO; + break; + default: + return -1; + } + + OpusEncInst* state = + reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst))); + RTC_DCHECK(state); + + int error; + state->encoder = opus_encoder_create( + sample_rate_hz, static_cast<int>(channels), opus_app, &error); + + if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) { + WebRtcOpus_EncoderFree(state); + return -1; + } + + state->in_dtx_mode = 0; + state->channels = channels; + state->sample_rate_hz = sample_rate_hz; + state->smooth_energy_non_active_frames = 0.0f; + state->avoid_noise_pumping_during_dtx = + webrtc::field_trial::IsEnabled(kAvoidNoisePumpingDuringDtxFieldTrial); + + *inst = state; + return 0; +} + +int16_t WebRtcOpus_MultistreamEncoderCreate( + OpusEncInst** inst, + size_t channels, + int32_t application, + size_t streams, + size_t coupled_streams, + const unsigned char* channel_mapping) { + int opus_app; + if (!inst) + return -1; + + switch (application) { + case 0: + opus_app = OPUS_APPLICATION_VOIP; + break; + case 1: + opus_app = OPUS_APPLICATION_AUDIO; + break; + default: + return -1; + } + + OpusEncInst* state = + reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst))); + RTC_DCHECK(state); + + int error; + const int sample_rate_hz = 48000; + state->multistream_encoder = opus_multistream_encoder_create( + sample_rate_hz, channels, streams, coupled_streams, channel_mapping, + 
opus_app, &error); + + if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) { + WebRtcOpus_EncoderFree(state); + return -1; + } + + state->in_dtx_mode = 0; + state->channels = channels; + state->sample_rate_hz = sample_rate_hz; + state->smooth_energy_non_active_frames = 0.0f; + state->avoid_noise_pumping_during_dtx = false; + + *inst = state; + return 0; +} + +int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) { + if (inst) { + if (inst->encoder) { + opus_encoder_destroy(inst->encoder); + } else { + opus_multistream_encoder_destroy(inst->multistream_encoder); + } + free(inst); + return 0; + } else { + return -1; + } +} + +int WebRtcOpus_Encode(OpusEncInst* inst, + const int16_t* audio_in, + size_t samples, + size_t length_encoded_buffer, + uint8_t* encoded) { + int res; + + if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) { + return -1; + } + + if (inst->encoder) { + res = opus_encode(inst->encoder, (const opus_int16*)audio_in, + static_cast<int>(samples), encoded, + static_cast<opus_int32>(length_encoded_buffer)); + } else { + res = opus_multistream_encode( + inst->multistream_encoder, (const opus_int16*)audio_in, + static_cast<int>(samples), encoded, + static_cast<opus_int32>(length_encoded_buffer)); + } + + if (res <= 0) { + return -1; + } + + if (res <= 2) { + // Indicates DTX since the packet has nothing but a header. In principle, + // there is no need to send this packet. However, we do transmit the first + // occurrence to let the decoder know that the encoder enters DTX mode. + if (inst->in_dtx_mode) { + return 0; + } else { + inst->in_dtx_mode = 1; + return res; + } + } + + if (inst->avoid_noise_pumping_during_dtx && WebRtcOpus_GetUseDtx(inst) == 1 && + WebRtcOpus_IsHighEnergyRefreshDtxPacket( + inst, rtc::MakeArrayView(audio_in, samples), + rtc::MakeArrayView(encoded, res))) { + // This packet is a high energy refresh DTX packet. 
For avoiding an increase + // of the energy in the DTX region at the decoder, this packet is + // substituted by a TOC byte with one empty frame. + // The number of frames described in the TOC byte + // (https://tools.ietf.org/html/rfc6716#section-3.1) are overwritten to + // always indicate one frame (last two bits equal to 0). + encoded[0] = encoded[0] & 0b11111100; + inst->in_dtx_mode = 1; + // The payload is just the TOC byte and has 1 byte as length. + return 1; + } + inst->in_dtx_mode = 0; + return res; +} + +#define ENCODER_CTL(inst, vargs) \ + (inst->encoder \ + ? opus_encoder_ctl(inst->encoder, vargs) \ + : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs)) + +int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) { + opus_int32 set_bandwidth; + + if (!inst) + return -1; + + if (frequency_hz <= 8000) { + set_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + } else if (frequency_hz <= 12000) { + set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + } else if (frequency_hz <= 16000) { + set_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + } else if (frequency_hz <= 24000) { + set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + } else { + set_bandwidth = OPUS_BANDWIDTH_FULLBAND; + } + return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); +} + +int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, + int32_t* result_hz) { + if (inst->encoder) { + if (opus_encoder_ctl(inst->encoder, OPUS_GET_MAX_BANDWIDTH(result_hz)) == + OPUS_OK) { + return 0; + } + return -1; + } + + opus_int32 max_bandwidth; + int s; + int ret; + + max_bandwidth = 0; + ret = OPUS_OK; + s = 0; + while (ret == OPUS_OK) { 
+ OpusEncoder* enc; + opus_int32 bandwidth; + + ret = ENCODER_CTL(inst, OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc)); + if (ret == OPUS_BAD_ARG) + break; + if (ret != OPUS_OK) + return -1; + if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK) + return -1; + + if (max_bandwidth != 0 && max_bandwidth != bandwidth) + return -1; + + max_bandwidth = bandwidth; + s++; + } + *result_hz = max_bandwidth; + return 0; +} + +int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { + if (inst) { + if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) { + int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); + if (ret != OPUS_OK) { + return ret; + } + } + return ENCODER_CTL(inst, OPUS_SET_DTX(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { + if (inst) { + if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) { + int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_AUTO)); + if (ret != OPUS_OK) { + return ret; + } + } + return ENCODER_CTL(inst, OPUS_SET_DTX(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) { + if (inst) { + opus_int32 use_dtx; + if (ENCODER_CTL(inst, OPUS_GET_DTX(&use_dtx)) == 0) { + return use_dtx; + } + } + return -1; +} + +int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_VBR(0)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_VBR(1)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { + if (inst) { + return ENCODER_CTL(inst, 
OPUS_SET_COMPLEXITY(complexity)); + } else { + return -1; + } +} + +int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) { + if (!inst) { + return -1; + } + int32_t bandwidth; + if (ENCODER_CTL(inst, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) { + return bandwidth; + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) { + if (inst) { + return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth)); + } else { + return -1; + } +} + +int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) { + if (!inst) + return -1; + if (num_channels == 0) { + return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO)); + } else if (num_channels == 1 || num_channels == 2) { + return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(num_channels)); + } else { + return -1; + } +} + +int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst) { + if (!inst) { + return -1; + } +#ifdef OPUS_GET_IN_DTX + int32_t in_dtx; + if (ENCODER_CTL(inst, OPUS_GET_IN_DTX(&in_dtx)) == 0) { + return in_dtx; + } +#endif + return -1; +} + +int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, + size_t channels, + int sample_rate_hz) { + int error; + OpusDecInst* state; + + if (inst != NULL) { + // Create Opus decoder state. + state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst))); + if (state == NULL) { + return -1; + } + + state->decoder = + opus_decoder_create(sample_rate_hz, static_cast<int>(channels), &error); + if (error == OPUS_OK && state->decoder) { + // Creation of memory all ok. + state->channels = channels; + state->sample_rate_hz = sample_rate_hz; + state->plc_use_prev_decoded_samples = + webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial); + if (state->plc_use_prev_decoded_samples) { + state->prev_decoded_samples = + DefaultFrameSizePerChannel(state->sample_rate_hz); + } + state->in_dtx_mode = 0; + *inst = state; + return 0; + } + + // If memory allocation was unsuccessful, free the entire state. 
+ if (state->decoder) { + opus_decoder_destroy(state->decoder); + } + free(state); + } + return -1; +} + +int16_t WebRtcOpus_MultistreamDecoderCreate( + OpusDecInst** inst, + size_t channels, + size_t streams, + size_t coupled_streams, + const unsigned char* channel_mapping) { + int error; + OpusDecInst* state; + + if (inst != NULL) { + // Create Opus decoder state. + state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst))); + if (state == NULL) { + return -1; + } + + // Create new memory, always at 48000 Hz. + state->multistream_decoder = opus_multistream_decoder_create( + 48000, channels, streams, coupled_streams, channel_mapping, &error); + + if (error == OPUS_OK && state->multistream_decoder) { + // Creation of memory all ok. + state->channels = channels; + state->sample_rate_hz = 48000; + state->plc_use_prev_decoded_samples = + webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial); + if (state->plc_use_prev_decoded_samples) { + state->prev_decoded_samples = + DefaultFrameSizePerChannel(state->sample_rate_hz); + } + state->in_dtx_mode = 0; + *inst = state; + return 0; + } + + // If memory allocation was unsuccessful, free the entire state. + opus_multistream_decoder_destroy(state->multistream_decoder); + free(state); + } + return -1; +} + +int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) { + if (inst) { + if (inst->decoder) { + opus_decoder_destroy(inst->decoder); + } else if (inst->multistream_decoder) { + opus_multistream_decoder_destroy(inst->multistream_decoder); + } + free(inst); + return 0; + } else { + return -1; + } +} + +size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) { + return inst->channels; +} + +void WebRtcOpus_DecoderInit(OpusDecInst* inst) { + if (inst->decoder) { + opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE); + } else { + opus_multistream_decoder_ctl(inst->multistream_decoder, OPUS_RESET_STATE); + } + inst->in_dtx_mode = 0; +} + +/* For decoder to determine if it is to output speech or comfort noise. 
*/ +static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) { + // Audio type becomes comfort noise if `encoded_byte` is 1 and keeps + // to be so if the following `encoded_byte` are 0 or 1. + if (encoded_bytes == 0 && inst->in_dtx_mode) { + return 2; // Comfort noise. + } else if (encoded_bytes == 1 || encoded_bytes == 2) { + // TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in + // fact a 1-byte TOC with a 1-byte payload. That will be erroneously + // interpreted as comfort noise output, but such a payload is probably + // faulty anyway. + + // TODO(webrtc:10218): This is wrong for multistream opus. Then are several + // single-stream packets glued together with some packet size bytes in + // between. See https://tools.ietf.org/html/rfc6716#appendix-B + inst->in_dtx_mode = 1; + return 2; // Comfort noise. + } else { + inst->in_dtx_mode = 0; + return 0; // Speech. + } +} + +/* `frame_size` is set to maximum Opus frame size in the normal case, and + * is set to the number of samples needed for PLC in case of losses. + * It is up to the caller to make sure the value is correct. 
*/ +static int DecodeNative(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int frame_size, + int16_t* decoded, + int16_t* audio_type, + int decode_fec) { + int res = -1; + if (inst->decoder) { + res = opus_decode( + inst->decoder, encoded, static_cast<opus_int32>(encoded_bytes), + reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec); + } else { + res = opus_multistream_decode(inst->multistream_decoder, encoded, + static_cast<opus_int32>(encoded_bytes), + reinterpret_cast<opus_int16*>(decoded), + frame_size, decode_fec); + } + + if (res <= 0) + return -1; + + *audio_type = DetermineAudioType(inst, encoded_bytes); + + return res; +} + +static int DecodePlc(OpusDecInst* inst, int16_t* decoded) { + int16_t audio_type = 0; + int decoded_samples; + int plc_samples = + FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz); + + if (inst->plc_use_prev_decoded_samples) { + /* The number of samples we ask for is `number_of_lost_frames` times + * `prev_decoded_samples_`. Limit the number of samples to maximum + * `MaxFrameSizePerChannel()`. */ + plc_samples = inst->prev_decoded_samples; + const int max_samples_per_channel = + MaxFrameSizePerChannel(inst->sample_rate_hz); + plc_samples = plc_samples <= max_samples_per_channel + ? 
plc_samples + : max_samples_per_channel; + } + decoded_samples = + DecodeNative(inst, NULL, 0, plc_samples, decoded, &audio_type, 0); + if (decoded_samples < 0) { + return -1; + } + + return decoded_samples; +} + +int WebRtcOpus_Decode(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int16_t* decoded, + int16_t* audio_type) { + int decoded_samples; + + if (encoded_bytes == 0) { + *audio_type = DetermineAudioType(inst, encoded_bytes); + decoded_samples = DecodePlc(inst, decoded); + } else { + decoded_samples = DecodeNative(inst, encoded, encoded_bytes, + MaxFrameSizePerChannel(inst->sample_rate_hz), + decoded, audio_type, 0); + } + if (decoded_samples < 0) { + return -1; + } + + if (inst->plc_use_prev_decoded_samples) { + /* Update decoded sample memory, to be used by the PLC in case of losses. */ + inst->prev_decoded_samples = decoded_samples; + } + + return decoded_samples; +} + +int WebRtcOpus_DecodeFec(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int16_t* decoded, + int16_t* audio_type) { + int decoded_samples; + int fec_samples; + + if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) { + return 0; + } + + fec_samples = + opus_packet_get_samples_per_frame(encoded, inst->sample_rate_hz); + + decoded_samples = DecodeNative(inst, encoded, encoded_bytes, fec_samples, + decoded, audio_type, 1); + if (decoded_samples < 0) { + return -1; + } + + return decoded_samples; +} + +int WebRtcOpus_DurationEst(OpusDecInst* inst, + const uint8_t* payload, + size_t payload_length_bytes) { + if (payload_length_bytes == 0) { + // WebRtcOpus_Decode calls PLC when payload length is zero. So we return + // PLC duration correspondingly. + return WebRtcOpus_PlcDuration(inst); + } + + int frames, samples; + frames = opus_packet_get_nb_frames( + payload, static_cast<opus_int32>(payload_length_bytes)); + if (frames < 0) { + /* Invalid payload data. 
*/ + return 0; + } + samples = + frames * opus_packet_get_samples_per_frame(payload, inst->sample_rate_hz); + if (samples > 120 * inst->sample_rate_hz / 1000) { + // More than 120 ms' worth of samples. + return 0; + } + return samples; +} + +int WebRtcOpus_PlcDuration(OpusDecInst* inst) { + if (inst->plc_use_prev_decoded_samples) { + /* The number of samples we ask for is `number_of_lost_frames` times + * `prev_decoded_samples_`. Limit the number of samples to maximum + * `MaxFrameSizePerChannel()`. */ + const int plc_samples = inst->prev_decoded_samples; + const int max_samples_per_channel = + MaxFrameSizePerChannel(inst->sample_rate_hz); + return plc_samples <= max_samples_per_channel ? plc_samples + : max_samples_per_channel; + } + return FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz); +} + +int WebRtcOpus_FecDurationEst(const uint8_t* payload, + size_t payload_length_bytes, + int sample_rate_hz) { + if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) { + return 0; + } + const int samples = + opus_packet_get_samples_per_frame(payload, sample_rate_hz); + const int samples_per_ms = sample_rate_hz / 1000; + if (samples < 10 * samples_per_ms || samples > 120 * samples_per_ms) { + /* Invalid payload duration. */ + return 0; + } + return samples; +} + +int WebRtcOpus_NumSilkFrames(const uint8_t* payload) { + // For computing the payload length in ms, the sample rate is not important + // since it cancels out. We use 48 kHz, but any valid sample rate would work. + int payload_length_ms = + opus_packet_get_samples_per_frame(payload, 48000) / 48; + if (payload_length_ms < 10) + payload_length_ms = 10; + + int silk_frames; + switch (payload_length_ms) { + case 10: + case 20: + silk_frames = 1; + break; + case 40: + silk_frames = 2; + break; + case 60: + silk_frames = 3; + break; + default: + return 0; // It is actually even an invalid packet. 
+ } + return silk_frames; +} + +// This method is based on Definition of the Opus Audio Codec +// (https://tools.ietf.org/html/rfc6716). Basically, this method is based on +// parsing the LP layer of an Opus packet, particularly the LBRR flag. +int WebRtcOpus_PacketHasFec(const uint8_t* payload, + size_t payload_length_bytes) { + if (payload == NULL || payload_length_bytes == 0) + return 0; + + // In CELT_ONLY mode, packets should not have FEC. + if (payload[0] & 0x80) + return 0; + + int silk_frames = WebRtcOpus_NumSilkFrames(payload); + if (silk_frames == 0) + return 0; // Not valid. + + const int channels = opus_packet_get_nb_channels(payload); + RTC_DCHECK(channels == 1 || channels == 2); + + // Max number of frames in an Opus packet is 48. + opus_int16 frame_sizes[48]; + const unsigned char* frame_data[48]; + + // Parse packet to get the frames. But we only care about the first frame, + // since we can only decode the FEC from the first one. + if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes), + NULL, frame_data, frame_sizes, NULL) < 0) { + return 0; + } + + if (frame_sizes[0] < 1) { + return 0; + } + + // A frame starts with the LP layer. The LP layer begins with two to eight + // header bits.These consist of one VAD bit per SILK frame (up to 3), + // followed by a single flag indicating the presence of LBRR frames. + // For a stereo packet, these first flags correspond to the mid channel, and + // a second set of flags is included for the side channel. Because these are + // the first symbols decoded by the range coder and because they are coded + // as binary values with uniform probability, they can be extracted directly + // from the most significant bits of the first byte of compressed data. + for (int n = 0; n < channels; n++) { + // The LBRR bit for channel 1 is on the (`silk_frames` + 1)-th bit, and + // that of channel 2 is on the |(`silk_frames` + 1) * 2 + 1|-th bit. 
+ if (frame_data[0][0] & (0x80 >> ((n + 1) * (silk_frames + 1) - 1))) + return 1; + } + + return 0; +} + +int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload, + size_t payload_length_bytes) { + if (payload == NULL || payload_length_bytes == 0) + return 0; + + // In CELT_ONLY mode we can not determine whether there is VAD. + if (payload[0] & 0x80) + return -1; + + int silk_frames = WebRtcOpus_NumSilkFrames(payload); + if (silk_frames == 0) + return -1; + + const int channels = opus_packet_get_nb_channels(payload); + RTC_DCHECK(channels == 1 || channels == 2); + + // Max number of frames in an Opus packet is 48. + opus_int16 frame_sizes[48]; + const unsigned char* frame_data[48]; + + // Parse packet to get the frames. + int frames = + opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes), + NULL, frame_data, frame_sizes, NULL); + if (frames < 0) + return -1; + + // Iterate over all Opus frames which may contain multiple SILK frames. + for (int frame = 0; frame < frames; frame++) { + if (frame_sizes[frame] < 1) { + continue; + } + if (frame_data[frame][0] >> (8 - silk_frames)) + return 1; + if (channels == 2 && + (frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames)) + return 1; + } + + return 0; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h new file mode 100644 index 0000000000..89159ce1c0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_interface.h @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "modules/audio_coding/codecs/opus/opus_inst.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Opaque wrapper types for the codec state. +typedef struct WebRtcOpusEncInst OpusEncInst; +typedef struct WebRtcOpusDecInst OpusDecInst; + +/**************************************************************************** + * WebRtcOpus_EncoderCreate(...) + * + * This function creates an Opus encoder that encodes mono or stereo. + * + * Input: + * - channels : number of channels; 1 or 2. + * - application : 0 - VOIP applications. + * Favor speech intelligibility. + * 1 - Audio applications. + * Favor faithfulness to the original input. + * - sample_rate_hz : sample rate of input audio + * + * Output: + * - inst : a pointer to Encoder context that is created + * if success. + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, + size_t channels, + int32_t application, + int sample_rate_hz); + +/**************************************************************************** + * WebRtcOpus_MultistreamEncoderCreate(...) + * + * This function creates an Opus encoder with any supported channel count. + * + * Input: + * - channels : number of channels in the input of the encoder. + * - application : 0 - VOIP applications. + * Favor speech intelligibility. + * 1 - Audio applications. + * Favor faithfulness to the original input. + * - streams : number of streams, as described in RFC 7845. + * - coupled_streams : number of coupled streams, as described in + * RFC 7845. + * - channel_mapping : the channel mapping; pointer to array of + * `channel` bytes, as described in RFC 7845. + * + * Output: + * - inst : a pointer to Encoder context that is created + * if success. 
+ * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_MultistreamEncoderCreate( + OpusEncInst** inst, + size_t channels, + int32_t application, + size_t streams, + size_t coupled_streams, + const unsigned char* channel_mapping); + +int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_Encode(...) + * + * This function encodes audio as a series of Opus frames and inserts + * it into a packet. Input buffer can be any length. + * + * Input: + * - inst : Encoder context + * - audio_in : Input speech data buffer + * - samples : Samples per channel in audio_in + * - length_encoded_buffer : Output buffer size + * + * Output: + * - encoded : Output compressed data buffer + * + * Return value : >=0 - Length (in bytes) of coded data + * -1 - Error + */ +int WebRtcOpus_Encode(OpusEncInst* inst, + const int16_t* audio_in, + size_t samples, + size_t length_encoded_buffer, + uint8_t* encoded); + +/**************************************************************************** + * WebRtcOpus_SetBitRate(...) + * + * This function adjusts the target bitrate of the encoder. + * + * Input: + * - inst : Encoder context + * - rate : New target bitrate + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate); + +/**************************************************************************** + * WebRtcOpus_SetPacketLossRate(...) + * + * This function configures the encoder's expected packet loss percentage. + * + * Input: + * - inst : Encoder context + * - loss_rate : loss percentage in the range 0-100, inclusive. + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate); + +/**************************************************************************** + * WebRtcOpus_SetMaxPlaybackRate(...) + * + * Configures the maximum playback rate for encoding. 
Due to hardware + * limitations, the receiver may render audio up to a playback rate. Opus + * encoder can use this information to optimize for network usage and encoding + * complexity. This will affect the audio bandwidth in the coded audio. However, + * the input/output sample rate is not affected. + * + * Input: + * - inst : Encoder context + * - frequency_hz : Maximum playback rate in Hz. + * This parameter can take any value. The relation + * between the value and the Opus internal mode is + * as following: + * frequency_hz <= 8000 narrow band + * 8000 < frequency_hz <= 12000 medium band + * 12000 < frequency_hz <= 16000 wide band + * 16000 < frequency_hz <= 24000 super wide band + * frequency_hz > 24000 full band + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz); + +/**************************************************************************** + * WebRtcOpus_GetMaxPlaybackRate(...) + * + * Queries the maximum playback rate for encoding. If different single-stream + * encoders have different maximum playback rates, this function fails. + * + * Input: + * - inst : Encoder context. + * Output: + * - result_hz : The maximum playback rate in Hz. + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, + int32_t* result_hz); + +/* TODO(minyue): Check whether an API to check the FEC and the packet loss rate + * is needed. It might not be very useful since there are not many use cases and + * the caller can always maintain the states. */ + +/**************************************************************************** + * WebRtcOpus_EnableFec() + * + * This function enables FEC for encoding. 
+ * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_EnableFec(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_DisableFec() + * + * This function disables FEC for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_DisableFec(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_EnableDtx() + * + * This function enables Opus internal DTX for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_DisableDtx() + * + * This function disables Opus internal DTX for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_GetUseDtx() + * + * This function gets the DTX configuration used for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Encoder does not use DTX. + * 1 - Encoder uses DTX. + * -1 - Error. + */ +int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_EnableCbr() + * + * This function enables CBR for encoding. + * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst); + +/**************************************************************************** + * WebRtcOpus_DisableCbr() + * + * This function disables CBR for encoding. 
+ * + * Input: + * - inst : Encoder context + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst); + +/* + * WebRtcOpus_SetComplexity(...) + * + * This function adjusts the computational complexity. The effect is the same as + * calling the complexity setting of Opus as an Opus encoder related CTL. + * + * Input: + * - inst : Encoder context + * - complexity : New target complexity (0-10, inclusive) + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity); + +/* + * WebRtcOpus_GetBandwidth(...) + * + * This function returns the current bandwidth. + * + * Input: + * - inst : Encoder context + * + * Return value : Bandwidth - Success + * -1 - Error + */ +int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst); + +/* + * WebRtcOpus_SetBandwidth(...) + * + * By default Opus decides which bandwidth to encode the signal in depending on + * the bitrate. This function overrules the previous setting and forces the + * encoder to encode in narrowband/wideband/fullband/etc. + * + * Input: + * - inst : Encoder context + * - bandwidth : New target bandwidth. Valid values are: + * OPUS_BANDWIDTH_NARROWBAND + * OPUS_BANDWIDTH_MEDIUMBAND + * OPUS_BANDWIDTH_WIDEBAND + * OPUS_BANDWIDTH_SUPERWIDEBAND + * OPUS_BANDWIDTH_FULLBAND + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth); + +/* + * WebRtcOpus_GetInDtx(...) + * + * Gets the DTX state of the encoder. + * + * Input: + * - inst : Encoder context + * + * Return value : -1 - Error. + * 1 - Last encoded frame was comfort noise update during DTX. + * 0 - Last encoded frame was encoded with encoder not in DTX. + */ +int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst); + +/* + * WebRtcOpus_SetForceChannels(...) 
+ * + * If the encoder is initialized as a stereo encoder, Opus will by default + * decide whether to encode in mono or stereo based on the bitrate. This + * function overrules the previous setting, and forces the encoder to encode + * in auto/mono/stereo. + * + * If the Encoder is initialized as a mono encoder, and one tries to force + * stereo, the function will return an error. + * + * Input: + * - inst : Encoder context + * - num_channels : 0 - Not forced + * 1 - Mono + * 2 - Stereo + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels); + +int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, + size_t channels, + int sample_rate_hz); + +/**************************************************************************** + * WebRtcOpus_MultistreamDecoderCreate(...) + * + * This function creates an Opus decoder with any supported channel count. + * + * Input: + * - channels : number of output channels that the decoder + * will produce. + * - streams : number of encoded streams, as described in + * RFC 7845. + * - coupled_streams : number of coupled streams, as described in + * RFC 7845. + * - channel_mapping : the channel mapping; pointer to array of + * `channel` bytes, as described in RFC 7845. + * + * Output: + * - inst : a pointer to a Decoder context that is created + * if success. + * + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_MultistreamDecoderCreate( + OpusDecInst** inst, + size_t channels, + size_t streams, + size_t coupled_streams, + const unsigned char* channel_mapping); + +int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst); + +/**************************************************************************** + * WebRtcOpus_DecoderChannels(...) + * + * This function returns the number of channels created for Opus decoder. 
+ */ +size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst); + +/**************************************************************************** + * WebRtcOpus_DecoderInit(...) + * + * This function resets state of the decoder. + * + * Input: + * - inst : Decoder context + */ +void WebRtcOpus_DecoderInit(OpusDecInst* inst); + +/**************************************************************************** + * WebRtcOpus_Decode(...) + * + * This function decodes an Opus packet into one or more audio frames at the + * sample rate the decoder was created with. + * + * Input: + * - inst : Decoder context + * - encoded : Encoded data + * - encoded_bytes : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector + * - audio_type : 0 speech, 2 comfort noise (CNG, reported + * while the decoder is in DTX) + * + * Return value : >0 - Samples per channel in decoded vector + * -1 - Error + */ +int WebRtcOpus_Decode(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int16_t* decoded, + int16_t* audio_type); + +/**************************************************************************** + * WebRtcOpus_DecodeFec(...) + * + * This function decodes the FEC data from an Opus packet into one or more audio + * frames at the sample rate the decoder was created with. + * + * Input: + * - inst : Decoder context + * - encoded : Encoded data + * - encoded_bytes : Bytes in encoded vector + * + * Output: + * - decoded : The decoded vector (previous frame) + * + * Return value : >0 - Samples per channel in decoded vector + * 0 - No FEC data in the packet + * -1 - Error + */ +int WebRtcOpus_DecodeFec(OpusDecInst* inst, + const uint8_t* encoded, + size_t encoded_bytes, + int16_t* decoded, + int16_t* audio_type); + +/**************************************************************************** + * WebRtcOpus_DurationEst(...) + * + * This function calculates the duration of an opus packet. 
+ * Input: + * - inst : Decoder context + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : The duration of the packet, in samples per + * channel. + */ +int WebRtcOpus_DurationEst(OpusDecInst* inst, + const uint8_t* payload, + size_t payload_length_bytes); + +/**************************************************************************** + * WebRtcOpus_PlcDuration(...) + * + * This function calculates the duration of a frame returned by packet loss + * concealment (PLC). + * + * Input: + * - inst : Decoder context + * + * Return value : The duration of a frame returned by PLC, in + * samples per channel. + */ +int WebRtcOpus_PlcDuration(OpusDecInst* inst); + +/* TODO(minyue): Check whether it is needed to add a decoder context to the + * arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells + * the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used. + * So it may be advisable to remove it from WebRtcOpus_DurationEst(...). */ + +/**************************************************************************** + * WebRtcOpus_FecDurationEst(...) + * + * This function calculates the duration of the FEC data within an opus packet. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * - sample_rate_hz : Sample rate of output audio + * + * Return value : >0 - The duration of the FEC data in the + * packet in samples per channel. + * 0 - No FEC data in the packet. + */ +int WebRtcOpus_FecDurationEst(const uint8_t* payload, + size_t payload_length_bytes, + int sample_rate_hz); + +/**************************************************************************** + * WebRtcOpus_PacketHasFec(...) + * + * This function detects if an opus packet has FEC. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : 0 - the packet does NOT contain FEC. 
+ * 1 - the packet contains FEC. + */ +int WebRtcOpus_PacketHasFec(const uint8_t* payload, + size_t payload_length_bytes); + +/**************************************************************************** + * WebRtcOpus_PacketHasVoiceActivity(...) + * + * This function returns the SILK VAD information encoded in the opus packet. + * For CELT-only packets that do not have VAD information, it returns -1. + * Input: + * - payload : Encoded data pointer + * - payload_length_bytes : Bytes of encoded data + * + * Return value : 0 - no frame had the VAD flag set. + * 1 - at least one frame had the VAD flag set. + * -1 - VAD status could not be determined. + */ +int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload, + size_t payload_length_bytes); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc new file mode 100644 index 0000000000..4477e8a5f8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_speed_test.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h" + +using ::std::string; + +namespace webrtc { + +static const int kOpusBlockDurationMs = 20; +static const int kOpusSamplingKhz = 48; + +class OpusSpeedTest : public AudioCodecSpeedTest { + protected: + OpusSpeedTest(); + void SetUp() override; + void TearDown() override; + float EncodeABlock(int16_t* in_data, + uint8_t* bit_stream, + size_t max_bytes, + size_t* encoded_bytes) override; + float DecodeABlock(const uint8_t* bit_stream, + size_t encoded_bytes, + int16_t* out_data) override; + WebRtcOpusEncInst* opus_encoder_; + WebRtcOpusDecInst* opus_decoder_; +}; + +OpusSpeedTest::OpusSpeedTest() + : AudioCodecSpeedTest(kOpusBlockDurationMs, + kOpusSamplingKhz, + kOpusSamplingKhz), + opus_encoder_(NULL), + opus_decoder_(NULL) {} + +void OpusSpeedTest::SetUp() { + AudioCodecSpeedTest::SetUp(); + // If channels_ == 1, use Opus VOIP mode, otherwise, audio mode. + int app = channels_ == 1 ? 0 : 1; + /* Create encoder memory. */ + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000)); + /* Set bitrate. */ + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_)); +} + +void OpusSpeedTest::TearDown() { + AudioCodecSpeedTest::TearDown(); + /* Free memory. 
*/ + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +float OpusSpeedTest::EncodeABlock(int16_t* in_data, + uint8_t* bit_stream, + size_t max_bytes, + size_t* encoded_bytes) { + clock_t clocks = clock(); + int value = WebRtcOpus_Encode(opus_encoder_, in_data, input_length_sample_, + max_bytes, bit_stream); + clocks = clock() - clocks; + EXPECT_GT(value, 0); + *encoded_bytes = static_cast<size_t>(value); + return 1000.0 * clocks / CLOCKS_PER_SEC; +} + +float OpusSpeedTest::DecodeABlock(const uint8_t* bit_stream, + size_t encoded_bytes, + int16_t* out_data) { + int value; + int16_t audio_type; + clock_t clocks = clock(); + value = WebRtcOpus_Decode(opus_decoder_, bit_stream, encoded_bytes, out_data, + &audio_type); + clocks = clock() - clocks; + EXPECT_EQ(output_length_sample_, static_cast<size_t>(value)); + return 1000.0 * clocks / CLOCKS_PER_SEC; +} + +/* Test audio length in second. */ +constexpr size_t kDurationSec = 400; + +#define ADD_TEST(complexity) \ + TEST_P(OpusSpeedTest, OpusSetComplexityTest##complexity) { \ + /* Set complexity. */ \ + printf("Setting complexity to %d ...\n", complexity); \ + EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity)); \ + EncodeDecode(kDurationSec); \ + } + +ADD_TEST(10) +ADD_TEST(9) +ADD_TEST(8) +ADD_TEST(7) +ADD_TEST(6) +ADD_TEST(5) +ADD_TEST(4) +ADD_TEST(3) +ADD_TEST(2) +ADD_TEST(1) +ADD_TEST(0) + +#define ADD_BANDWIDTH_TEST(bandwidth) \ + TEST_P(OpusSpeedTest, OpusSetBandwidthTest##bandwidth) { \ + /* Set bandwidth. 
*/ \ + printf("Setting bandwidth to %d ...\n", bandwidth); \ + EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, bandwidth)); \ + EncodeDecode(kDurationSec); \ + } + +ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_NARROWBAND) +ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_MEDIUMBAND) +ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND) +ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_SUPERWIDEBAND) +ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_FULLBAND) + +// List all test cases: (channel, bit rat, filename, extension). +const coding_param param_set[] = { + std::make_tuple(1, + 64000, + string("audio_coding/speech_mono_32_48kHz"), + string("pcm"), + true), + std::make_tuple(1, + 32000, + string("audio_coding/speech_mono_32_48kHz"), + string("pcm"), + true), + std::make_tuple(2, + 64000, + string("audio_coding/music_stereo_48kHz"), + string("pcm"), + true)}; + +INSTANTIATE_TEST_SUITE_P(AllTest, + OpusSpeedTest, + ::testing::ValuesIn(param_set)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc new file mode 100644 index 0000000000..4a9156ad58 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -0,0 +1,979 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <memory> +#include <string> + +#include "modules/audio_coding/codecs/opus/opus_inst.h" +#include "modules/audio_coding/codecs/opus/opus_interface.h" +#include "modules/audio_coding/neteq/tools/audio_loop.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +namespace { +// Equivalent to SDP params +// {{"channel_mapping", "0,1,2,3"}, {"coupled_streams", "2"}}. +constexpr unsigned char kQuadChannelMapping[] = {0, 1, 2, 3}; +constexpr int kQuadTotalStreams = 2; +constexpr int kQuadCoupledStreams = 2; + +constexpr unsigned char kStereoChannelMapping[] = {0, 1}; +constexpr int kStereoTotalStreams = 1; +constexpr int kStereoCoupledStreams = 1; + +constexpr unsigned char kMonoChannelMapping[] = {0}; +constexpr int kMonoTotalStreams = 1; +constexpr int kMonoCoupledStreams = 0; + +void CreateSingleOrMultiStreamEncoder(WebRtcOpusEncInst** opus_encoder, + int channels, + int application, + bool use_multistream, + int encoder_sample_rate_hz) { + EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream); + if (use_multistream) { + EXPECT_EQ(encoder_sample_rate_hz, 48000); + if (channels == 1) { + EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate( + opus_encoder, channels, application, kMonoTotalStreams, + kMonoCoupledStreams, kMonoChannelMapping)); + } else if (channels == 2) { + EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate( + opus_encoder, channels, application, kStereoTotalStreams, + kStereoCoupledStreams, kStereoChannelMapping)); + } else if (channels == 4) { + EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate( + opus_encoder, channels, application, kQuadTotalStreams, + kQuadCoupledStreams, kQuadChannelMapping)); + } else { + EXPECT_TRUE(false) << channels; + } + } else { + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(opus_encoder, channels, application, + encoder_sample_rate_hz)); + } +} + +void 
CreateSingleOrMultiStreamDecoder(WebRtcOpusDecInst** opus_decoder, + int channels, + bool use_multistream, + int decoder_sample_rate_hz) { + EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream); + if (use_multistream) { + EXPECT_EQ(decoder_sample_rate_hz, 48000); + if (channels == 1) { + EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate( + opus_decoder, channels, kMonoTotalStreams, + kMonoCoupledStreams, kMonoChannelMapping)); + } else if (channels == 2) { + EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate( + opus_decoder, channels, kStereoTotalStreams, + kStereoCoupledStreams, kStereoChannelMapping)); + } else if (channels == 4) { + EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate( + opus_decoder, channels, kQuadTotalStreams, + kQuadCoupledStreams, kQuadChannelMapping)); + } else { + EXPECT_TRUE(false) << channels; + } + } else { + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(opus_decoder, channels, + decoder_sample_rate_hz)); + } +} + +int SamplesPerChannel(int sample_rate_hz, int duration_ms) { + const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz, 1000); + return samples_per_ms * duration_ms; +} + +using test::AudioLoop; +using ::testing::Combine; +using ::testing::TestWithParam; +using ::testing::Values; + +// Maximum number of bytes in output bitstream. +const size_t kMaxBytes = 2000; + +class OpusTest + : public TestWithParam<::testing::tuple<size_t, int, bool, int, int>> { + protected: + OpusTest() = default; + + void TestDtxEffect(bool dtx, int block_length_ms); + + void TestCbrEffect(bool dtx, int block_length_ms); + + // Prepare `speech_data_` for encoding, read from a hard-coded file. + // After preparation, `speech_data_.GetNextBlock()` returns a pointer to a + // block of `block_length_ms` milliseconds. The data is looped every + // `loop_length_ms` milliseconds. 
+ void PrepareSpeechData(int block_length_ms, int loop_length_ms); + + int EncodeDecode(WebRtcOpusEncInst* encoder, + rtc::ArrayView<const int16_t> input_audio, + WebRtcOpusDecInst* decoder, + int16_t* output_audio, + int16_t* audio_type); + + void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder, + opus_int32 expect, + int32_t set); + + void CheckAudioBounded(const int16_t* audio, + size_t samples, + size_t channels, + uint16_t bound) const; + + WebRtcOpusEncInst* opus_encoder_ = nullptr; + WebRtcOpusDecInst* opus_decoder_ = nullptr; + AudioLoop speech_data_; + uint8_t bitstream_[kMaxBytes]; + size_t encoded_bytes_ = 0; + const size_t channels_{std::get<0>(GetParam())}; + const int application_{std::get<1>(GetParam())}; + const bool use_multistream_{std::get<2>(GetParam())}; + const int encoder_sample_rate_hz_{std::get<3>(GetParam())}; + const int decoder_sample_rate_hz_{std::get<4>(GetParam())}; +}; + +} // namespace + +// Singlestream: Try all combinations. +INSTANTIATE_TEST_SUITE_P(Singlestream, + OpusTest, + testing::Combine(testing::Values(1, 2), + testing::Values(0, 1), + testing::Values(false), + testing::Values(16000, 48000), + testing::Values(16000, 48000))); + +// Multistream: Some representative cases (only 48 kHz for now). 
+INSTANTIATE_TEST_SUITE_P( + Multistream, + OpusTest, + testing::Values(std::make_tuple(1, 0, true, 48000, 48000), + std::make_tuple(2, 1, true, 48000, 48000), + std::make_tuple(4, 0, true, 48000, 48000), + std::make_tuple(4, 1, true, 48000, 48000))); + +void OpusTest::PrepareSpeechData(int block_length_ms, int loop_length_ms) { + std::map<int, std::string> channel_to_basename = { + {1, "audio_coding/testfile32kHz"}, + {2, "audio_coding/teststereo32kHz"}, + {4, "audio_coding/speech_4_channels_48k_one_second"}}; + std::map<int, std::string> channel_to_suffix = { + {1, "pcm"}, {2, "pcm"}, {4, "wav"}}; + const std::string file_name = webrtc::test::ResourcePath( + channel_to_basename[channels_], channel_to_suffix[channels_]); + if (loop_length_ms < block_length_ms) { + loop_length_ms = block_length_ms; + } + const int sample_rate_khz = + rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000); + EXPECT_TRUE(speech_data_.Init(file_name, + loop_length_ms * sample_rate_khz * channels_, + block_length_ms * sample_rate_khz * channels_)); +} + +void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder, + opus_int32 expect, + int32_t set) { + opus_int32 bandwidth; + EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, set)); + EXPECT_EQ(0, WebRtcOpus_GetMaxPlaybackRate(opus_encoder_, &bandwidth)); + EXPECT_EQ(expect, bandwidth); +} + +void OpusTest::CheckAudioBounded(const int16_t* audio, + size_t samples, + size_t channels, + uint16_t bound) const { + for (size_t i = 0; i < samples; ++i) { + for (size_t c = 0; c < channels; ++c) { + ASSERT_GE(audio[i * channels + c], -bound); + ASSERT_LE(audio[i * channels + c], bound); + } + } +} + +int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder, + rtc::ArrayView<const int16_t> input_audio, + WebRtcOpusDecInst* decoder, + int16_t* output_audio, + int16_t* audio_type) { + const int input_samples_per_channel = + rtc::CheckedDivExact(input_audio.size(), channels_); + int encoded_bytes_int = + WebRtcOpus_Encode(encoder, 
input_audio.data(), input_samples_per_channel, + kMaxBytes, bitstream_); + EXPECT_GE(encoded_bytes_int, 0); + encoded_bytes_ = static_cast<size_t>(encoded_bytes_int); + if (encoded_bytes_ != 0) { + int est_len = WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_); + int act_len = WebRtcOpus_Decode(decoder, bitstream_, encoded_bytes_, + output_audio, audio_type); + EXPECT_EQ(est_len, act_len); + return act_len; + } else { + int total_dtx_len = 0; + const int output_samples_per_channel = input_samples_per_channel * + decoder_sample_rate_hz_ / + encoder_sample_rate_hz_; + while (total_dtx_len < output_samples_per_channel) { + int est_len = WebRtcOpus_DurationEst(decoder, NULL, 0); + int act_len = WebRtcOpus_Decode(decoder, NULL, 0, + &output_audio[total_dtx_len * channels_], + audio_type); + EXPECT_EQ(est_len, act_len); + total_dtx_len += act_len; + } + return total_dtx_len; + } +} + +// Test if encoder/decoder can enter DTX mode properly and do not enter DTX when +// they should not. This test is signal dependent. +void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) { + PrepareSpeechData(block_length_ms, 2000); + const size_t input_samples = + rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000) * block_length_ms; + const size_t output_samples = + rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms; + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Set input audio as silence. + std::vector<int16_t> silence(input_samples * channels_, 0); + + // Setting DTX. + EXPECT_EQ(0, dtx ? 
WebRtcOpus_EnableDtx(opus_encoder_) + : WebRtcOpus_DisableDtx(opus_encoder_)); + + int16_t audio_type; + int16_t* output_data_decode = new int16_t[output_samples * channels_]; + + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(output_samples, + static_cast<size_t>(EncodeDecode( + opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, + output_data_decode, &audio_type))); + // If not DTX, it should never enter DTX mode. If DTX, we do not care since + // whether it enters DTX depends on the signal type. + if (!dtx) { + EXPECT_GT(encoded_bytes_, 1U); + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + // We input some silent segments. In DTX mode, the encoder will stop sending. + // However, DTX may happen after a while. + for (int i = 0; i < 30; ++i) { + EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode( + opus_encoder_, silence, opus_decoder_, + output_data_decode, &audio_type))); + if (!dtx) { + EXPECT_GT(encoded_bytes_, 1U); + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } else if (encoded_bytes_ == 1) { + EXPECT_EQ(1, opus_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + break; + } + } + + // When Opus is in DTX, it wakes up in a regular basis. It sends two packets, + // one with an arbitrary size and the other of 1-byte, then stops sending for + // a certain number of frames. + + // `max_dtx_frames` is the maximum number of frames Opus can stay in DTX. + // TODO(kwiberg): Why does this number depend on the encoding sample rate? + const int max_dtx_frames = + (encoder_sample_rate_hz_ == 16000 ? 800 : 400) / block_length_ms + 1; + + // We run `kRunTimeMs` milliseconds of pure silence. 
+ const int kRunTimeMs = 4500; + + // We check that, after a `kCheckTimeMs` milliseconds (given that the CNG in + // Opus needs time to adapt), the absolute values of DTX decoded signal are + // bounded by `kOutputValueBound`. + const int kCheckTimeMs = 4000; + +#if defined(OPUS_FIXED_POINT) + // Fixed-point Opus generates a random (comfort) noise, which has a less + // predictable value bound than its floating-point Opus. This value depends on + // input signal, and the time window for checking the output values (between + // `kCheckTimeMs` and `kRunTimeMs`). + const uint16_t kOutputValueBound = 30; + +#else + const uint16_t kOutputValueBound = 2; +#endif + + int time = 0; + while (time < kRunTimeMs) { + // DTX mode is maintained for maximum `max_dtx_frames` frames. + int i = 0; + for (; i < max_dtx_frames; ++i) { + time += block_length_ms; + EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode( + opus_encoder_, silence, opus_decoder_, + output_data_decode, &audio_type))); + if (dtx) { + if (encoded_bytes_ > 1) + break; + EXPECT_EQ(0U, encoded_bytes_) // Send 0 byte. + << "Opus should have entered DTX mode."; + EXPECT_EQ(1, opus_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + if (time >= kCheckTimeMs) { + CheckAudioBounded(output_data_decode, output_samples, channels_, + kOutputValueBound); + } + } else { + EXPECT_GT(encoded_bytes_, 1U); + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + if (dtx) { + // With DTX, Opus must stop transmission for some time. + EXPECT_GT(i, 1); + } + + // We expect a normal payload. + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + + // Enters DTX again immediately. 
+ time += block_length_ms; + EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode( + opus_encoder_, silence, opus_decoder_, + output_data_decode, &audio_type))); + if (dtx) { + EXPECT_EQ(1U, encoded_bytes_); // Send 1 byte. + EXPECT_EQ(1, opus_encoder_->in_dtx_mode); + EXPECT_EQ(1, opus_decoder_->in_dtx_mode); + EXPECT_EQ(2, audio_type); // Comfort noise. + if (time >= kCheckTimeMs) { + CheckAudioBounded(output_data_decode, output_samples, channels_, + kOutputValueBound); + } + } else { + EXPECT_GT(encoded_bytes_, 1U); + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + } + + silence[0] = 10000; + if (dtx) { + // Verify that encoder/decoder can jump out from DTX mode. + EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode( + opus_encoder_, silence, opus_decoder_, + output_data_decode, &audio_type))); + EXPECT_GT(encoded_bytes_, 1U); + EXPECT_EQ(0, opus_encoder_->in_dtx_mode); + EXPECT_EQ(0, opus_decoder_->in_dtx_mode); + EXPECT_EQ(0, audio_type); // Speech. + } + + // Free memory. + delete[] output_data_decode; + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +// Test if CBR does what we expect. +void OpusTest::TestCbrEffect(bool cbr, int block_length_ms) { + PrepareSpeechData(block_length_ms, 2000); + const size_t output_samples = + rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms; + + int32_t max_pkt_size_diff = 0; + int32_t prev_pkt_size = 0; + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Setting CBR. + EXPECT_EQ(0, cbr ? 
WebRtcOpus_EnableCbr(opus_encoder_) + : WebRtcOpus_DisableCbr(opus_encoder_)); + + int16_t audio_type; + std::vector<int16_t> audio_out(output_samples * channels_); + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(output_samples, + static_cast<size_t>( + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), + opus_decoder_, audio_out.data(), &audio_type))); + + if (prev_pkt_size > 0) { + int32_t diff = std::abs((int32_t)encoded_bytes_ - prev_pkt_size); + max_pkt_size_diff = std::max(max_pkt_size_diff, diff); + } + prev_pkt_size = rtc::checked_cast<int32_t>(encoded_bytes_); + } + + if (cbr) { + EXPECT_EQ(max_pkt_size_diff, 0); + } else { + EXPECT_GT(max_pkt_size_diff, 0); + } + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +// Test failing Create. +TEST(OpusTest, OpusCreateFail) { + WebRtcOpusEncInst* opus_encoder; + WebRtcOpusDecInst* opus_decoder; + + // Test to see that an invalid pointer is caught. + EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(NULL, 1, 0, 48000)); + // Invalid channel number. + EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 257, 0, 48000)); + // Invalid applciation mode. + EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 2, 48000)); + // Invalid sample rate. + EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 0, 12345)); + + EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(NULL, 1, 48000)); + // Invalid channel number. + EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 257, 48000)); + // Invalid sample rate. + EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 1, 12345)); +} + +// Test failing Free. +TEST(OpusTest, OpusFreeFail) { + // Test to see that an invalid pointer is caught. + EXPECT_EQ(-1, WebRtcOpus_EncoderFree(NULL)); + EXPECT_EQ(-1, WebRtcOpus_DecoderFree(NULL)); +} + +// Test normal Create and Free. 
+TEST_P(OpusTest, OpusCreateFree) { + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + EXPECT_TRUE(opus_encoder_ != NULL); + EXPECT_TRUE(opus_decoder_ != NULL); + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +#define ENCODER_CTL(inst, vargs) \ + inst->encoder \ + ? opus_encoder_ctl(inst->encoder, vargs) \ + : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs) + +TEST_P(OpusTest, OpusEncodeDecode) { + PrepareSpeechData(20, 20); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_)); + + // Check application mode. + opus_int32 app; + ENCODER_CTL(opus_encoder_, OPUS_GET_APPLICATION(&app)); + EXPECT_EQ(application_ == 0 ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO, + app); + + // Encode & decode. + int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + int16_t* output_data_decode = + new int16_t[decode_samples_per_channel * channels_]; + EXPECT_EQ(decode_samples_per_channel, + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), + opus_decoder_, output_data_decode, &audio_type)); + + // Free memory. 
+ delete[] output_data_decode; + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusSetBitRate) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetBitRate(opus_encoder_, 60000)); + + // Create encoder memory, try with different bitrates. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 30000)); + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 60000)); + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 300000)); + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 600000)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusSetComplexity) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 9)); + + // Create encoder memory, try with different complexities. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 0)); + EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 10)); + EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 11)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusSetBandwidth) { + if (channels_ > 2) { + // TODO(webrtc:10217): investigate why multi-stream Opus reports + // narrowband when it's configured with FULLBAND. + return; + } + PrepareSpeechData(20, 20); + + int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + std::unique_ptr<int16_t[]> output_data_decode( + new int16_t[decode_samples_per_channel * channels_]()); + + // Test without creating encoder memory. 
+ EXPECT_EQ(-1, + WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND)); + EXPECT_EQ(-1, WebRtcOpus_GetBandwidth(opus_encoder_)); + + // Create encoder memory, try with different bandwidths. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + EXPECT_EQ(-1, WebRtcOpus_SetBandwidth(opus_encoder_, + OPUS_BANDWIDTH_NARROWBAND - 1)); + EXPECT_EQ(0, + WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND)); + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, + output_data_decode.get(), &audio_type); + EXPECT_EQ(OPUS_BANDWIDTH_NARROWBAND, WebRtcOpus_GetBandwidth(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND)); + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, + output_data_decode.get(), &audio_type); + EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND + : OPUS_BANDWIDTH_FULLBAND, + WebRtcOpus_GetBandwidth(opus_encoder_)); + EXPECT_EQ( + -1, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND + 1)); + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, + output_data_decode.get(), &audio_type); + EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND + : OPUS_BANDWIDTH_FULLBAND, + WebRtcOpus_GetBandwidth(opus_encoder_)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusForceChannels) { + // Test without creating encoder memory. 
+ EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + ASSERT_NE(nullptr, opus_encoder_); + + if (channels_ >= 2) { + EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 3)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0)); + } else { + EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0)); + } + + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +// Encode and decode one frame, initialize the decoder and +// decode once more. +TEST_P(OpusTest, OpusDecodeInit) { + PrepareSpeechData(20, 20); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Encode & decode. + int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + int16_t* output_data_decode = + new int16_t[decode_samples_per_channel * channels_]; + EXPECT_EQ(decode_samples_per_channel, + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), + opus_decoder_, output_data_decode, &audio_type)); + + WebRtcOpus_DecoderInit(opus_decoder_); + + EXPECT_EQ(decode_samples_per_channel, + WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_, + output_data_decode, &audio_type)); + + // Free memory. + delete[] output_data_decode; + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusEnableDisableFec) { + // Test without creating encoder memory. 
+ EXPECT_EQ(-1, WebRtcOpus_EnableFec(opus_encoder_)); + EXPECT_EQ(-1, WebRtcOpus_DisableFec(opus_encoder_)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusEnableDisableDtx) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_EnableDtx(opus_encoder_)); + EXPECT_EQ(-1, WebRtcOpus_DisableDtx(opus_encoder_)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + opus_int32 dtx; + + // DTX is off by default. + ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Test to enable DTX. + EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_)); + ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(1, dtx); + + // Test to disable DTX. + EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_encoder_)); + ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx)); + EXPECT_EQ(0, dtx); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusDtxOff) { + TestDtxEffect(false, 10); + TestDtxEffect(false, 20); + TestDtxEffect(false, 40); +} + +TEST_P(OpusTest, OpusDtxOn) { + if (channels_ > 2 || application_ != 0) { + // DTX does not work with OPUS_APPLICATION_AUDIO at low complexity settings. + // TODO(webrtc:10218): adapt the test to the sizes and order of multi-stream + // DTX packets. 
+ return; + } + TestDtxEffect(true, 10); + TestDtxEffect(true, 20); + TestDtxEffect(true, 40); +} + +TEST_P(OpusTest, OpusCbrOff) { + TestCbrEffect(false, 10); + TestCbrEffect(false, 20); + TestCbrEffect(false, 40); +} + +TEST_P(OpusTest, OpusCbrOn) { + TestCbrEffect(true, 10); + TestCbrEffect(true, 20); + TestCbrEffect(true, 40); +} + +TEST_P(OpusTest, OpusSetPacketLossRate) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50)); + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, -1)); + EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 101)); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +TEST_P(OpusTest, OpusSetMaxPlaybackRate) { + // Test without creating encoder memory. + EXPECT_EQ(-1, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, 20000)); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 48000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 24001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 24000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 16001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 16000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 12001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 12000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 8001); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 8000); + SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 4000); + + // Free memory. 
+ EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); +} + +// Test PLC. +TEST_P(OpusTest, OpusDecodePlc) { + PrepareSpeechData(20, 20); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_)); + + // Encode & decode. + int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + int16_t* output_data_decode = + new int16_t[decode_samples_per_channel * channels_]; + EXPECT_EQ(decode_samples_per_channel, + EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), + opus_decoder_, output_data_decode, &audio_type)); + + // Call decoder PLC. + constexpr int kPlcDurationMs = 10; + const int plc_samples = decoder_sample_rate_hz_ * kPlcDurationMs / 1000; + int16_t* plc_buffer = new int16_t[plc_samples * channels_]; + EXPECT_EQ(plc_samples, + WebRtcOpus_Decode(opus_decoder_, NULL, 0, plc_buffer, &audio_type)); + + // Free memory. + delete[] plc_buffer; + delete[] output_data_decode; + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +// Duration estimation. +TEST_P(OpusTest, OpusDurationEstimation) { + PrepareSpeechData(20, 20); + + // Create. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + + // 10 ms. We use only first 10 ms of a 20 ms block. 
+ auto speech_block = speech_data_.GetNextBlock(); + int encoded_bytes_int = WebRtcOpus_Encode( + opus_encoder_, speech_block.data(), + rtc::CheckedDivExact(speech_block.size(), 2 * channels_), kMaxBytes, + bitstream_); + EXPECT_GE(encoded_bytes_int, 0); + EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/10), + WebRtcOpus_DurationEst(opus_decoder_, bitstream_, + static_cast<size_t>(encoded_bytes_int))); + + // 20 ms + speech_block = speech_data_.GetNextBlock(); + encoded_bytes_int = + WebRtcOpus_Encode(opus_encoder_, speech_block.data(), + rtc::CheckedDivExact(speech_block.size(), channels_), + kMaxBytes, bitstream_); + EXPECT_GE(encoded_bytes_int, 0); + EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20), + WebRtcOpus_DurationEst(opus_decoder_, bitstream_, + static_cast<size_t>(encoded_bytes_int))); + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST_P(OpusTest, OpusDecodeRepacketized) { + if (channels_ > 2) { + // As per the Opus documentation + // https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__repacketizer.html#details, + // multiple streams are not supported. + return; + } + constexpr size_t kPackets = 6; + + PrepareSpeechData(20, 20 * kPackets); + + // Create encoder memory. + CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_, + use_multistream_, encoder_sample_rate_hz_); + ASSERT_NE(nullptr, opus_encoder_); + CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_, + decoder_sample_rate_hz_); + ASSERT_NE(nullptr, opus_decoder_); + + // Set bitrate. + EXPECT_EQ( + 0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_)); + + // Encode & decode. 
+ int16_t audio_type; + const int decode_samples_per_channel = + SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20); + std::unique_ptr<int16_t[]> output_data_decode( + new int16_t[kPackets * decode_samples_per_channel * channels_]); + OpusRepacketizer* rp = opus_repacketizer_create(); + + size_t num_packets = 0; + constexpr size_t kMaxCycles = 100; + for (size_t idx = 0; idx < kMaxCycles; ++idx) { + auto speech_block = speech_data_.GetNextBlock(); + encoded_bytes_ = + WebRtcOpus_Encode(opus_encoder_, speech_block.data(), + rtc::CheckedDivExact(speech_block.size(), channels_), + kMaxBytes, bitstream_); + if (opus_repacketizer_cat(rp, bitstream_, + rtc::checked_cast<opus_int32>(encoded_bytes_)) == + OPUS_OK) { + ++num_packets; + if (num_packets == kPackets) { + break; + } + } else { + // Opus repacketizer cannot guarantee a success. We try again if it fails. + opus_repacketizer_init(rp); + num_packets = 0; + } + } + EXPECT_EQ(kPackets, num_packets); + + encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes); + + EXPECT_EQ(decode_samples_per_channel * kPackets, + static_cast<size_t>(WebRtcOpus_DurationEst( + opus_decoder_, bitstream_, encoded_bytes_))); + + EXPECT_EQ(decode_samples_per_channel * kPackets, + static_cast<size_t>( + WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_, + output_data_decode.get(), &audio_type))); + + // Free memory. 
+ opus_repacketizer_destroy(rp); + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + +TEST(OpusVadTest, CeltUnknownStatus) { + const uint8_t celt[] = {0x80}; + EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(celt, 1), -1); +} + +TEST(OpusVadTest, Mono20msVadSet) { + uint8_t silk20msMonoVad[] = {0x78, 0x80}; + EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoVad, 2)); +} + +TEST(OpusVadTest, Mono20MsVadUnset) { + uint8_t silk20msMonoSilence[] = {0x78, 0x00}; + EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoSilence, 2)); +} + +TEST(OpusVadTest, Stereo20MsVadOnSideChannel) { + uint8_t silk20msStereoVadSideChannel[] = {0x78 | 0x04, 0x20}; + EXPECT_TRUE( + WebRtcOpus_PacketHasVoiceActivity(silk20msStereoVadSideChannel, 2)); +} + +TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) { + uint8_t twoMonoFrames[] = {0x78 | 0x1, 0x00, 0x80}; + EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn new file mode 100644 index 0000000000..8bc0bf5e0e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/BUILD.gn @@ -0,0 +1,55 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +import("../../../../../webrtc.gni") + +visibility = [ + ":*", + "../../../:*", +] + +if (rtc_include_tests) { + rtc_library("test") { + testonly = true + + sources = [ + "audio_ring_buffer.cc", + "audio_ring_buffer.h", + "blocker.cc", + "blocker.h", + "lapped_transform.cc", + "lapped_transform.h", + ] + + deps = [ + "../../../../../common_audio", + "../../../../../common_audio:common_audio_c", + "../../../../../rtc_base:checks", + "../../../../../rtc_base/memory:aligned_malloc", + ] + } + + rtc_library("test_unittest") { + testonly = true + + sources = [ + "audio_ring_buffer_unittest.cc", + "blocker_unittest.cc", + "lapped_transform_unittest.cc", + ] + + deps = [ + ":test", + "../../../../../common_audio", + "../../../../../common_audio:common_audio_c", + "../../../../../rtc_base:macromagic", + "../../../../../test:test_support", + "//testing/gtest", + ] + } +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc new file mode 100644 index 0000000000..2a71b43d2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h" + +#include "common_audio/ring_buffer.h" +#include "rtc_base/checks.h" + +// This is a simple multi-channel wrapper over the ring_buffer.h C interface. 
+ +namespace webrtc { + +AudioRingBuffer::AudioRingBuffer(size_t channels, size_t max_frames) { + buffers_.reserve(channels); + for (size_t i = 0; i < channels; ++i) + buffers_.push_back(WebRtc_CreateBuffer(max_frames, sizeof(float))); +} + +AudioRingBuffer::~AudioRingBuffer() { + for (auto* buf : buffers_) + WebRtc_FreeBuffer(buf); +} + +void AudioRingBuffer::Write(const float* const* data, + size_t channels, + size_t frames) { + RTC_DCHECK_EQ(buffers_.size(), channels); + for (size_t i = 0; i < channels; ++i) { + const size_t written = WebRtc_WriteBuffer(buffers_[i], data[i], frames); + RTC_CHECK_EQ(written, frames); + } +} + +void AudioRingBuffer::Read(float* const* data, size_t channels, size_t frames) { + RTC_DCHECK_EQ(buffers_.size(), channels); + for (size_t i = 0; i < channels; ++i) { + const size_t read = + WebRtc_ReadBuffer(buffers_[i], nullptr, data[i], frames); + RTC_CHECK_EQ(read, frames); + } +} + +size_t AudioRingBuffer::ReadFramesAvailable() const { + // All buffers have the same amount available. + return WebRtc_available_read(buffers_[0]); +} + +size_t AudioRingBuffer::WriteFramesAvailable() const { + // All buffers have the same amount available. 
+ return WebRtc_available_write(buffers_[0]); +} + +void AudioRingBuffer::MoveReadPositionForward(size_t frames) { + for (auto* buf : buffers_) { + const size_t moved = + static_cast<size_t>(WebRtc_MoveReadPtr(buf, static_cast<int>(frames))); + RTC_CHECK_EQ(moved, frames); + } +} + +void AudioRingBuffer::MoveReadPositionBackward(size_t frames) { + for (auto* buf : buffers_) { + const size_t moved = static_cast<size_t>( + -WebRtc_MoveReadPtr(buf, -static_cast<int>(frames))); + RTC_CHECK_EQ(moved, frames); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h new file mode 100644 index 0000000000..a280ca2410 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_ + +#include <stddef.h> + +#include <memory> +#include <vector> + +struct RingBuffer; + +namespace webrtc { + +// A ring buffer tailored for float deinterleaved audio. Any operation that +// cannot be performed as requested will cause a crash (e.g. insufficient data +// in the buffer to fulfill a read request.) +class AudioRingBuffer final { + public: + // Specify the number of channels and maximum number of frames the buffer will + // contain. 
+ AudioRingBuffer(size_t channels, size_t max_frames); + ~AudioRingBuffer(); + + // Copies `data` to the buffer and advances the write pointer. `channels` must + // be the same as at creation time. + void Write(const float* const* data, size_t channels, size_t frames); + + // Copies from the buffer to `data` and advances the read pointer. `channels` + // must be the same as at creation time. + void Read(float* const* data, size_t channels, size_t frames); + + size_t ReadFramesAvailable() const; + size_t WriteFramesAvailable() const; + + // Moves the read position. The forward version advances the read pointer + // towards the write pointer and the backward verison withdraws the read + // pointer away from the write pointer (i.e. flushing and stuffing the buffer + // respectively.) + void MoveReadPositionForward(size_t frames); + void MoveReadPositionBackward(size_t frames); + + private: + // TODO(kwiberg): Use std::vector<std::unique_ptr<RingBuffer>> instead. + std::vector<RingBuffer*> buffers_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc new file mode 100644 index 0000000000..6dbc8ee9fe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/audio_ring_buffer_unittest.cc @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h" + +#include <memory> + +#include "common_audio/channel_buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +class AudioRingBufferTest + : public ::testing::TestWithParam< ::testing::tuple<int, int, int, int> > { +}; + +void ReadAndWriteTest(const ChannelBuffer<float>& input, + size_t num_write_chunk_frames, + size_t num_read_chunk_frames, + size_t buffer_frames, + ChannelBuffer<float>* output) { + const size_t num_channels = input.num_channels(); + const size_t total_frames = input.num_frames(); + AudioRingBuffer buf(num_channels, buffer_frames); + std::unique_ptr<float*[]> slice(new float*[num_channels]); + + size_t input_pos = 0; + size_t output_pos = 0; + while (input_pos + buf.WriteFramesAvailable() < total_frames) { + // Write until the buffer is as full as possible. + while (buf.WriteFramesAvailable() >= num_write_chunk_frames) { + buf.Write(input.Slice(slice.get(), input_pos), num_channels, + num_write_chunk_frames); + input_pos += num_write_chunk_frames; + } + // Read until the buffer is as empty as possible. + while (buf.ReadFramesAvailable() >= num_read_chunk_frames) { + EXPECT_LT(output_pos, total_frames); + buf.Read(output->Slice(slice.get(), output_pos), num_channels, + num_read_chunk_frames); + output_pos += num_read_chunk_frames; + } + } + + // Write and read the last bit. + if (input_pos < total_frames) { + buf.Write(input.Slice(slice.get(), input_pos), num_channels, + total_frames - input_pos); + } + if (buf.ReadFramesAvailable()) { + buf.Read(output->Slice(slice.get(), output_pos), num_channels, + buf.ReadFramesAvailable()); + } + EXPECT_EQ(0u, buf.ReadFramesAvailable()); +} + +TEST_P(AudioRingBufferTest, ReadDataMatchesWrittenData) { + const size_t kFrames = 5000; + const size_t num_channels = ::testing::get<3>(GetParam()); + + // Initialize the input data to an increasing sequence. 
+ ChannelBuffer<float> input(kFrames, static_cast<int>(num_channels)); + for (size_t i = 0; i < num_channels; ++i) + for (size_t j = 0; j < kFrames; ++j) + input.channels()[i][j] = (i + 1) * (j + 1); + + ChannelBuffer<float> output(kFrames, static_cast<int>(num_channels)); + ReadAndWriteTest(input, ::testing::get<0>(GetParam()), + ::testing::get<1>(GetParam()), ::testing::get<2>(GetParam()), + &output); + + // Verify the read data matches the input. + for (size_t i = 0; i < num_channels; ++i) + for (size_t j = 0; j < kFrames; ++j) + EXPECT_EQ(input.channels()[i][j], output.channels()[i][j]); +} + +INSTANTIATE_TEST_SUITE_P( + AudioRingBufferTest, + AudioRingBufferTest, + ::testing::Combine(::testing::Values(10, 20, 42), // num_write_chunk_frames + ::testing::Values(1, 10, 17), // num_read_chunk_frames + ::testing::Values(100, 256), // buffer_frames + ::testing::Values(1, 4))); // num_channels + +TEST_F(AudioRingBufferTest, MoveReadPosition) { + const size_t kNumChannels = 1; + const float kInputArray[] = {1, 2, 3, 4}; + const size_t kNumFrames = sizeof(kInputArray) / sizeof(*kInputArray); + ChannelBuffer<float> input(kNumFrames, kNumChannels); + input.SetDataForTesting(kInputArray, kNumFrames); + AudioRingBuffer buf(kNumChannels, kNumFrames); + buf.Write(input.channels(), kNumChannels, kNumFrames); + + buf.MoveReadPositionForward(3); + ChannelBuffer<float> output(1, kNumChannels); + buf.Read(output.channels(), kNumChannels, 1); + EXPECT_EQ(4, output.channels()[0][0]); + buf.MoveReadPositionBackward(3); + buf.Read(output.channels(), kNumChannels, 1); + EXPECT_EQ(2, output.channels()[0][0]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc new file mode 100644 index 0000000000..33406cead9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.cc @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2014 The WebRTC 
project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/test/blocker.h" + +#include <string.h> + +#include "rtc_base/checks.h" + +namespace { + +// Adds `a` and `b` frame by frame into `result` (basically matrix addition). +void AddFrames(const float* const* a, + size_t a_start_index, + const float* const* b, + int b_start_index, + size_t num_frames, + size_t num_channels, + float* const* result, + size_t result_start_index) { + for (size_t i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + result[i][j + result_start_index] = + a[i][j + a_start_index] + b[i][j + b_start_index]; + } + } +} + +// Copies `src` into `dst` channel by channel. +void CopyFrames(const float* const* src, + size_t src_start_index, + size_t num_frames, + size_t num_channels, + float* const* dst, + size_t dst_start_index) { + for (size_t i = 0; i < num_channels; ++i) { + memcpy(&dst[i][dst_start_index], &src[i][src_start_index], + num_frames * sizeof(dst[i][dst_start_index])); + } +} + +// Moves `src` into `dst` channel by channel. 
+void MoveFrames(const float* const* src, + size_t src_start_index, + size_t num_frames, + size_t num_channels, + float* const* dst, + size_t dst_start_index) { + for (size_t i = 0; i < num_channels; ++i) { + memmove(&dst[i][dst_start_index], &src[i][src_start_index], + num_frames * sizeof(dst[i][dst_start_index])); + } +} + +void ZeroOut(float* const* buffer, + size_t starting_idx, + size_t num_frames, + size_t num_channels) { + for (size_t i = 0; i < num_channels; ++i) { + memset(&buffer[i][starting_idx], 0, + num_frames * sizeof(buffer[i][starting_idx])); + } +} + +// Pointwise multiplies each channel of `frames` with `window`. Results are +// stored in `frames`. +void ApplyWindow(const float* window, + size_t num_frames, + size_t num_channels, + float* const* frames) { + for (size_t i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + frames[i][j] = frames[i][j] * window[j]; + } + } +} + +size_t gcd(size_t a, size_t b) { + size_t tmp; + while (b) { + tmp = a; + a = b; + b = tmp % b; + } + return a; +} + +} // namespace + +namespace webrtc { + +Blocker::Blocker(size_t chunk_size, + size_t block_size, + size_t num_input_channels, + size_t num_output_channels, + const float* window, + size_t shift_amount, + BlockerCallback* callback) + : chunk_size_(chunk_size), + block_size_(block_size), + num_input_channels_(num_input_channels), + num_output_channels_(num_output_channels), + initial_delay_(block_size_ - gcd(chunk_size, shift_amount)), + frame_offset_(0), + input_buffer_(num_input_channels_, chunk_size_ + initial_delay_), + output_buffer_(chunk_size_ + initial_delay_, num_output_channels_), + input_block_(block_size_, num_input_channels_), + output_block_(block_size_, num_output_channels_), + window_(new float[block_size_]), + shift_amount_(shift_amount), + callback_(callback) { + RTC_CHECK_LE(num_output_channels_, num_input_channels_); + RTC_CHECK_LE(shift_amount_, block_size_); + + memcpy(window_.get(), window, block_size_ * 
sizeof(*window_.get())); + input_buffer_.MoveReadPositionBackward(initial_delay_); +} + +Blocker::~Blocker() = default; + +// When block_size < chunk_size the input and output buffers look like this: +// +// delay* chunk_size chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// 1. New input gets read into sections _b_ and _c_ of the input buffer. +// 2. We block starting from frame_offset. +// 3. We block until we reach a block `bl` that doesn't contain any frames +// from sections _a_ or _b_ of the input buffer. +// 4. We window the current block, fire the callback for processing, window +// again, and overlap/add to the output buffer. +// 5. We copy sections _a_ and _b_ of the output buffer into output. +// 6. For both the input and the output buffers, we copy section _c_ into +// section _a_. +// 7. We set the new frame_offset to be the difference between the first frame +// of `bl` and the border between sections _b_ and _c_. +// +// When block_size > chunk_size the input and output buffers look like this: +// +// chunk_size delay* chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// The procedure is the same as above, except for: +// 1. New input gets read into section _c_ of the input buffer. +// 3. We block until we reach a block `bl` that doesn't contain any frames +// from section _a_ of the input buffer. +// 5. We copy section _a_ of the output buffer into output. +// 6. For both the input and the output buffers, we copy sections _b_ and _c_ +// into section _a_ and _b_. +// 7. We set the new frame_offset to be the difference between the first frame +// of `bl` and the border between sections _a_ and _b_. +// +// * delay here refers to initial_delay_ +// +// TODO(claguna): Look at using ring buffers to eliminate some copies. 
+void Blocker::ProcessChunk(const float* const* input, + size_t chunk_size, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) { + RTC_CHECK_EQ(chunk_size, chunk_size_); + RTC_CHECK_EQ(num_input_channels, num_input_channels_); + RTC_CHECK_EQ(num_output_channels, num_output_channels_); + + input_buffer_.Write(input, num_input_channels, chunk_size_); + size_t first_frame_in_block = frame_offset_; + + // Loop through blocks. + while (first_frame_in_block < chunk_size_) { + input_buffer_.Read(input_block_.channels(), num_input_channels, + block_size_); + input_buffer_.MoveReadPositionBackward(block_size_ - shift_amount_); + + ApplyWindow(window_.get(), block_size_, num_input_channels_, + input_block_.channels()); + callback_->ProcessBlock(input_block_.channels(), block_size_, + num_input_channels_, num_output_channels_, + output_block_.channels()); + ApplyWindow(window_.get(), block_size_, num_output_channels_, + output_block_.channels()); + + AddFrames(output_buffer_.channels(), first_frame_in_block, + output_block_.channels(), 0, block_size_, num_output_channels_, + output_buffer_.channels(), first_frame_in_block); + + first_frame_in_block += shift_amount_; + } + + // Copy output buffer to output + CopyFrames(output_buffer_.channels(), 0, chunk_size_, num_output_channels_, + output, 0); + + // Copy output buffer [chunk_size_, chunk_size_ + initial_delay] + // to output buffer [0, initial_delay], zero the rest. + MoveFrames(output_buffer_.channels(), chunk_size, initial_delay_, + num_output_channels_, output_buffer_.channels(), 0); + ZeroOut(output_buffer_.channels(), initial_delay_, chunk_size_, + num_output_channels_); + + // Calculate new starting frames. 
+ frame_offset_ = first_frame_in_block - chunk_size_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h new file mode 100644 index 0000000000..59b7e29621 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_ + +#include <memory> + +#include "common_audio/channel_buffer.h" +#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h" + +namespace webrtc { + +// The callback function to process audio in the time domain. Input has already +// been windowed, and output will be windowed. The number of input channels +// must be >= the number of output channels. +class BlockerCallback { + public: + virtual ~BlockerCallback() {} + + virtual void ProcessBlock(const float* const* input, + size_t num_frames, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) = 0; +}; + +// The main purpose of Blocker is to abstract away the fact that often we +// receive a different number of audio frames than our transform takes. For +// example, most FFTs work best when the fft-size is a power of 2, but suppose +// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames +// of audio, which is not a power of 2. 
Blocker allows us to specify the +// transform and all other necessary processing via the ProcessBlock() callback +// function without any constraints on the transform-size +// (read: `block_size_`) or received-audio-size (read: `chunk_size_`). +// We handle this for the multichannel audio case, allowing for different +// numbers of input and output channels (for example, beamforming takes 2 or +// more input channels and returns 1 output channel). Audio signals are +// represented as deinterleaved floats in the range [-1, 1]. +// +// Blocker is responsible for: +// - blocking audio while handling potential discontinuities on the edges +// of chunks +// - windowing blocks before sending them to ProcessBlock() +// - windowing processed blocks, and overlap-adding them together before +// sending back a processed chunk +// +// To use blocker: +// 1. Implement a BlockerCallback object `bc`. +// 2. Instantiate a Blocker object `b`, passing in `bc`. +// 3. As you receive audio, call b.ProcessChunk() to get processed audio. +// +// A small amount of delay is added to the first received chunk to deal with +// the difference in chunk/block sizes. This delay is +// block_size - gcd(chunk_size, shift_amount) frames (it can exceed chunk_size +// when block_size > chunk_size) and is reported by initial_delay(). +// +// Ownership of window is retained by the caller. That is, Blocker makes a +// copy of window and does not attempt to delete it. +class Blocker { + public: + Blocker(size_t chunk_size, + size_t block_size, + size_t num_input_channels, + size_t num_output_channels, + const float* window, + size_t shift_amount, + BlockerCallback* callback); + ~Blocker(); + + void ProcessChunk(const float* const* input, + size_t chunk_size, + size_t num_input_channels, + size_t num_output_channels, + float* const* output); + + size_t initial_delay() const { return initial_delay_; } + + private: + const size_t chunk_size_; + const size_t block_size_; + const size_t num_input_channels_; + const size_t num_output_channels_; + + // The number of frames of delay to add at the beginning of the first chunk. 
+ const size_t initial_delay_; + + // The frame index into the input buffer where the first block should be read + // from. This is necessary because shift_amount_ is not necessarily a + // multiple of chunk_size_, so blocks won't line up at the start of the + // buffer. + size_t frame_offset_; + + // Since blocks nearly always overlap, there are certain blocks that require + // frames from the end of one chunk and the beginning of the next chunk. The + // input and output buffers are responsible for saving those frames between + // calls to ProcessChunk(). + // + // Both contain |initial delay| + `chunk_size` frames. The input is a fairly + // standard FIFO, but due to the overlap-add it's harder to use an + // AudioRingBuffer for the output. + AudioRingBuffer input_buffer_; + ChannelBuffer<float> output_buffer_; + + // Space for the input block (can't wrap because of windowing). + ChannelBuffer<float> input_block_; + + // Space for the output block (can't wrap because of overlap/add). + ChannelBuffer<float> output_block_; + + std::unique_ptr<float[]> window_; + + // The amount of frames between the start of contiguous blocks. For example, + // `shift_amount_` = `block_size_` / 2 for a Hann window. + size_t shift_amount_; + + BlockerCallback* callback_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc new file mode 100644 index 0000000000..9c8e789ba9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/blocker_unittest.cc @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/test/blocker.h" + +#include <memory> + +#include "rtc_base/arraysize.h" +#include "test/gtest.h" + +namespace { + +// Callback Function to add 3 to every sample in the signal. +class PlusThreeBlockerCallback : public webrtc::BlockerCallback { + public: + void ProcessBlock(const float* const* input, + size_t num_frames, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) override { + for (size_t i = 0; i < num_output_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + output[i][j] = input[i][j] + 3; + } + } + } +}; + +// No-op Callback Function. +class CopyBlockerCallback : public webrtc::BlockerCallback { + public: + void ProcessBlock(const float* const* input, + size_t num_frames, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) override { + for (size_t i = 0; i < num_output_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + output[i][j] = input[i][j]; + } + } + } +}; + +} // namespace + +namespace webrtc { + +// Tests blocking with a window that multiplies the signal by 2, a callback +// that adds 3 to each sample in the signal, and different combinations of chunk +// size, block size, and shift amount. 
+class BlockerTest : public ::testing::Test { + protected: + void RunTest(Blocker* blocker, + size_t chunk_size, + size_t num_frames, + const float* const* input, + float* const* input_chunk, + float* const* output, + float* const* output_chunk, + size_t num_input_channels, + size_t num_output_channels) { + size_t start = 0; + size_t end = chunk_size - 1; + while (end < num_frames) { + CopyTo(input_chunk, 0, start, num_input_channels, chunk_size, input); + blocker->ProcessChunk(input_chunk, chunk_size, num_input_channels, + num_output_channels, output_chunk); + CopyTo(output, start, 0, num_output_channels, chunk_size, output_chunk); + + start += chunk_size; + end += chunk_size; + } + } + + void ValidateSignalEquality(const float* const* expected, + const float* const* actual, + size_t num_channels, + size_t num_frames) { + for (size_t i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + EXPECT_FLOAT_EQ(expected[i][j], actual[i][j]); + } + } + } + + void ValidateInitialDelay(const float* const* output, + size_t num_channels, + size_t num_frames, + size_t initial_delay) { + for (size_t i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + if (j < initial_delay) { + EXPECT_FLOAT_EQ(output[i][j], 0.f); + } else { + EXPECT_GT(output[i][j], 0.f); + } + } + } + } + + static void CopyTo(float* const* dst, + size_t start_index_dst, + size_t start_index_src, + size_t num_channels, + size_t num_frames, + const float* const* src) { + for (size_t i = 0; i < num_channels; ++i) { + memcpy(&dst[i][start_index_dst], &src[i][start_index_src], + num_frames * sizeof(float)); + } + } +}; + +TEST_F(BlockerTest, TestBlockerMutuallyPrimeChunkandBlockSize) { + const size_t kNumInputChannels = 3; + const size_t kNumOutputChannels = 2; + const size_t kNumFrames = 10; + const size_t kBlockSize = 4; + const size_t kChunkSize = 5; + const size_t kShiftAmount = 2; + + const float kInput[kNumInputChannels][kNumFrames] = { + {1, 1, 1, 1, 1, 1, 1, 1, 
1, 1}, + {2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 3, 3, 3, 3, 3, 3}}; + ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels); + input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput)); + + // The expected signal has one row per *output* channel, matching what + // ValidateSignalEquality() compares below. + const float kExpectedOutput[kNumOutputChannels][kNumFrames] = { + {6, 6, 12, 20, 20, 20, 20, 20, 20, 20}, + {6, 6, 12, 28, 28, 28, 28, 28, 28, 28}}; + ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels); + expected_output_cb.SetDataForTesting( + kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput)); + + const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f}; + + ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels); + ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels); + ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels); + + PlusThreeBlockerCallback callback; + Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels, + kWindow, kShiftAmount, &callback); + + RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(), + input_chunk_cb.channels(), actual_output_cb.channels(), + output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels); + + ValidateSignalEquality(expected_output_cb.channels(), + actual_output_cb.channels(), kNumOutputChannels, + kNumFrames); +} + +TEST_F(BlockerTest, TestBlockerMutuallyPrimeShiftAndBlockSize) { + const size_t kNumInputChannels = 3; + const size_t kNumOutputChannels = 2; + const size_t kNumFrames = 12; + const size_t kBlockSize = 4; + const size_t kChunkSize = 6; + const size_t kShiftAmount = 3; + + const float kInput[kNumInputChannels][kNumFrames] = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}}; + ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels); + input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput)); + + const float kExpectedOutput[kNumOutputChannels][kNumFrames] = { + {6, 10, 10, 20, 10, 10, 
20, 10, 10, 20, 10, 10}, + {6, 14, 14, 28, 14, 14, 28, 14, 14, 28, 14, 14}}; + ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels); + expected_output_cb.SetDataForTesting( + kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput)); + + const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f}; + + ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels); + ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels); + ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels); + + PlusThreeBlockerCallback callback; + Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels, + kWindow, kShiftAmount, &callback); + + RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(), + input_chunk_cb.channels(), actual_output_cb.channels(), + output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels); + + ValidateSignalEquality(expected_output_cb.channels(), + actual_output_cb.channels(), kNumOutputChannels, + kNumFrames); +} + +TEST_F(BlockerTest, TestBlockerNoOverlap) { + const size_t kNumInputChannels = 3; + const size_t kNumOutputChannels = 2; + const size_t kNumFrames = 12; + const size_t kBlockSize = 4; + const size_t kChunkSize = 4; + const size_t kShiftAmount = 4; + + const float kInput[kNumInputChannels][kNumFrames] = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}}; + ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels); + input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput)); + + const float kExpectedOutput[kNumOutputChannels][kNumFrames] = { + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}}; + ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels); + expected_output_cb.SetDataForTesting( + kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput)); + + const float kWindow[kBlockSize] = 
{2.f, 2.f, 2.f, 2.f}; + + ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels); + ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels); + ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels); + + PlusThreeBlockerCallback callback; + Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels, + kWindow, kShiftAmount, &callback); + + RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(), + input_chunk_cb.channels(), actual_output_cb.channels(), + output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels); + + ValidateSignalEquality(expected_output_cb.channels(), + actual_output_cb.channels(), kNumOutputChannels, + kNumFrames); +} + +TEST_F(BlockerTest, InitialDelaysAreMinimum) { + const size_t kNumInputChannels = 3; + const size_t kNumOutputChannels = 2; + const size_t kNumFrames = 1280; + const size_t kChunkSize[] = {80, 80, 80, 80, 80, 80, + 160, 160, 160, 160, 160, 160}; + const size_t kBlockSize[] = {64, 64, 64, 128, 128, 128, + 128, 128, 128, 256, 256, 256}; + const size_t kShiftAmount[] = {16, 32, 64, 32, 64, 128, + 32, 64, 128, 64, 128, 256}; + const size_t kInitialDelay[] = {48, 48, 48, 112, 112, 112, + 96, 96, 96, 224, 224, 224}; + + float input[kNumInputChannels][kNumFrames]; + for (size_t i = 0; i < kNumInputChannels; ++i) { + for (size_t j = 0; j < kNumFrames; ++j) { + input[i][j] = i + 1; + } + } + ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels); + input_cb.SetDataForTesting(input[0], sizeof(input) / sizeof(**input)); + + ChannelBuffer<float> output_cb(kNumFrames, kNumOutputChannels); + + CopyBlockerCallback callback; + + for (size_t i = 0; i < arraysize(kChunkSize); ++i) { + std::unique_ptr<float[]> window(new float[kBlockSize[i]]); + for (size_t j = 0; j < kBlockSize[i]; ++j) { + window[j] = 1.f; + } + + ChannelBuffer<float> input_chunk_cb(kChunkSize[i], kNumInputChannels); + ChannelBuffer<float> output_chunk_cb(kChunkSize[i], kNumOutputChannels); + + Blocker 
blocker(kChunkSize[i], kBlockSize[i], kNumInputChannels, + kNumOutputChannels, window.get(), kShiftAmount[i], + &callback); + + RunTest(&blocker, kChunkSize[i], kNumFrames, input_cb.channels(), + input_chunk_cb.channels(), output_cb.channels(), + output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels); + + ValidateInitialDelay(output_cb.channels(), kNumOutputChannels, kNumFrames, + kInitialDelay[i]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc new file mode 100644 index 0000000000..b1a6526bba --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/opus/test/lapped_transform.h" + +#include <algorithm> +#include <cstdlib> +#include <cstring> + +#include "common_audio/real_fourier.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +void LappedTransform::BlockThunk::ProcessBlock(const float* const* input, + size_t num_frames, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) { + RTC_CHECK_EQ(num_input_channels, parent_->num_in_channels_); + RTC_CHECK_EQ(num_output_channels, parent_->num_out_channels_); + RTC_CHECK_EQ(parent_->block_length_, num_frames); + + for (size_t i = 0; i < num_input_channels; ++i) { + memcpy(parent_->real_buf_.Row(i), input[i], num_frames * sizeof(*input[0])); + parent_->fft_->Forward(parent_->real_buf_.Row(i), + parent_->cplx_pre_.Row(i)); + } + + size_t block_length = + RealFourier::ComplexLength(RealFourier::FftOrder(num_frames)); + RTC_CHECK_EQ(parent_->cplx_length_, block_length); + parent_->block_processor_->ProcessAudioBlock( + parent_->cplx_pre_.Array(), num_input_channels, parent_->cplx_length_, + num_output_channels, parent_->cplx_post_.Array()); + + for (size_t i = 0; i < num_output_channels; ++i) { + parent_->fft_->Inverse(parent_->cplx_post_.Row(i), + parent_->real_buf_.Row(i)); + memcpy(output[i], parent_->real_buf_.Row(i), + num_frames * sizeof(*input[0])); + } +} + +LappedTransform::LappedTransform(size_t num_in_channels, + size_t num_out_channels, + size_t chunk_length, + const float* window, + size_t block_length, + size_t shift_amount, + Callback* callback) + : blocker_callback_(this), + num_in_channels_(num_in_channels), + num_out_channels_(num_out_channels), + block_length_(block_length), + chunk_length_(chunk_length), + block_processor_(callback), + blocker_(chunk_length_, + block_length_, + num_in_channels_, + num_out_channels_, + window, + shift_amount, + &blocker_callback_), + fft_(RealFourier::Create(RealFourier::FftOrder(block_length_))), + 
cplx_length_(RealFourier::ComplexLength(fft_->order())), + real_buf_(num_in_channels, + block_length_, + RealFourier::kFftBufferAlignment), + cplx_pre_(num_in_channels, + cplx_length_, + RealFourier::kFftBufferAlignment), + cplx_post_(num_out_channels, + cplx_length_, + RealFourier::kFftBufferAlignment) { + RTC_CHECK(num_in_channels_ > 0); + RTC_CHECK_GT(block_length_, 0); + RTC_CHECK_GT(chunk_length_, 0); + RTC_CHECK(block_processor_); + + // block_length_ power of 2? + RTC_CHECK_EQ(0, block_length_ & (block_length_ - 1)); +} + +LappedTransform::~LappedTransform() = default; + +void LappedTransform::ProcessChunk(const float* const* in_chunk, + float* const* out_chunk) { + blocker_.ProcessChunk(in_chunk, chunk_length_, num_in_channels_, + num_out_channels_, out_chunk); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h new file mode 100644 index 0000000000..bb25c34a9e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_ +#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_ + +#include <complex> +#include <memory> + +#include "common_audio/real_fourier.h" +#include "modules/audio_coding/codecs/opus/test/blocker.h" +#include "rtc_base/memory/aligned_malloc.h" + +namespace webrtc { + +// Wrapper class for aligned arrays. 
Every row (and the first dimension) are +// aligned to the given byte alignment. +// NOTE(review): the class frees its buffers in the destructor but declares no +// copy operations, so copying an instance would double-free; do not copy. +template <typename T> +class AlignedArray { + public: + AlignedArray(size_t rows, size_t cols, size_t alignment) + : rows_(rows), cols_(cols) { + RTC_CHECK_GT(alignment, 0); + head_row_ = + static_cast<T**>(AlignedMalloc(rows_ * sizeof(*head_row_), alignment)); + for (size_t i = 0; i < rows_; ++i) { + head_row_[i] = static_cast<T*>( + AlignedMalloc(cols_ * sizeof(**head_row_), alignment)); + } + } + + ~AlignedArray() { + for (size_t i = 0; i < rows_; ++i) { + AlignedFree(head_row_[i]); + } + AlignedFree(head_row_); + } + + T* const* Array() { return head_row_; } + + const T* const* Array() const { return head_row_; } + + // Returns the buffer of row `row`. Valid indices are [0, rows_); the check + // must be strict because head_row_[rows_] is one past the allocation. + T* Row(size_t row) { + RTC_CHECK_LT(row, rows_); + return head_row_[row]; + } + + const T* Row(size_t row) const { + RTC_CHECK_LT(row, rows_); + return head_row_[row]; + } + + private: + size_t rows_; + size_t cols_; + T** head_row_; +}; + +// Helper class for audio processing modules which operate on frequency domain +// input derived from the windowed time domain audio stream. +// +// The input audio chunk is sliced into possibly overlapping blocks, multiplied +// by a window and transformed with an FFT implementation. The transformed data +// is supplied to the given callback for processing. The processed output is +// then inverse transformed into the time domain and spliced back into a chunk +// which constitutes the final output of this processing module. +class LappedTransform { + public: + class Callback { + public: + virtual ~Callback() {} + + virtual void ProcessAudioBlock(const std::complex<float>* const* in_block, + size_t num_in_channels, + size_t frames, + size_t num_out_channels, + std::complex<float>* const* out_block) = 0; + }; + + // Construct a transform instance. `chunk_length` is the number of samples in + // each channel. 
`window` defines the window, owned by the caller (a copy is + // made internally); `window` should have length equal to `block_length`. + // `block_length` defines the length of a block, in samples. + // `shift_amount` is in samples. `callback` is the caller-owned audio + // processing function called for each block of the input chunk. + LappedTransform(size_t num_in_channels, + size_t num_out_channels, + size_t chunk_length, + const float* window, + size_t block_length, + size_t shift_amount, + Callback* callback); + ~LappedTransform(); + + // Main audio processing helper method. Internally slices `in_chunk` into + // blocks, transforms them to frequency domain, calls the callback for each + // block and returns a de-blocked time domain chunk of audio through + // `out_chunk`. Both buffers are caller-owned. + void ProcessChunk(const float* const* in_chunk, float* const* out_chunk); + + // Get the chunk length. + // + // The chunk length is the number of samples per channel that must be passed + // to ProcessChunk via the parameter in_chunk. + // + // Returns the same chunk_length passed to the LappedTransform constructor. + size_t chunk_length() const { return chunk_length_; } + + // Get the number of input channels. + // + // This is the number of arrays that must be passed to ProcessChunk via + // in_chunk. + // + // Returns the same num_in_channels passed to the LappedTransform constructor. + size_t num_in_channels() const { return num_in_channels_; } + + // Get the number of output channels. + // + // This is the number of arrays that must be passed to ProcessChunk via + // out_chunk. + // + // Returns the same num_out_channels passed to the LappedTransform + // constructor. + size_t num_out_channels() const { return num_out_channels_; } + + // Returns the initial delay. + // + // This is the delay introduced by the `blocker_` to be able to get and return + // chunks of `chunk_length`, but process blocks of `block_length`. 
+ size_t initial_delay() const { return blocker_.initial_delay(); } + + private: + // Internal middleware callback, given to the blocker. Transforms each block + // and hands it over to the processing method given at construction time. + class BlockThunk : public BlockerCallback { + public: + explicit BlockThunk(LappedTransform* parent) : parent_(parent) {} + + void ProcessBlock(const float* const* input, + size_t num_frames, + size_t num_input_channels, + size_t num_output_channels, + float* const* output) override; + + private: + LappedTransform* const parent_; + } blocker_callback_; + + const size_t num_in_channels_; + const size_t num_out_channels_; + + const size_t block_length_; + const size_t chunk_length_; + + Callback* const block_processor_; + Blocker blocker_; + + // TODO(alessiob): Replace RealFourier with a different FFT library. + std::unique_ptr<RealFourier> fft_; + const size_t cplx_length_; + AlignedArray<float> real_buf_; + AlignedArray<std::complex<float> > cplx_pre_; + AlignedArray<std::complex<float> > cplx_post_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc new file mode 100644 index 0000000000..1003ed52e5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/opus/test/lapped_transform_unittest.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/opus/test/lapped_transform.h" + +#include <algorithm> +#include <cmath> +#include <cstring> + +#include "test/gtest.h" + +using std::complex; + +namespace { + +class NoopCallback : public webrtc::LappedTransform::Callback { + public: + NoopCallback() : block_num_(0) {} + + void ProcessAudioBlock(const complex<float>* const* in_block, + size_t in_channels, + size_t frames, + size_t out_channels, + complex<float>* const* out_block) override { + RTC_CHECK_EQ(in_channels, out_channels); + for (size_t i = 0; i < out_channels; ++i) { + memcpy(out_block[i], in_block[i], sizeof(**in_block) * frames); + } + ++block_num_; + } + + size_t block_num() { return block_num_; } + + private: + size_t block_num_; +}; + +class FftCheckerCallback : public webrtc::LappedTransform::Callback { + public: + FftCheckerCallback() : block_num_(0) {} + + void ProcessAudioBlock(const complex<float>* const* in_block, + size_t in_channels, + size_t frames, + size_t out_channels, + complex<float>* const* out_block) override { + RTC_CHECK_EQ(in_channels, out_channels); + + size_t full_length = (frames - 1) * 2; + ++block_num_; + + if (block_num_ > 0) { + ASSERT_NEAR(in_block[0][0].real(), static_cast<float>(full_length), + 1e-5f); + ASSERT_NEAR(in_block[0][0].imag(), 0.0f, 1e-5f); + for (size_t i = 1; i < frames; ++i) { + ASSERT_NEAR(in_block[0][i].real(), 0.0f, 1e-5f); + ASSERT_NEAR(in_block[0][i].imag(), 0.0f, 1e-5f); + } + } + } + + size_t block_num() { return block_num_; } + + private: + size_t block_num_; +}; + +void SetFloatArray(float value, int rows, int cols, float* const* array) { + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + array[i][j] = value; + } + } +} + +} // namespace + +namespace webrtc { + +TEST(LappedTransformTest, Windowless) { + const size_t kChannels = 3; + const size_t kChunkLength = 512; + const size_t kBlockLength = 64; + const size_t kShiftAmount = 64; + NoopCallback noop; + + // Rectangular window. 
+ float window[kBlockLength]; + std::fill(window, &window[kBlockLength], 1.0f); + + LappedTransform trans(kChannels, kChannels, kChunkLength, window, + kBlockLength, kShiftAmount, &noop); + float in_buffer[kChannels][kChunkLength]; + float* in_chunk[kChannels]; + float out_buffer[kChannels][kChunkLength]; + float* out_chunk[kChannels]; + + in_chunk[0] = in_buffer[0]; + in_chunk[1] = in_buffer[1]; + in_chunk[2] = in_buffer[2]; + out_chunk[0] = out_buffer[0]; + out_chunk[1] = out_buffer[1]; + out_chunk[2] = out_buffer[2]; + SetFloatArray(2.0f, kChannels, kChunkLength, in_chunk); + SetFloatArray(-1.0f, kChannels, kChunkLength, out_chunk); + + trans.ProcessChunk(in_chunk, out_chunk); + + for (size_t i = 0; i < kChannels; ++i) { + for (size_t j = 0; j < kChunkLength; ++j) { + ASSERT_NEAR(out_chunk[i][j], 2.0f, 1e-5f); + } + } + + ASSERT_EQ(kChunkLength / kBlockLength, noop.block_num()); +} + +TEST(LappedTransformTest, IdentityProcessor) { + const size_t kChunkLength = 512; + const size_t kBlockLength = 64; + const size_t kShiftAmount = 32; + NoopCallback noop; + + // Identity window for |overlap = block_size / 2|. + float window[kBlockLength]; + std::fill(window, &window[kBlockLength], std::sqrt(0.5f)); + + LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kShiftAmount, + &noop); + float in_buffer[kChunkLength]; + float* in_chunk = in_buffer; + float out_buffer[kChunkLength]; + float* out_chunk = out_buffer; + + SetFloatArray(2.0f, 1, kChunkLength, &in_chunk); + SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk); + + trans.ProcessChunk(&in_chunk, &out_chunk); + + for (size_t i = 0; i < kChunkLength; ++i) { + ASSERT_NEAR(out_chunk[i], (i < kBlockLength - kShiftAmount) ? 0.0f : 2.0f, + 1e-5f); + } + + ASSERT_EQ(kChunkLength / kShiftAmount, noop.block_num()); +} + +TEST(LappedTransformTest, Callbacks) { + const size_t kChunkLength = 512; + const size_t kBlockLength = 64; + FftCheckerCallback call; + + // Rectangular window. 
+ float window[kBlockLength]; + std::fill(window, &window[kBlockLength], 1.0f); + + LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kBlockLength, + &call); + float in_buffer[kChunkLength]; + float* in_chunk = in_buffer; + float out_buffer[kChunkLength]; + float* out_chunk = out_buffer; + + SetFloatArray(1.0f, 1, kChunkLength, &in_chunk); + SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk); + + trans.ProcessChunk(&in_chunk, &out_chunk); + + ASSERT_EQ(kChunkLength / kBlockLength, call.block_num()); +} + +TEST(LappedTransformTest, chunk_length) { + const size_t kBlockLength = 64; + FftCheckerCallback call; + const float window[kBlockLength] = {}; + + // Make sure that chunk_length returns the same value passed to the + // LappedTransform constructor. + { + const size_t kExpectedChunkLength = 512; + const LappedTransform trans(1, 1, kExpectedChunkLength, window, + kBlockLength, kBlockLength, &call); + + EXPECT_EQ(kExpectedChunkLength, trans.chunk_length()); + } + { + const size_t kExpectedChunkLength = 160; + const LappedTransform trans(1, 1, kExpectedChunkLength, window, + kBlockLength, kBlockLength, &call); + + EXPECT_EQ(kExpectedChunkLength, trans.chunk_length()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc new file mode 100644 index 0000000000..7761efe8b3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h" + +#include <utility> + +#include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +AudioDecoderPcm16B::AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels) + : sample_rate_hz_(sample_rate_hz), num_channels_(num_channels) { + RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 || + sample_rate_hz == 32000 || sample_rate_hz == 48000) + << "Unsupported sample rate " << sample_rate_hz; + RTC_DCHECK_GE(num_channels, 1); +} + +void AudioDecoderPcm16B::Reset() {} + +int AudioDecoderPcm16B::SampleRateHz() const { + return sample_rate_hz_; +} + +size_t AudioDecoderPcm16B::Channels() const { + return num_channels_; +} + +int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { + RTC_DCHECK_EQ(sample_rate_hz_, sample_rate_hz); + // Adjust the encoded length down to ensure the same number of samples in each + // channel. + const size_t encoded_len_adjusted = + PacketDuration(encoded, encoded_len) * 2 * + Channels(); // 2 bytes per sample per channel + size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len_adjusted, decoded); + *speech_type = ConvertSpeechType(1); + return static_cast<int>(ret); +} + +std::vector<AudioDecoder::ParseResult> AudioDecoderPcm16B::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz_, 1000); + return LegacyEncodedAudioFrame::SplitBySamples( + this, std::move(payload), timestamp, samples_per_ms * 2 * num_channels_, + samples_per_ms); +} + +int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded, + size_t encoded_len) const { + // Two encoded byte per sample per channel. 
+ return static_cast<int>(encoded_len / (2 * Channels())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h new file mode 100644 index 0000000000..6f50161d3f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_ +#define MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <vector> + +#include "api/audio_codecs/audio_decoder.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +class AudioDecoderPcm16B final : public AudioDecoder { + public: + AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels); + + AudioDecoderPcm16B(const AudioDecoderPcm16B&) = delete; + AudioDecoderPcm16B& operator=(const AudioDecoderPcm16B&) = delete; + + void Reset() override; + std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, + uint32_t timestamp) override; + int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + int SampleRateHz() const override; + size_t Channels() const override; + + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + + private: + const int sample_rate_hz_; + const size_t num_channels_; +}; + +} // namespace webrtc + +#endif // 
MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_DECODER_PCM16B_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc new file mode 100644 index 0000000000..9445b1ee3e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" + +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +size_t AudioEncoderPcm16B::EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) { + return WebRtcPcm16b_Encode(audio, input_len, encoded); +} + +size_t AudioEncoderPcm16B::BytesPerSample() const { + return 2; +} + +AudioEncoder::CodecType AudioEncoderPcm16B::GetCodecType() const { + return CodecType::kOther; +} + +bool AudioEncoderPcm16B::Config::IsOk() const { + if ((sample_rate_hz != 8000) && (sample_rate_hz != 16000) && + (sample_rate_hz != 32000) && (sample_rate_hz != 48000)) + return false; + return AudioEncoderPcm::Config::IsOk(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h new file mode 100644 index 0000000000..c363b40b3f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_ +#define MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_ + +#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h" + +namespace webrtc { + +class AudioEncoderPcm16B final : public AudioEncoderPcm { + public: + struct Config : public AudioEncoderPcm::Config { + public: + Config() : AudioEncoderPcm::Config(107), sample_rate_hz(8000) {} + bool IsOk() const; + + int sample_rate_hz; + }; + + explicit AudioEncoderPcm16B(const Config& config) + : AudioEncoderPcm(config, config.sample_rate_hz) {} + + AudioEncoderPcm16B(const AudioEncoderPcm16B&) = delete; + AudioEncoderPcm16B& operator=(const AudioEncoderPcm16B&) = delete; + + protected: + size_t EncodeCall(const int16_t* audio, + size_t input_len, + uint8_t* encoded) override; + + size_t BytesPerSample() const override; + + AudioEncoder::CodecType GetCodecType() const override; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_PCM16B_AUDIO_ENCODER_PCM16B_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c new file mode 100644 index 0000000000..2f6dce5f41 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/pcm16b/pcm16b.h" + +size_t WebRtcPcm16b_Encode(const int16_t* speech, + size_t len, + uint8_t* encoded) { + size_t i; + for (i = 0; i < len; ++i) { + uint16_t s = speech[i]; + encoded[2 * i] = s >> 8; + encoded[2 * i + 1] = s; + } + return 2 * len; +} + +size_t WebRtcPcm16b_Decode(const uint8_t* encoded, + size_t len, + int16_t* speech) { + size_t i; + for (i = 0; i < len / 2; ++i) + speech[i] = encoded[2 * i] << 8 | encoded[2 * i + 1]; + return len / 2; +} diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h new file mode 100644 index 0000000000..75d1efda3b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_ +#define MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_ +/* + * Define the fixpoint numeric formats + */ + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcPcm16b_Encode(...) + * + * "Encode" a sample vector to 16 bit linear (Encoded standard is big endian) + * + * Input: + * - speech : Input speech vector + * - len : Number of samples in speech vector + * + * Output: + * - encoded : Encoded data vector (big endian 16 bit) + * + * Returned value : Length (in bytes) of coded data. 
+ * Always equal to twice the len input parameter. + */ + +size_t WebRtcPcm16b_Encode(const int16_t* speech, size_t len, uint8_t* encoded); + +/**************************************************************************** + * WebRtcPcm16b_Decode(...) + * + * "Decode" a vector to 16 bit linear (Encoded standard is big endian) + * + * Input: + * - encoded : Encoded data vector (big endian 16 bit) + * - len : Number of bytes in encoded + * + * Output: + * - speech : Decoded speech vector + * + * Returned value : Samples in speech + */ + +size_t WebRtcPcm16b_Decode(const uint8_t* encoded, size_t len, int16_t* speech); + +#ifdef __cplusplus +} +#endif + +#endif /* MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_H_ */ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc new file mode 100644 index 0000000000..ecf91b45ac --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.cc @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/pcm16b/pcm16b_common.h" + +#include <stdint.h> + +#include <initializer_list> + +namespace webrtc { + +void Pcm16BAppendSupportedCodecSpecs(std::vector<AudioCodecSpec>* specs) { + for (uint8_t num_channels : {1, 2}) { + for (int sample_rate_hz : {8000, 16000, 32000}) { + specs->push_back( + {{"L16", sample_rate_hz, num_channels}, + {sample_rate_hz, num_channels, sample_rate_hz * num_channels * 16}}); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h new file mode 100644 index 0000000000..3fae717ff3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/pcm16b/pcm16b_common.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_ +#define MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_ + +#include <vector> + +#include "api/audio_codecs/audio_format.h" + +namespace webrtc { +void Pcm16BAppendSupportedCodecSpecs(std::vector<AudioCodecSpec>* specs); +} + +#endif // MODULES_AUDIO_CODING_CODECS_PCM16B_PCM16B_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc new file mode 100644 index 0000000000..724bba52d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h" + +#include <string.h> + +#include <utility> +#include <vector> + +#include "absl/strings/string_view.h" +#include "rtc_base/byte_order.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +static constexpr const int kRedMaxPacketSize = + 1 << 10; // RED packets must be less than 1024 bytes to fit the 10 bit + // block length. +static constexpr const size_t kRedMaxTimestampDelta = + 1 << 14; // RED packets can encode a timestamp delta of 14 bits. +static constexpr const size_t kAudioMaxRtpPacketLen = + 1200; // The typical MTU is 1200 bytes. + +static constexpr size_t kRedHeaderLength = 4; // 4 bytes RED header. +static constexpr size_t kRedLastHeaderLength = + 1; // reduced size for last RED header. + +static constexpr size_t kRedNumberOfRedundantEncodings = + 1; // The level of redundancy we support. 
+ +AudioEncoderCopyRed::Config::Config() = default; +AudioEncoderCopyRed::Config::Config(Config&&) = default; +AudioEncoderCopyRed::Config::~Config() = default; + +size_t GetMaxRedundancyFromFieldTrial(const FieldTrialsView& field_trials) { + const std::string red_trial = + field_trials.Lookup("WebRTC-Audio-Red-For-Opus"); + size_t redundancy = 0; + if (sscanf(red_trial.c_str(), "Enabled-%zu", &redundancy) != 1 || + redundancy > 9) { + return kRedNumberOfRedundantEncodings; + } + return redundancy; +} + +AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config, + const FieldTrialsView& field_trials) + : speech_encoder_(std::move(config.speech_encoder)), + primary_encoded_(0, kAudioMaxRtpPacketLen), + max_packet_length_(kAudioMaxRtpPacketLen), + red_payload_type_(config.payload_type) { + RTC_CHECK(speech_encoder_) << "Speech encoder not provided."; + + auto number_of_redundant_encodings = + GetMaxRedundancyFromFieldTrial(field_trials); + for (size_t i = 0; i < number_of_redundant_encodings; i++) { + std::pair<EncodedInfo, rtc::Buffer> redundant; + redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen); + redundant_encodings_.push_front(std::move(redundant)); + } +} + +AudioEncoderCopyRed::~AudioEncoderCopyRed() = default; + +int AudioEncoderCopyRed::SampleRateHz() const { + return speech_encoder_->SampleRateHz(); +} + +size_t AudioEncoderCopyRed::NumChannels() const { + return speech_encoder_->NumChannels(); +} + +int AudioEncoderCopyRed::RtpTimestampRateHz() const { + return speech_encoder_->RtpTimestampRateHz(); +} + +size_t AudioEncoderCopyRed::Num10MsFramesInNextPacket() const { + return speech_encoder_->Num10MsFramesInNextPacket(); +} + +size_t AudioEncoderCopyRed::Max10MsFramesInAPacket() const { + return speech_encoder_->Max10MsFramesInAPacket(); +} + +int AudioEncoderCopyRed::GetTargetBitrate() const { + return speech_encoder_->GetTargetBitrate(); +} + +AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl( + uint32_t rtp_timestamp, + 
rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) { + primary_encoded_.Clear(); + EncodedInfo info = + speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded_); + RTC_CHECK(info.redundant.empty()) << "Cannot use nested redundant encoders."; + RTC_DCHECK_EQ(primary_encoded_.size(), info.encoded_bytes); + + if (info.encoded_bytes == 0 || info.encoded_bytes >= kRedMaxPacketSize) { + return info; + } + RTC_DCHECK_GT(max_packet_length_, info.encoded_bytes); + + size_t header_length_bytes = kRedLastHeaderLength; + size_t bytes_available = max_packet_length_ - info.encoded_bytes; + auto it = redundant_encodings_.begin(); + + // Determine how much redundancy we can fit into our packet by + // iterating forward. This is determined both by the length as well + // as the timestamp difference. The latter can occur with opus DTX which + // has timestamp gaps of 400ms which exceeds REDs timestamp delta field size. + for (; it != redundant_encodings_.end(); it++) { + if (bytes_available < kRedHeaderLength + it->first.encoded_bytes) { + break; + } + if (it->first.encoded_bytes == 0) { + break; + } + if (rtp_timestamp - it->first.encoded_timestamp >= kRedMaxTimestampDelta) { + break; + } + bytes_available -= kRedHeaderLength + it->first.encoded_bytes; + header_length_bytes += kRedHeaderLength; + } + + // Allocate room for RFC 2198 header. + encoded->SetSize(header_length_bytes); + + // Iterate backwards and append the data. 
+ size_t header_offset = 0; + while (it-- != redundant_encodings_.begin()) { + encoded->AppendData(it->second); + + const uint32_t timestamp_delta = + info.encoded_timestamp - it->first.encoded_timestamp; + encoded->data()[header_offset] = it->first.payload_type | 0x80; + rtc::SetBE16(static_cast<uint8_t*>(encoded->data()) + header_offset + 1, + (timestamp_delta << 2) | (it->first.encoded_bytes >> 8)); + encoded->data()[header_offset + 3] = it->first.encoded_bytes & 0xff; + header_offset += kRedHeaderLength; + info.redundant.push_back(it->first); + } + + // `info` will be implicitly cast to an EncodedInfoLeaf struct, effectively + // discarding the (empty) vector of redundant information. This is + // intentional. + if (header_length_bytes > kRedHeaderLength) { + info.redundant.push_back(info); + RTC_DCHECK_EQ(info.speech, + info.redundant[info.redundant.size() - 1].speech); + } + + encoded->AppendData(primary_encoded_); + RTC_DCHECK_EQ(header_offset, header_length_bytes - 1); + encoded->data()[header_offset] = info.payload_type; + + // Shift the redundant encodings. + auto rit = redundant_encodings_.rbegin(); + for (auto next = std::next(rit); next != redundant_encodings_.rend(); + rit++, next = std::next(rit)) { + rit->first = next->first; + rit->second.SetData(next->second); + } + it = redundant_encodings_.begin(); + if (it != redundant_encodings_.end()) { + it->first = info; + it->second.SetData(primary_encoded_); + } + + // Update main EncodedInfo. 
+ info.payload_type = red_payload_type_; + info.encoded_bytes = encoded->size(); + return info; +} + +void AudioEncoderCopyRed::Reset() { + speech_encoder_->Reset(); + auto number_of_redundant_encodings = redundant_encodings_.size(); + redundant_encodings_.clear(); + for (size_t i = 0; i < number_of_redundant_encodings; i++) { + std::pair<EncodedInfo, rtc::Buffer> redundant; + redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen); + redundant_encodings_.push_front(std::move(redundant)); + } +} + +bool AudioEncoderCopyRed::SetFec(bool enable) { + return speech_encoder_->SetFec(enable); +} + +bool AudioEncoderCopyRed::SetDtx(bool enable) { + return speech_encoder_->SetDtx(enable); +} + +bool AudioEncoderCopyRed::GetDtx() const { + return speech_encoder_->GetDtx(); +} + +bool AudioEncoderCopyRed::SetApplication(Application application) { + return speech_encoder_->SetApplication(application); +} + +void AudioEncoderCopyRed::SetMaxPlaybackRate(int frequency_hz) { + speech_encoder_->SetMaxPlaybackRate(frequency_hz); +} + +bool AudioEncoderCopyRed::EnableAudioNetworkAdaptor( + const std::string& config_string, + RtcEventLog* event_log) { + return speech_encoder_->EnableAudioNetworkAdaptor(config_string, event_log); +} + +void AudioEncoderCopyRed::DisableAudioNetworkAdaptor() { + speech_encoder_->DisableAudioNetworkAdaptor(); +} + +void AudioEncoderCopyRed::OnReceivedUplinkPacketLossFraction( + float uplink_packet_loss_fraction) { + speech_encoder_->OnReceivedUplinkPacketLossFraction( + uplink_packet_loss_fraction); +} + +void AudioEncoderCopyRed::OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) { + speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps, + bwe_period_ms); +} + +void AudioEncoderCopyRed::OnReceivedUplinkAllocation( + BitrateAllocationUpdate update) { + speech_encoder_->OnReceivedUplinkAllocation(update); +} + +absl::optional<std::pair<TimeDelta, TimeDelta>> 
+AudioEncoderCopyRed::GetFrameLengthRange() const { + return speech_encoder_->GetFrameLengthRange(); +} + +void AudioEncoderCopyRed::OnReceivedRtt(int rtt_ms) { + speech_encoder_->OnReceivedRtt(rtt_ms); +} + +void AudioEncoderCopyRed::OnReceivedOverhead(size_t overhead_bytes_per_packet) { + max_packet_length_ = kAudioMaxRtpPacketLen - overhead_bytes_per_packet; + return speech_encoder_->OnReceivedOverhead(overhead_bytes_per_packet); +} + +void AudioEncoderCopyRed::SetReceiverFrameLengthRange(int min_frame_length_ms, + int max_frame_length_ms) { + return speech_encoder_->SetReceiverFrameLengthRange(min_frame_length_ms, + max_frame_length_ms); +} + +ANAStats AudioEncoderCopyRed::GetANAStats() const { + return speech_encoder_->GetANAStats(); +} + +rtc::ArrayView<std::unique_ptr<AudioEncoder>> +AudioEncoderCopyRed::ReclaimContainedEncoders() { + return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h new file mode 100644 index 0000000000..359b5eaa17 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_ +#define MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <list> +#include <memory> +#include <utility> + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio_codecs/audio_encoder.h" +#include "api/field_trials_view.h" +#include "api/units/time_delta.h" +#include "rtc_base/buffer.h" + +namespace webrtc { + +// This class implements redundant audio coding as described in +// https://tools.ietf.org/html/rfc2198 +// The class object will have an underlying AudioEncoder object that performs +// the actual encodings. The current class will gather the N latest encodings +// from the underlying codec into one packet. Currently N is hard-coded to 2. + +class AudioEncoderCopyRed final : public AudioEncoder { + public: + struct Config { + Config(); + Config(Config&&); + ~Config(); + int payload_type; + std::unique_ptr<AudioEncoder> speech_encoder; + }; + + AudioEncoderCopyRed(Config&& config, const FieldTrialsView& field_trials); + + ~AudioEncoderCopyRed() override; + + AudioEncoderCopyRed(const AudioEncoderCopyRed&) = delete; + AudioEncoderCopyRed& operator=(const AudioEncoderCopyRed&) = delete; + + int SampleRateHz() const override; + size_t NumChannels() const override; + int RtpTimestampRateHz() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + + void Reset() override; + bool SetFec(bool enable) override; + + bool SetDtx(bool enable) override; + bool GetDtx() const override; + + bool SetApplication(Application application) override; + void SetMaxPlaybackRate(int frequency_hz) override; + bool EnableAudioNetworkAdaptor(const std::string& config_string, + RtcEventLog* event_log) override; + void DisableAudioNetworkAdaptor() override; + void OnReceivedUplinkPacketLossFraction( + float 
uplink_packet_loss_fraction) override; + void OnReceivedUplinkBandwidth( + int target_audio_bitrate_bps, + absl::optional<int64_t> bwe_period_ms) override; + void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override; + void OnReceivedRtt(int rtt_ms) override; + void OnReceivedOverhead(size_t overhead_bytes_per_packet) override; + void SetReceiverFrameLengthRange(int min_frame_length_ms, + int max_frame_length_ms) override; + ANAStats GetANAStats() const override; + absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange() + const override; + rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders() + override; + + protected: + EncodedInfo EncodeImpl(uint32_t rtp_timestamp, + rtc::ArrayView<const int16_t> audio, + rtc::Buffer* encoded) override; + + private: + std::unique_ptr<AudioEncoder> speech_encoder_; + rtc::Buffer primary_encoded_; + size_t max_packet_length_; + int red_payload_type_; + std::list<std::pair<EncodedInfo, rtc::Buffer>> redundant_encodings_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_RED_AUDIO_ENCODER_COPY_RED_H_ diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc new file mode 100644 index 0000000000..795a996624 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc @@ -0,0 +1,641 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/red/audio_encoder_copy_red.h" + +#include <memory> +#include <vector> + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/field_trial.h" +#include "test/gtest.h" +#include "test/mock_audio_encoder.h" +#include "test/scoped_key_value_config.h" +#include "test/testsupport/rtc_expect_death.h" + +using ::testing::_; +using ::testing::Eq; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::MockFunction; +using ::testing::Not; +using ::testing::Optional; +using ::testing::Return; +using ::testing::SetArgPointee; + +namespace webrtc { + +namespace { +static const size_t kMaxNumSamples = 48 * 10 * 2; // 10 ms @ 48 kHz stereo. +static const size_t kRedLastHeaderLength = + 1; // 1 byte RED header for the last element. +} + +class AudioEncoderCopyRedTest : public ::testing::Test { + protected: + AudioEncoderCopyRedTest() + : mock_encoder_(new MockAudioEncoder), + timestamp_(4711), + sample_rate_hz_(16000), + num_audio_samples_10ms(sample_rate_hz_ / 100), + red_payload_type_(200) { + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::unique_ptr<AudioEncoder>(mock_encoder_); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials_)); + memset(audio_, 0, sizeof(audio_)); + EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(1U)); + EXPECT_CALL(*mock_encoder_, SampleRateHz()) + .WillRepeatedly(Return(sample_rate_hz_)); + } + + void TearDown() override { red_.reset(); } + + void Encode() { + ASSERT_TRUE(red_.get() != NULL); + encoded_.Clear(); + encoded_info_ = red_->Encode( + timestamp_, + rtc::ArrayView<const int16_t>(audio_, num_audio_samples_10ms), + &encoded_); + timestamp_ += rtc::checked_cast<uint32_t>(num_audio_samples_10ms); + } + + test::ScopedKeyValueConfig field_trials_; + MockAudioEncoder* mock_encoder_; + std::unique_ptr<AudioEncoderCopyRed> red_; + uint32_t timestamp_; + 
int16_t audio_[kMaxNumSamples]; + const int sample_rate_hz_; + size_t num_audio_samples_10ms; + rtc::Buffer encoded_; + AudioEncoder::EncodedInfo encoded_info_; + const int red_payload_type_; +}; + +TEST_F(AudioEncoderCopyRedTest, CreateAndDestroy) {} + +TEST_F(AudioEncoderCopyRedTest, CheckSampleRatePropagation) { + EXPECT_CALL(*mock_encoder_, SampleRateHz()).WillOnce(Return(17)); + EXPECT_EQ(17, red_->SampleRateHz()); +} + +TEST_F(AudioEncoderCopyRedTest, CheckNumChannelsPropagation) { + EXPECT_CALL(*mock_encoder_, NumChannels()).WillOnce(Return(17U)); + EXPECT_EQ(17U, red_->NumChannels()); +} + +TEST_F(AudioEncoderCopyRedTest, CheckFrameSizePropagation) { + EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()) + .WillOnce(Return(17U)); + EXPECT_EQ(17U, red_->Num10MsFramesInNextPacket()); +} + +TEST_F(AudioEncoderCopyRedTest, CheckMaxFrameSizePropagation) { + EXPECT_CALL(*mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(17U)); + EXPECT_EQ(17U, red_->Max10MsFramesInAPacket()); +} + +TEST_F(AudioEncoderCopyRedTest, CheckTargetAudioBitratePropagation) { + EXPECT_CALL(*mock_encoder_, + OnReceivedUplinkBandwidth(4711, absl::optional<int64_t>())); + red_->OnReceivedUplinkBandwidth(4711, absl::nullopt); +} + +TEST_F(AudioEncoderCopyRedTest, CheckPacketLossFractionPropagation) { + EXPECT_CALL(*mock_encoder_, OnReceivedUplinkPacketLossFraction(0.5)); + red_->OnReceivedUplinkPacketLossFraction(0.5); +} + +TEST_F(AudioEncoderCopyRedTest, CheckGetFrameLengthRangePropagation) { + auto expected_range = + std::make_pair(TimeDelta::Millis(20), TimeDelta::Millis(20)); + EXPECT_CALL(*mock_encoder_, GetFrameLengthRange()) + .WillRepeatedly(Return(absl::make_optional(expected_range))); + EXPECT_THAT(red_->GetFrameLengthRange(), Optional(Eq(expected_range))); +} + +// Checks that the an Encode() call is immediately propagated to the speech +// encoder. 
+TEST_F(AudioEncoderCopyRedTest, CheckImmediateEncode) { + // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction + // check ensures that exactly one call to EncodeImpl happens in each + // Encode call. + InSequence s; + MockFunction<void(int check_point_id)> check; + for (int i = 1; i <= 6; ++i) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillRepeatedly(Return(AudioEncoder::EncodedInfo())); + EXPECT_CALL(check, Call(i)); + Encode(); + check.Call(i); + } +} + +// Checks that no output is produced if the underlying codec doesn't emit any +// new data, even if the RED codec is loaded with a secondary encoding. +TEST_F(AudioEncoderCopyRedTest, CheckNoOutput) { + static const size_t kEncodedSize = 17; + static const size_t kHeaderLenBytes = 5; + { + InSequence s; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(kEncodedSize))) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(0))) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(kEncodedSize))); + } + + // Start with one Encode() call that will produce output. + Encode(); + // First call is a special case, since it does not include a secondary + // payload. + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kEncodedSize + kRedLastHeaderLength, encoded_info_.encoded_bytes); + + // Next call to the speech encoder will not produce any output. + Encode(); + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + + // Final call to the speech encoder will produce output. + Encode(); + EXPECT_EQ(2 * kEncodedSize + kHeaderLenBytes, encoded_info_.encoded_bytes); + ASSERT_EQ(2u, encoded_info_.redundant.size()); +} + +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 1. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes1) { + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. 
+ static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes); + + for (size_t i = 2; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(i, encoded_info_.redundant[1].encoded_bytes); + EXPECT_EQ(i - 1, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(5 + i + (i - 1), encoded_info_.encoded_bytes); + } +} + +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 0. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes0) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-0/"); + // Recreate the RED encoder to take the new field trial setting into account. + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + for (size_t i = 1; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(1 + i, encoded_info_.encoded_bytes); + } +} +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 2. 
+TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes2) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-2/"); + // Recreate the RED encoder to take the new field trial setting into account. + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes); + + // Second call is also special since it does not include a tertiary + // payload. + Encode(); + EXPECT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(8u, encoded_info_.encoded_bytes); + + for (size_t i = 3; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(3u, encoded_info_.redundant.size()); + EXPECT_EQ(i, encoded_info_.redundant[2].encoded_bytes); + EXPECT_EQ(i - 1, encoded_info_.redundant[1].encoded_bytes); + EXPECT_EQ(i - 2, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(9 + i + (i - 1) + (i - 2), encoded_info_.encoded_bytes); + } +} + +// Checks that the correct payload sizes are populated into the redundancy +// information for a redundancy level of 3. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes3) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-3/"); + // Recreate the RED encoder to take the new field trial setting into account. 
+ AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials_)); + + // Let the mock encoder return payload sizes 1, 2, 3, ..., 10 for the sequence + // of calls. + static const int kNumPackets = 10; + InSequence s; + for (int encode_size = 1; encode_size <= kNumPackets; ++encode_size) { + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(encode_size))); + } + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(0u, encoded_info_.redundant.size()); + EXPECT_EQ(kRedLastHeaderLength + 1u, encoded_info_.encoded_bytes); + + // Second call is also special since it does not include a tertiary + // payload. + Encode(); + EXPECT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(8u, encoded_info_.encoded_bytes); + + // Third call is also special since it does not include a quaternary + // payload. + Encode(); + EXPECT_EQ(3u, encoded_info_.redundant.size()); + EXPECT_EQ(15u, encoded_info_.encoded_bytes); + + for (size_t i = 4; i <= kNumPackets; ++i) { + Encode(); + ASSERT_EQ(4u, encoded_info_.redundant.size()); + EXPECT_EQ(i, encoded_info_.redundant[3].encoded_bytes); + EXPECT_EQ(i - 1, encoded_info_.redundant[2].encoded_bytes); + EXPECT_EQ(i - 2, encoded_info_.redundant[1].encoded_bytes); + EXPECT_EQ(i - 3, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(13 + i + (i - 1) + (i - 2) + (i - 3), + encoded_info_.encoded_bytes); + } +} + +// Checks that the correct timestamps are returned. 
+TEST_F(AudioEncoderCopyRedTest, CheckTimestamps) { + uint32_t primary_timestamp = timestamp_; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 17; + info.encoded_timestamp = timestamp_; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp); + + uint32_t secondary_timestamp = primary_timestamp; + primary_timestamp = timestamp_; + info.encoded_timestamp = timestamp_; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(primary_timestamp, encoded_info_.redundant[1].encoded_timestamp); + EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[0].encoded_timestamp); + EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp); +} + +// Checks that the primary and secondary payloads are written correctly. +TEST_F(AudioEncoderCopyRedTest, CheckPayloads) { + // Let the mock encoder write payloads with increasing values. The first + // payload will have values 0, 1, 2, ..., kPayloadLenBytes - 1. + static const size_t kPayloadLenBytes = 5; + static const size_t kHeaderLenBytes = 5; + uint8_t payload[kPayloadLenBytes]; + for (uint8_t i = 0; i < kPayloadLenBytes; ++i) { + payload[i] = i; + } + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillRepeatedly(Invoke(MockAudioEncoder::CopyEncoding(payload))); + + // First call is a special case, since it does not include a secondary + // payload. + Encode(); + EXPECT_EQ(kRedLastHeaderLength + kPayloadLenBytes, + encoded_info_.encoded_bytes); + for (size_t i = 0; i < kPayloadLenBytes; ++i) { + EXPECT_EQ(i, encoded_.data()[kRedLastHeaderLength + i]); + } + + for (int j = 0; j < 1; ++j) { + // Increment all values of the payload by 10. 
+ for (size_t i = 0; i < kPayloadLenBytes; ++i) + payload[i] += 10; + + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(kPayloadLenBytes, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(kPayloadLenBytes, encoded_info_.redundant[1].encoded_bytes); + for (size_t i = 0; i < kPayloadLenBytes; ++i) { + // Check secondary payload. + EXPECT_EQ(j * 10 + i, encoded_.data()[kHeaderLenBytes + i]); + + // Check primary payload. + EXPECT_EQ((j + 1) * 10 + i, + encoded_.data()[kHeaderLenBytes + i + kPayloadLenBytes]); + } + } +} + +// Checks correct propagation of payload type. +TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) { + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 17; + info.payload_type = primary_payload_type; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + // First call is a special case, since it does not include a secondary + // payload. 
+ Encode(); + ASSERT_EQ(0u, encoded_info_.redundant.size()); + + const int secondary_payload_type = red_payload_type_ + 2; + info.payload_type = secondary_payload_type; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + + Encode(); + ASSERT_EQ(2u, encoded_info_.redundant.size()); + EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[1].payload_type); + EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type); + EXPECT_EQ(red_payload_type_, encoded_info_.payload_type); +} + +TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) { + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 10; + info.encoded_timestamp = timestamp_; + info.payload_type = primary_payload_type; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + info.encoded_timestamp = timestamp_; // update timestamp. + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Second call will produce a redundant encoding. + + EXPECT_EQ(encoded_.size(), + 5u + 2 * 10u); // header size + two encoded payloads. + EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); + + uint32_t timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[0].encoded_timestamp; + // Timestamp delta is encoded as a 14 bit value. + EXPECT_EQ(encoded_[1], timestamp_delta >> 6); + EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f); + // Redundant length is encoded as 10 bit value. 
+ EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff); + EXPECT_EQ(encoded_[4], primary_payload_type); + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Third call will produce a redundant encoding with double + // redundancy. + + EXPECT_EQ(encoded_.size(), + 5u + 2 * 10u); // header size + two encoded payloads. + EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); + + timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[0].encoded_timestamp; + // Timestamp delta is encoded as a 14 bit value. + EXPECT_EQ(encoded_[1], timestamp_delta >> 6); + EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f); + // Redundant length is encoded as 10 bit value. + EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff); + + EXPECT_EQ(encoded_[4], primary_payload_type | 0x80); + timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[1].encoded_timestamp; +} + +// Variant with a redundancy of 0. +TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header0) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-0/"); + // Recreate the RED encoder to take the new field trial setting into account. 
+ AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 10; + info.encoded_timestamp = timestamp_; + info.payload_type = primary_payload_type; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + info.encoded_timestamp = timestamp_; // update timestamp. + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Second call will not produce a redundant encoding. + + EXPECT_EQ(encoded_.size(), + 1u + 1 * 10u); // header size + one encoded payloads. + EXPECT_EQ(encoded_[0], primary_payload_type); +} +// Variant with a redundancy of 2. +TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header2) { + webrtc::test::ScopedKeyValueConfig field_trials( + field_trials_, "WebRTC-Audio-Red-For-Opus/Enabled-2/"); + // Recreate the RED encoder to take the new field trial setting into account. + AudioEncoderCopyRed::Config config; + config.payload_type = red_payload_type_; + config.speech_encoder = std::move(red_->ReclaimContainedEncoders()[0]); + red_.reset(new AudioEncoderCopyRed(std::move(config), field_trials)); + + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 10; + info.encoded_timestamp = timestamp_; + info.payload_type = primary_payload_type; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + info.encoded_timestamp = timestamp_; // update timestamp. + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Second call will produce a redundant encoding. 
+ + EXPECT_EQ(encoded_.size(), + 5u + 2 * 10u); // header size + two encoded payloads. + EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); + + uint32_t timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[0].encoded_timestamp; + // Timestamp delta is encoded as a 14 bit value. + EXPECT_EQ(encoded_[1], timestamp_delta >> 6); + EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f); + // Redundant length is encoded as 10 bit value. + EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff); + EXPECT_EQ(encoded_[4], primary_payload_type); + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Third call will produce a redundant encoding with double + // redundancy. + + EXPECT_EQ(encoded_.size(), + 9u + 3 * 10u); // header size + three encoded payloads. + EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); + + timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[0].encoded_timestamp; + // Timestamp delta is encoded as a 14 bit value. + EXPECT_EQ(encoded_[1], timestamp_delta >> 6); + EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f); + // Redundant length is encoded as 10 bit value. + EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff); + + EXPECT_EQ(encoded_[4], primary_payload_type | 0x80); + timestamp_delta = encoded_info_.encoded_timestamp - + encoded_info_.redundant[1].encoded_timestamp; + // Timestamp delta is encoded as a 14 bit value. + EXPECT_EQ(encoded_[5], timestamp_delta >> 6); + EXPECT_EQ(static_cast<uint8_t>(encoded_[6] >> 2), timestamp_delta & 0x3f); + // Redundant length is encoded as 10 bit value. 
+ EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[7], encoded_info_.redundant[1].encoded_bytes & 0xff); + EXPECT_EQ(encoded_[8], primary_payload_type); +} + +TEST_F(AudioEncoderCopyRedTest, RespectsPayloadMTU) { + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 600; + info.encoded_timestamp = timestamp_; + info.payload_type = primary_payload_type; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + info.encoded_timestamp = timestamp_; // update timestamp. + info.encoded_bytes = 500; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Second call will produce a redundant encoding. + + EXPECT_EQ(encoded_.size(), 5u + 600u + 500u); + + info.encoded_timestamp = timestamp_; // update timestamp. + info.encoded_bytes = 400; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); // Third call will drop the oldest packet. + EXPECT_EQ(encoded_.size(), 5u + 500u + 400u); +} + +TEST_F(AudioEncoderCopyRedTest, LargeTimestampGap) { + const int primary_payload_type = red_payload_type_ + 1; + AudioEncoder::EncodedInfo info; + info.encoded_bytes = 100; + info.encoded_timestamp = timestamp_; + info.payload_type = primary_payload_type; + + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + // Update timestamp to simulate a 400ms gap like the one + // opus DTX causes. + timestamp_ += 19200; + info.encoded_timestamp = timestamp_; // update timestamp. + info.encoded_bytes = 200; + EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)) + .WillOnce(Invoke(MockAudioEncoder::FakeEncoding(info))); + Encode(); + + // The old packet will be dropped. 
+ EXPECT_EQ(encoded_.size(), 1u + 200u); +} + +#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// This test fixture tests various error conditions that makes the +// AudioEncoderCng die via CHECKs. +class AudioEncoderCopyRedDeathTest : public AudioEncoderCopyRedTest { + protected: + AudioEncoderCopyRedDeathTest() : AudioEncoderCopyRedTest() {} +}; + +TEST_F(AudioEncoderCopyRedDeathTest, WrongFrameSize) { + num_audio_samples_10ms *= 2; // 20 ms frame. + RTC_EXPECT_DEATH(Encode(), ""); + num_audio_samples_10ms = 0; // Zero samples. + RTC_EXPECT_DEATH(Encode(), ""); +} + +TEST_F(AudioEncoderCopyRedDeathTest, NullSpeechEncoder) { + test::ScopedKeyValueConfig field_trials; + AudioEncoderCopyRed* red = NULL; + AudioEncoderCopyRed::Config config; + config.speech_encoder = NULL; + RTC_EXPECT_DEATH( + red = new AudioEncoderCopyRed(std::move(config), field_trials), + "Speech encoder not provided."); + // The delete operation is needed to avoid leak reports from memcheck. + delete red; +} + +#endif // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc new file mode 100644 index 0000000000..537e6fcede --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h" + +#include "rtc_base/checks.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +using ::std::get; + +namespace webrtc { + +AudioCodecSpeedTest::AudioCodecSpeedTest(int block_duration_ms, + int input_sampling_khz, + int output_sampling_khz) + : block_duration_ms_(block_duration_ms), + input_sampling_khz_(input_sampling_khz), + output_sampling_khz_(output_sampling_khz), + input_length_sample_( + static_cast<size_t>(block_duration_ms_ * input_sampling_khz_)), + output_length_sample_( + static_cast<size_t>(block_duration_ms_ * output_sampling_khz_)), + data_pointer_(0), + loop_length_samples_(0), + max_bytes_(0), + encoded_bytes_(0), + encoding_time_ms_(0.0), + decoding_time_ms_(0.0), + out_file_(NULL) {} + +void AudioCodecSpeedTest::SetUp() { + channels_ = get<0>(GetParam()); + bit_rate_ = get<1>(GetParam()); + in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam())); + save_out_data_ = get<4>(GetParam()); + + FILE* fp = fopen(in_filename_.c_str(), "rb"); + RTC_DCHECK(fp); + + // Obtain file size. + fseek(fp, 0, SEEK_END); + loop_length_samples_ = ftell(fp) / sizeof(int16_t); + rewind(fp); + + // Allocate memory to contain the whole file. + in_data_.reset( + new int16_t[loop_length_samples_ + input_length_sample_ * channels_]); + + data_pointer_ = 0; + + // Copy the file into the buffer. + ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp), + loop_length_samples_); + fclose(fp); + + // Add an extra block length of samples to the end of the array, starting + // over again from the beginning of the array. This is done to simplify + // the reading process when reading over the end of the loop. 
+ memcpy(&in_data_[loop_length_samples_], &in_data_[0], + input_length_sample_ * channels_ * sizeof(int16_t)); + + max_bytes_ = input_length_sample_ * channels_ * sizeof(int16_t); + out_data_.reset(new int16_t[output_length_sample_ * channels_]); + bit_stream_.reset(new uint8_t[max_bytes_]); + + if (save_out_data_) { + std::string out_filename = + ::testing::UnitTest::GetInstance()->current_test_info()->name(); + + // Erase '/' + size_t found; + while ((found = out_filename.find('/')) != std::string::npos) + out_filename.replace(found, 1, "_"); + + out_filename = test::OutputPath() + out_filename + ".pcm"; + + out_file_ = fopen(out_filename.c_str(), "wb"); + RTC_DCHECK(out_file_); + + printf("Output to be saved in %s.\n", out_filename.c_str()); + } +} + +void AudioCodecSpeedTest::TearDown() { + if (save_out_data_) { + fclose(out_file_); + } +} + +void AudioCodecSpeedTest::EncodeDecode(size_t audio_duration_sec) { + size_t time_now_ms = 0; + float time_ms; + + printf("Coding %d kHz-sampled %zu-channel audio at %d bps ...\n", + input_sampling_khz_, channels_, bit_rate_); + + while (time_now_ms < audio_duration_sec * 1000) { + // Encode & decode. 
+ time_ms = EncodeABlock(&in_data_[data_pointer_], &bit_stream_[0], + max_bytes_, &encoded_bytes_); + encoding_time_ms_ += time_ms; + time_ms = DecodeABlock(&bit_stream_[0], encoded_bytes_, &out_data_[0]); + decoding_time_ms_ += time_ms; + if (save_out_data_) { + fwrite(&out_data_[0], sizeof(int16_t), output_length_sample_ * channels_, + out_file_); + } + data_pointer_ = (data_pointer_ + input_length_sample_ * channels_) % + loop_length_samples_; + time_now_ms += block_duration_ms_; + } + + printf("Encoding: %.2f%% real time,\nDecoding: %.2f%% real time.\n", + (encoding_time_ms_ / audio_duration_sec) / 10.0, + (decoding_time_ms_ / audio_duration_sec) / 10.0); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h new file mode 100644 index 0000000000..c5f1d7c259 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_coding/codecs/tools/audio_codec_speed_test.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_ +#define MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_ + +#include <memory> +#include <string> + +#include "test/gtest.h" + +namespace webrtc { + +// Define coding parameter as +// <channels, bit_rate, file_name, extension, if_save_output>. 
+typedef std::tuple<size_t, int, std::string, std::string, bool> coding_param; + +class AudioCodecSpeedTest : public ::testing::TestWithParam<coding_param> { + protected: + AudioCodecSpeedTest(int block_duration_ms, + int input_sampling_khz, + int output_sampling_khz); + virtual void SetUp(); + virtual void TearDown(); + + // EncodeABlock(...) does the following: + // 1. encodes a block of audio, saved in `in_data`, + // 2. save the bit stream to `bit_stream` of `max_bytes` bytes in size, + // 3. assign `encoded_bytes` with the length of the bit stream (in bytes), + // 4. return the cost of time (in millisecond) spent on actual encoding. + virtual float EncodeABlock(int16_t* in_data, + uint8_t* bit_stream, + size_t max_bytes, + size_t* encoded_bytes) = 0; + + // DecodeABlock(...) does the following: + // 1. decodes the bit stream in `bit_stream` with a length of `encoded_bytes` + // (in bytes), + // 2. save the decoded audio in `out_data`, + // 3. return the cost of time (in millisecond) spent on actual decoding. + virtual float DecodeABlock(const uint8_t* bit_stream, + size_t encoded_bytes, + int16_t* out_data) = 0; + + // Encoding and decode an audio of `audio_duration` (in seconds) and + // record the runtime for encoding and decoding separately. + void EncodeDecode(size_t audio_duration); + + int block_duration_ms_; + int input_sampling_khz_; + int output_sampling_khz_; + + // Number of samples-per-channel in a frame. + size_t input_length_sample_; + + // Expected output number of samples-per-channel in a frame. + size_t output_length_sample_; + + std::unique_ptr<int16_t[]> in_data_; + std::unique_ptr<int16_t[]> out_data_; + size_t data_pointer_; + size_t loop_length_samples_; + std::unique_ptr<uint8_t[]> bit_stream_; + + // Maximum number of bytes in output bitstream for a frame of audio. 
+ size_t max_bytes_; + + size_t encoded_bytes_; + float encoding_time_ms_; + float decoding_time_ms_; + FILE* out_file_; + + size_t channels_; + + // Bit rate is in bit-per-second. + int bit_rate_; + + std::string in_filename_; + + // Determines whether to save the output to file. + bool save_out_data_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_CODECS_TOOLS_AUDIO_CODEC_SPEED_TEST_H_ |