Diffstat (limited to 'third_party/libwebrtc/audio/utility')
11 files changed, 2780 insertions, 0 deletions
diff --git a/third_party/libwebrtc/audio/utility/BUILD.gn b/third_party/libwebrtc/audio/utility/BUILD.gn new file mode 100644 index 0000000000..983b6286e4 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/BUILD.gn @@ -0,0 +1,56 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +import("../../webrtc.gni") + +group("utility") { + deps = [ ":audio_frame_operations" ] +} + +rtc_library("audio_frame_operations") { + visibility = [ "*" ] + sources = [ + "audio_frame_operations.cc", + "audio_frame_operations.h", + "channel_mixer.cc", + "channel_mixer.h", + "channel_mixing_matrix.cc", + "channel_mixing_matrix.h", + ] + + deps = [ + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:safe_conversions", + "../../system_wrappers:field_trial", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers" ] +} + +if (rtc_include_tests) { + rtc_library("utility_tests") { + testonly = true + sources = [ + "audio_frame_operations_unittest.cc", + "channel_mixer_unittest.cc", + "channel_mixing_matrix_unittest.cc", + ] + deps = [ + ":audio_frame_operations", + "../../api/audio:audio_frame_api", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:stringutils", + "../../test:field_trial", + "../../test:test_support", + "//testing/gtest", + ] + } +} diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations.cc b/third_party/libwebrtc/audio/utility/audio_frame_operations.cc new file mode 100644 index 0000000000..1b936c239b --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations.cc @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/audio_frame_operations.h" + +#include <string.h> + +#include <algorithm> +#include <cstdint> +#include <utility> + +#include "common_audio/include/audio_util.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace { + +// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz. +const size_t kMuteFadeFrames = 128; +const float kMuteFadeInc = 1.0f / kMuteFadeFrames; + +} // namespace + +void AudioFrameOperations::Add(const AudioFrame& frame_to_add, + AudioFrame* result_frame) { + // Sanity check. + RTC_DCHECK(result_frame); + RTC_DCHECK_GT(result_frame->num_channels_, 0); + RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); + + bool no_previous_data = result_frame->muted(); + if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { + // Special case we have no data to start with. 
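+ // (The DCHECK below asserts that the result frame is empty; it then adopts
+ // frame_to_add's size, and the samples are copied rather than summed.)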
+ RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); + result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_; + no_previous_data = true; + } + + if (result_frame->vad_activity_ == AudioFrame::kVadActive || + frame_to_add.vad_activity_ == AudioFrame::kVadActive) { + result_frame->vad_activity_ = AudioFrame::kVadActive; + } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown || + frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) { + result_frame->vad_activity_ = AudioFrame::kVadUnknown; + } + + if (result_frame->speech_type_ != frame_to_add.speech_type_) + result_frame->speech_type_ = AudioFrame::kUndefined; + + if (!frame_to_add.muted()) { + const int16_t* in_data = frame_to_add.data(); + int16_t* out_data = result_frame->mutable_data(); + size_t length = + frame_to_add.samples_per_channel_ * frame_to_add.num_channels_; + if (no_previous_data) { + std::copy(in_data, in_data + length, out_data); + } else { + for (size_t i = 0; i < length; i++) { + const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) + + static_cast<int32_t>(in_data[i]); + out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard); + } + } + } +} + +int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { + if (frame->num_channels_ != 1) { + return -1; + } + UpmixChannels(2, frame); + return 0; +} + +int AudioFrameOperations::StereoToMono(AudioFrame* frame) { + if (frame->num_channels_ != 2) { + return -1; + } + DownmixChannels(1, frame); + return frame->num_channels_ == 1 ? 0 : -1; +} + +void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, + size_t samples_per_channel, + int16_t* dst_audio) { + for (size_t i = 0; i < samples_per_channel; i++) { + dst_audio[i * 2] = + (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; + dst_audio[i * 2 + 1] = + (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> + 1; + } +} + +int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { + if (frame->num_channels_ != 4) { + return -1; + } + + RTC_DCHECK_LE(frame->samples_per_channel_ * 4, + AudioFrame::kMaxDataSizeSamples); + + if (!frame->muted()) { + QuadToStereo(frame->data(), frame->samples_per_channel_, + frame->mutable_data()); + } + frame->num_channels_ = 2; + + return 0; +} + +void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, + size_t src_channels, + size_t samples_per_channel, + size_t dst_channels, + int16_t* dst_audio) { + if (src_channels > 1 && dst_channels == 1) { + DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels, + dst_audio); + return; + } else if (src_channels == 4 && dst_channels == 2) { + QuadToStereo(src_audio, samples_per_channel, dst_audio); + return; + } + + RTC_DCHECK_NOTREACHED() << "src_channels: " << src_channels + << ", dst_channels: " << dst_channels; +} + +void AudioFrameOperations::DownmixChannels(size_t dst_channels, + AudioFrame* frame) { + RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_, + AudioFrame::kMaxDataSizeSamples); + if (frame->num_channels_ > 1 && dst_channels == 1) { + if (!frame->muted()) { + DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_, + frame->num_channels_, frame->mutable_data()); + } + frame->num_channels_ = 1; + } else if (frame->num_channels_ == 4 && dst_channels == 2) { + int err = QuadToStereo(frame); + RTC_DCHECK_EQ(err, 0); + } else { + RTC_DCHECK_NOTREACHED() << "src_channels: " << frame->num_channels_ + << ", dst_channels: " << dst_channels; + } +} + +void AudioFrameOperations::UpmixChannels(size_t 
target_number_of_channels, + AudioFrame* frame) { + RTC_DCHECK_EQ(frame->num_channels_, 1); + RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels, + AudioFrame::kMaxDataSizeSamples); + + if (frame->num_channels_ != 1 || + frame->samples_per_channel_ * target_number_of_channels > + AudioFrame::kMaxDataSizeSamples) { + return; + } + + if (!frame->muted()) { + // Up-mixing done in place. Going backwards through the frame ensure nothing + // is irrevocably overwritten. + int16_t* frame_data = frame->mutable_data(); + for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) { + for (size_t j = 0; j < target_number_of_channels; ++j) { + frame_data[target_number_of_channels * i + j] = frame_data[i]; + } + } + } + frame->num_channels_ = target_number_of_channels; +} + +void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { + RTC_DCHECK(frame); + if (frame->num_channels_ != 2 || frame->muted()) { + return; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { + std::swap(frame_data[i], frame_data[i + 1]); + } +} + +void AudioFrameOperations::Mute(AudioFrame* frame, + bool previous_frame_muted, + bool current_frame_muted) { + RTC_DCHECK(frame); + if (!previous_frame_muted && !current_frame_muted) { + // Not muted, don't touch. + } else if (previous_frame_muted && current_frame_muted) { + // Frame fully muted. + size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); + frame->Mute(); + } else { + // Fade is a no-op on a muted frame. + if (frame->muted()) { + return; + } + + // Limit number of samples to fade, if frame isn't long enough. + size_t count = kMuteFadeFrames; + float inc = kMuteFadeInc; + if (frame->samples_per_channel_ < kMuteFadeFrames) { + count = frame->samples_per_channel_; + if (count > 0) { + inc = 1.0f / count; + } + } + + size_t start = 0; + size_t end = count; + float start_g = 0.0f; + if (current_frame_muted) { + // Fade out the last `count` samples of frame. + RTC_DCHECK(!previous_frame_muted); + start = frame->samples_per_channel_ - count; + end = frame->samples_per_channel_; + start_g = 1.0f; + inc = -inc; + } else { + // Fade in the first `count` samples of frame. + RTC_DCHECK(previous_frame_muted); + } + + // Perform fade. 
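+ // The same ramp is applied to each channel: `g` starts at `start_g` and is
+ // advanced by `inc` before every multiply, so the first faded sample gets
+ // gain `start_g + inc` rather than `start_g` itself.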
+ int16_t* frame_data = frame->mutable_data(); + size_t channels = frame->num_channels_; + for (size_t j = 0; j < channels; ++j) { + float g = start_g; + for (size_t i = start * channels; i < end * channels; i += channels) { + g += inc; + frame_data[i + j] *= g; + } + } + } +} + +void AudioFrameOperations::Mute(AudioFrame* frame) { + Mute(frame, true, true); +} + +void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { + RTC_DCHECK(frame); + RTC_DCHECK_GT(frame->num_channels_, 0); + if (frame->num_channels_ < 1 || frame->muted()) { + return; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = frame_data[i] >> 1; + } +} + +int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { + if (frame->num_channels_ != 2) { + return -1; + } else if (frame->muted()) { + return 0; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_; i++) { + frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]); + frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]); + } + return 0; +} + +int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { + if (frame->muted()) { + return 0; + } + + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]); + } + return 0; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations.h b/third_party/libwebrtc/audio/utility/audio_frame_operations.h new file mode 100644 index 0000000000..2a5f29f4f5 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ +#define AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "api/audio/audio_frame.h" + +namespace webrtc { + +// TODO(andrew): consolidate this with utility.h and audio_frame_manipulator.h. +// Change reference parameters to pointers. Consider using a namespace rather +// than a class. +class AudioFrameOperations { + public: + // Add samples in `frame_to_add` with samples in `result_frame` + // putting the results in `results_frame`. The fields + // `vad_activity_` and `speech_type_` of the result frame are + // updated. If `result_frame` is empty (`samples_per_channel_`==0), + // the samples in `frame_to_add` are added to it. The number of + // channels and number of samples per channel must match except when + // `result_frame` is empty. + static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame); + + // `frame.num_channels_` will be updated. This version checks for sufficient + // buffer size and that `num_channels_` is mono. Use UpmixChannels + // instead. TODO(bugs.webrtc.org/8649): remove. + ABSL_DEPRECATED("bugs.webrtc.org/8649") + static int MonoToStereo(AudioFrame* frame); + + // `frame.num_channels_` will be updated. 
This version checks that + // `num_channels_` is stereo. Use DownmixChannels + // instead. TODO(bugs.webrtc.org/8649): remove. + ABSL_DEPRECATED("bugs.webrtc.org/8649") + static int StereoToMono(AudioFrame* frame); + + // Downmixes 4 channels `src_audio` to stereo `dst_audio`. This is an in-place + // operation, meaning `src_audio` and `dst_audio` may point to the same + // buffer. + static void QuadToStereo(const int16_t* src_audio, + size_t samples_per_channel, + int16_t* dst_audio); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` is 4 channels. + static int QuadToStereo(AudioFrame* frame); + + // Downmixes `src_channels` `src_audio` to `dst_channels` `dst_audio`. + // This is an in-place operation, meaning `src_audio` and `dst_audio` + // may point to the same buffer. Supported channel combinations are + // Stereo to Mono, Quad to Mono, and Quad to Stereo. + static void DownmixChannels(const int16_t* src_audio, + size_t src_channels, + size_t samples_per_channel, + size_t dst_channels, + int16_t* dst_audio); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` and `dst_channels` are valid and performs relevant downmix. + // Supported channel combinations are N channels to Mono, and Quad to Stereo. + static void DownmixChannels(size_t dst_channels, AudioFrame* frame); + + // `frame.num_channels_` will be updated. This version checks that + // `num_channels_` and `dst_channels` are valid and performs relevant + // downmix. Supported channel combinations are Mono to N + // channels. The single channel is replicated. + static void UpmixChannels(size_t target_number_of_channels, + AudioFrame* frame); + + // Swap the left and right channels of `frame`. Fails silently if `frame` is + // not stereo. + static void SwapStereoChannels(AudioFrame* frame); + + // Conditionally zero out contents of `frame` for implementing audio mute: + // `previous_frame_muted` && `current_frame_muted` - Zero out whole frame. + // `previous_frame_muted` && !`current_frame_muted` - Fade-in at frame start. + // !`previous_frame_muted` && `current_frame_muted` - Fade-out at frame end. + // !`previous_frame_muted` && !`current_frame_muted` - Leave frame untouched. + static void Mute(AudioFrame* frame, + bool previous_frame_muted, + bool current_frame_muted); + + // Zero out contents of frame. + static void Mute(AudioFrame* frame); + + // Halve samples in `frame`. + static void ApplyHalfGain(AudioFrame* frame); + + static int Scale(float left, float right, AudioFrame* frame); + + static int ScaleWithSat(float scale, AudioFrame* frame); +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_ diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build b/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build new file mode 100644 index 0000000000..e215792f64 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations_gn/moz.build @@ -0,0 +1,238 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/audio/utility/audio_frame_operations.cc", + "/third_party/libwebrtc/audio/utility/channel_mixer.cc", + "/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_LIBEVENT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["RTC_ENABLE_WIN_WGC"] = True + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + 
DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["TARGET_CPU"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["TARGET_CPU"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "arm": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "aarch64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "arm": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["OS_TARGET"] == "Linux" and CONFIG["TARGET_CPU"] == "x86_64": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_operations_gn") diff --git a/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc b/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc new file mode 100644 index 0000000000..1a2c16e45f --- /dev/null +++ b/third_party/libwebrtc/audio/utility/audio_frame_operations_unittest.cc @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/audio_frame_operations.h" + +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +class AudioFrameOperationsTest : public ::testing::Test { + protected: + AudioFrameOperationsTest() { + // Set typical values. 
+ frame_.samples_per_channel_ = 320; + frame_.num_channels_ = 2; + } + + AudioFrame frame_; +}; + +class AudioFrameOperationsDeathTest : public AudioFrameOperationsTest {}; + +void SetFrameData(int16_t ch1, + int16_t ch2, + int16_t ch3, + int16_t ch4, + AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) { + frame_data[i] = ch1; + frame_data[i + 1] = ch2; + frame_data[i + 2] = ch3; + frame_data[i + 3] = ch4; + } +} + +void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { + frame_data[i] = left; + frame_data[i + 1] = right; + } +} + +void SetFrameData(int16_t data, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++) { + frame_data[i] = data; + } +} + +void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { + EXPECT_EQ(frame1.num_channels_, frame2.num_channels_); + EXPECT_EQ(frame1.samples_per_channel_, frame2.samples_per_channel_); + const int16_t* frame1_data = frame1.data(); + const int16_t* frame2_data = frame2.data(); + for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_; + i++) { + EXPECT_EQ(frame1_data[i], frame2_data[i]); + } + EXPECT_EQ(frame1.muted(), frame2.muted()); +} + +void InitFrame(AudioFrame* frame, + size_t channels, + size_t samples_per_channel, + int16_t left_data, + int16_t right_data) { + RTC_DCHECK(frame); + RTC_DCHECK_GE(2, channels); + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, + samples_per_channel * channels); + frame->samples_per_channel_ = samples_per_channel; + frame->num_channels_ = channels; + if (channels == 2) { + SetFrameData(left_data, right_data, frame); + } else if (channels == 1) { + SetFrameData(left_data, frame); + } +} + +int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) { + RTC_DCHECK_LT(channel, frame.num_channels_); + RTC_DCHECK_LT(index, frame.samples_per_channel_); + return frame.data()[index * frame.num_channels_ + channel]; +} + +void VerifyFrameDataBounds(const AudioFrame& frame, + size_t channel, + int16_t max, + int16_t min) { + for (size_t i = 0; i < frame.samples_per_channel_; ++i) { + int16_t s = GetChannelData(frame, channel, i); + EXPECT_LE(min, s); + EXPECT_GE(max, s); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST_F(AudioFrameOperationsDeathTest, MonoToStereoFailsWithBadParameters) { + EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), ""); + frame_.samples_per_channel_ = AudioFrame::kMaxDataSizeSamples; + frame_.num_channels_ = 1; + EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), ""); +} +#endif + +TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) { + frame_.num_channels_ = 1; + SetFrameData(1, &frame_); + + AudioFrameOperations::UpmixChannels(2, &frame_); + EXPECT_EQ(2u, frame_.num_channels_); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(1, 1, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) { + frame_.num_channels_ = 1; + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::UpmixChannels(2, &frame_); + EXPECT_EQ(2u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && 
!defined(WEBRTC_ANDROID) +TEST_F(AudioFrameOperationsDeathTest, StereoToMonoFailsWithBadParameters) { + frame_.num_channels_ = 1; + EXPECT_DEATH(AudioFrameOperations::DownmixChannels(1, &frame_), ""); +} +#endif + +TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) { + SetFrameData(4, 2, &frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(3, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) { + AudioFrame target_frame; + SetFrameData(4, 2, &frame_); + + target_frame.num_channels_ = 1; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::DownmixChannels(frame_.data(), 2, + frame_.samples_per_channel_, 1, + target_frame.mutable_data()); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(3, &mono_frame); + VerifyFramesAreEqual(mono_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) { + SetFrameData(-32768, -32768, &frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) { + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) { + AudioFrame target_frame; + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + target_frame.num_channels_ = 1; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::DownmixChannels(frame_.data(), 4, + frame_.samples_per_channel_, 1, + target_frame.mutable_data()); + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, QuadToMonoDoesNotWrapAround) { + frame_.num_channels_ = 4; + SetFrameData(-32768, -32768, -32768, -32768, &frame_); + AudioFrameOperations::DownmixChannels(1, &frame_); + EXPECT_EQ(1u, frame_.num_channels_); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = 320; + mono_frame.num_channels_ = 1; + SetFrameData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoFailsWithBadParameters) { + frame_.num_channels_ = 1; + EXPECT_EQ(-1, AudioFrameOperations::QuadToStereo(&frame_)); + frame_.num_channels_ = 2; + EXPECT_EQ(-1, 
AudioFrameOperations::QuadToStereo(&frame_)); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) { + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(3, 7, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) { + AudioFrame target_frame; + frame_.num_channels_ = 4; + SetFrameData(4, 2, 6, 8, &frame_); + + target_frame.num_channels_ = 2; + target_frame.samples_per_channel_ = frame_.samples_per_channel_; + + AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(3, 7, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, target_frame); +} + +TEST_F(AudioFrameOperationsTest, QuadToStereoDoesNotWrapAround) { + frame_.num_channels_ = 4; + SetFrameData(-32768, -32768, -32768, -32768, &frame_); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = 320; + stereo_frame.num_channels_ = 2; + SetFrameData(-32768, -32768, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) { + SetFrameData(0, 1, &frame_); + + AudioFrame swapped_frame; + swapped_frame.samples_per_channel_ = 320; + swapped_frame.num_channels_ = 2; + SetFrameData(1, 0, &swapped_frame); + + AudioFrameOperations::SwapStereoChannels(&frame_); + VerifyFramesAreEqual(swapped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::SwapStereoChannels(&frame_); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) { + frame_.num_channels_ = 1; + // Set data to "stereo", despite it being a mono frame. + SetFrameData(0, 1, &frame_); + + AudioFrame orig_frame; + orig_frame.CopyFrom(frame_); + AudioFrameOperations::SwapStereoChannels(&frame_); + // Verify that no swap occurred. + VerifyFramesAreEqual(orig_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MuteDisabled) { + SetFrameData(1000, -1000, &frame_); + AudioFrameOperations::Mute(&frame_, false, false); + + AudioFrame muted_frame; + muted_frame.samples_per_channel_ = 320; + muted_frame.num_channels_ = 2; + SetFrameData(1000, -1000, &muted_frame); + VerifyFramesAreEqual(muted_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MuteEnabled) { + SetFrameData(1000, -1000, &frame_); + AudioFrameOperations::Mute(&frame_, true, true); + + AudioFrame muted_frame; + muted_frame.samples_per_channel_ = frame_.samples_per_channel_; + muted_frame.num_channels_ = frame_.num_channels_; + ASSERT_TRUE(muted_frame.muted()); + VerifyFramesAreEqual(muted_frame, frame_); +} + +// Verify that *beginning* to mute works for short and long (>128) frames, mono +// and stereo. Beginning mute should yield a ramp down to zero. 
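+// For instance, with kMuteFadeFrames = 128 a 228-sample frame fades over
+// samples 100..227: sample 100 gets gain 1 - 1/128, so 1000 becomes
+// 1000 * 127 / 128 = 992 (truncated), and the gain reaches 0 at sample 227.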
+TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(-992, GetChannelData(frame_, 1, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-978, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 92)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 92)); +} + +// Verify that *ending* to mute works for short and long (>128) frames, mono +// and stereo. Ending mute should yield a ramp up from zero. 
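+// For instance, sample 0 of a fade-in gets gain 1/128, so 1000 becomes
+// 1000 / 128 = 7 (truncated), and full scale is reached by sample 127.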
+TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(21, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-15, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(21, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-21, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); + EXPECT_EQ(-999, GetChannelData(frame_, 1, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, false, true); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, true, false); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) { + SetFrameData(2, &frame_); + + AudioFrame half_gain_frame; + half_gain_frame.num_channels_ = frame_.num_channels_; + half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_; + SetFrameData(1, &half_gain_frame); + + AudioFrameOperations::ApplyHalfGain(&frame_); + VerifyFramesAreEqual(half_gain_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::ApplyHalfGain(&frame_); + EXPECT_TRUE(frame_.muted()); +} + +// TODO(andrew): should not allow negative scales. +TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) { + frame_.num_channels_ = 1; + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, 1.0, &frame_)); + + frame_.num_channels_ = 3; + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, 1.0, &frame_)); + + frame_.num_channels_ = 2; + EXPECT_EQ(-1, AudioFrameOperations::Scale(-1.0, 1.0, &frame_)); + EXPECT_EQ(-1, AudioFrameOperations::Scale(1.0, -1.0, &frame_)); +} + +// TODO(andrew): fix the wraparound bug. We should always saturate. 
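+// E.g. 4000 * 10.0 = 40000 exceeds the int16_t maximum of 32767; the plain
+// cast in Scale() typically wraps around (40000 - 65536 = -25536) rather than
+// saturating, which is the behavior the disabled expectations below demand.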
+TEST_F(AudioFrameOperationsTest, DISABLED_ScaleDoesNotWrapAround) { + SetFrameData(4000, -4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::Scale(10.0, 10.0, &frame_)); + + AudioFrame clipped_frame; + clipped_frame.samples_per_channel_ = 320; + clipped_frame.num_channels_ = 2; + SetFrameData(32767, -32768, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleSucceeds) { + SetFrameData(1, -1, &frame_); + EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_)); + + AudioFrame scaled_frame; + scaled_frame.samples_per_channel_ = 320; + scaled_frame.num_channels_ = 2; + SetFrameData(2, -3, &scaled_frame); + VerifyFramesAreEqual(scaled_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + +// TODO(andrew): should fail with a negative scale. +TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) { + EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_)); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatDoesNotWrapAround) { + frame_.num_channels_ = 1; + SetFrameData(4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, &frame_)); + + AudioFrame clipped_frame; + clipped_frame.samples_per_channel_ = 320; + clipped_frame.num_channels_ = 1; + SetFrameData(32767, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); + + SetFrameData(-4000, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(10.0, &frame_)); + SetFrameData(-32768, &clipped_frame); + VerifyFramesAreEqual(clipped_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) { + frame_.num_channels_ = 1; + SetFrameData(1, &frame_); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_)); + + AudioFrame scaled_frame; + scaled_frame.samples_per_channel_ = 320; + scaled_frame.num_channels_ = 1; + SetFrameData(2, &scaled_frame); + VerifyFramesAreEqual(scaled_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) { + // When samples_per_channel_ is 0, the frame counts as empty and zero. + AudioFrame frame_to_add_to; + frame_to_add_to.mutable_data(); // Unmute the frame. 
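+ // (mutable_data() zero-fills the buffer and clears the muted flag, which
+ // is what makes this frame count as unmuted but empty below.)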
+ ASSERT_FALSE(frame_to_add_to.muted()); + frame_to_add_to.samples_per_channel_ = 0; + frame_to_add_to.num_channels_ = frame_.num_channels_; + + SetFrameData(1000, &frame_); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) { + AudioFrame frame_to_add_to; + ASSERT_TRUE(frame_to_add_to.muted()); + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + + SetFrameData(1000, &frame_); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) { + AudioFrame frame_to_add_to; + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_to_add_to); + + AudioFrame frame_copy; + frame_copy.CopyFrom(frame_to_add_to); + + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_copy, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) { + AudioFrame frame_to_add_to; + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_to_add_to); + SetFrameData(2000, &frame_); + + AudioFrameOperations::Add(frame_, &frame_to_add_to); + SetFrameData(frame_.data()[0] + 1000, &frame_); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/channel_mixer.cc b/third_party/libwebrtc/audio/utility/channel_mixer.cc new file mode 100644 index 0000000000..0f1e663873 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixer.h" + +#include "audio/utility/channel_mixing_matrix.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +ChannelMixer::ChannelMixer(ChannelLayout input_layout, + ChannelLayout output_layout) + : input_layout_(input_layout), + output_layout_(output_layout), + input_channels_(ChannelLayoutToChannelCount(input_layout)), + output_channels_(ChannelLayoutToChannelCount(output_layout)) { + // Create the transformation matrix. + ChannelMixingMatrix matrix_builder(input_layout_, input_channels_, + output_layout_, output_channels_); + remapping_ = matrix_builder.CreateTransformationMatrix(&matrix_); +} + +ChannelMixer::~ChannelMixer() = default; + +void ChannelMixer::Transform(AudioFrame* frame) { + RTC_DCHECK(frame); + RTC_DCHECK_EQ(matrix_[0].size(), static_cast<size_t>(input_channels_)); + RTC_DCHECK_EQ(matrix_.size(), static_cast<size_t>(output_channels_)); + + // Leave the audio frame intact if the channel layouts for in and out are + // identical. 
+ if (input_layout_ == output_layout_) { + return; + } + + if (IsUpMixing()) { + RTC_CHECK_LE(frame->samples_per_channel() * output_channels_, + frame->max_16bit_samples()); + } + + // Only change the number of output channels if the audio frame is muted. + if (frame->muted()) { + frame->num_channels_ = output_channels_; + frame->channel_layout_ = output_layout_; + return; + } + + const int16_t* in_audio = frame->data(); + + // Only allocate fresh memory at first access or if the required size has + // increased. + // TODO(henrika): we might be able to do downmixing in-place and thereby avoid + // extra memory allocation and a memcpy. + const size_t num_elements = frame->samples_per_channel() * output_channels_; + if (audio_vector_ == nullptr || num_elements > audio_vector_size_) { + audio_vector_.reset(new int16_t[num_elements]); + audio_vector_size_ = num_elements; + } + int16_t* out_audio = audio_vector_.get(); + + // Modify the number of channels by creating a weighted sum of input samples + // where the weights (scale factors) for each output sample are given by the + // transformation matrix. + for (size_t i = 0; i < frame->samples_per_channel(); i++) { + for (size_t output_ch = 0; output_ch < output_channels_; ++output_ch) { + float acc_value = 0.0f; + for (size_t input_ch = 0; input_ch < input_channels_; ++input_ch) { + const float scale = matrix_[output_ch][input_ch]; + // Scale should always be positive. + RTC_DCHECK_GE(scale, 0); + // Each output sample is a weighted sum of input samples. + acc_value += scale * in_audio[i * input_channels_ + input_ch]; + } + const size_t index = output_channels_ * i + output_ch; + RTC_CHECK_LE(index, audio_vector_size_); + out_audio[index] = rtc::saturated_cast<int16_t>(acc_value); + } + } + + // Update channel information. + frame->num_channels_ = output_channels_; + frame->channel_layout_ = output_layout_; + + // Copy the output result to the audio frame in `frame`. + memcpy( + frame->mutable_data(), out_audio, + sizeof(int16_t) * frame->samples_per_channel() * frame->num_channels()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/audio/utility/channel_mixer.h b/third_party/libwebrtc/audio/utility/channel_mixer.h new file mode 100644 index 0000000000..2dea8eb45b --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_UTILITY_CHANNEL_MIXER_H_ +#define AUDIO_UTILITY_CHANNEL_MIXER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <memory> +#include <vector> + +#include "api/audio/audio_frame.h" +#include "api/audio/channel_layout.h" + +namespace webrtc { + +// ChannelMixer is for converting audio between channel layouts. The conversion +// matrix is built upon construction and used during each Transform() call. The +// algorithm works by generating a conversion matrix mapping each output channel +// to list of input channels. The transform renders all of the output channels, +// with each output channel rendered according to a weighted sum of the relevant +// input channels as defined in the matrix. 
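+// For example, the stereo-to-mono matrix is the single row [0.5, 0.5] (see
+// the StereoToMono test): each mono sample is the average of left and right.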
+// This file is derived from Chromium's media/base/channel_mixer.h. +class ChannelMixer { + public: + // To mix two channels into one and preserve loudness, we must apply + // (1 / sqrt(2)) gain to each. + static constexpr float kHalfPower = 0.707106781186547524401f; + + ChannelMixer(ChannelLayout input_layout, ChannelLayout output_layout); + ~ChannelMixer(); + + // Transforms all input channels corresponding to the selected `input_layout` + // to the number of channels in the selected `output_layout`. + // Example usage (downmix from stereo to mono): + // + // ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + // AudioFrame frame; + // frame.samples_per_channel_ = 160; + // frame.num_channels_ = 2; + // EXPECT_EQ(2u, frame.channels()); + // mixer.Transform(&frame); + // EXPECT_EQ(1u, frame.channels()); + // + void Transform(AudioFrame* frame); + + private: + bool IsUpMixing() const { return output_channels_ > input_channels_; } + + // Selected channel layouts. + const ChannelLayout input_layout_; + const ChannelLayout output_layout_; + + // Channel counts for input and output. + const size_t input_channels_; + const size_t output_channels_; + + // 2D matrix of output channels to input channels. + std::vector<std::vector<float> > matrix_; + + // 1D array used as temporary storage during the transformation. + std::unique_ptr<int16_t[]> audio_vector_; + + // Number of elements allocated for `audio_vector_`. + size_t audio_vector_size_ = 0; + + // Optimization case for when we can simply remap the input channels to output + // channels, i.e., when all scaling factors in `matrix_` equals 1.0. + bool remapping_; + + // Delete the copy constructor and assignment operator. + ChannelMixer(const ChannelMixer& other) = delete; + ChannelMixer& operator=(const ChannelMixer& other) = delete; +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_CHANNEL_MIXER_H_ diff --git a/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc b/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc new file mode 100644 index 0000000000..94cb1ac7e3 --- /dev/null +++ b/third_party/libwebrtc/audio/utility/channel_mixer_unittest.cc @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixer.h" + +#include <memory> + +#include "api/audio/audio_frame.h" +#include "api/audio/channel_layout.h" +#include "audio/utility/channel_mixing_matrix.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr uint32_t kTimestamp = 27; +constexpr int kSampleRateHz = 16000; +constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; + +class ChannelMixerTest : public ::testing::Test { + protected: + ChannelMixerTest() { + // Use 10ms audio frames by default. Don't set values yet. 
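+ // (Only frame metadata is set here, no sample data, so the frame stays
+ // muted; kSamplesPerChannel = 16000 / 100 = 160, i.e. one 10 ms frame.)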
+ frame_.samples_per_channel_ = kSamplesPerChannel; + frame_.sample_rate_hz_ = kSampleRateHz; + EXPECT_TRUE(frame_.muted()); + } + + virtual ~ChannelMixerTest() {} + + AudioFrame frame_; +}; + +void SetFrameData(int16_t data, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); + i++) { + frame_data[i] = data; + } +} + +void SetMonoData(int16_t center, AudioFrame* frame) { + frame->num_channels_ = 1; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel(); ++i) { + frame_data[i] = center; + } + EXPECT_FALSE(frame->muted()); +} + +void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) { + ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 2; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) { + frame_data[i] = left; + frame_data[i + 1] = right; + } + EXPECT_FALSE(frame->muted()); +} + +void SetFiveOneData(int16_t front_left, + int16_t front_right, + int16_t center, + int16_t lfe, + int16_t side_left, + int16_t side_right, + AudioFrame* frame) { + ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 6; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) { + frame_data[i] = front_left; + frame_data[i + 1] = front_right; + frame_data[i + 2] = center; + frame_data[i + 3] = lfe; + frame_data[i + 4] = side_left; + frame_data[i + 5] = side_right; + } + EXPECT_FALSE(frame->muted()); +} + +void SetSevenOneData(int16_t front_left, + int16_t front_right, + int16_t center, + int16_t lfe, + int16_t side_left, + int16_t side_right, + int16_t back_left, + int16_t back_right, + AudioFrame* frame) { + ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples()); + frame->num_channels_ = 8; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) { + frame_data[i] = front_left; + frame_data[i + 1] = front_right; + frame_data[i + 2] = center; + frame_data[i + 3] = lfe; + frame_data[i + 4] = side_left; + frame_data[i + 5] = side_right; + frame_data[i + 6] = back_left; + frame_data[i + 7] = back_right; + } + EXPECT_FALSE(frame->muted()); +} + +bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) { + const int16_t* frame_data = frame->data(); + for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); + i++) { + if (frame_data[i] != sample) { + return false; + } + } + return true; +} + +void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { + EXPECT_EQ(frame1.num_channels(), frame2.num_channels()); + EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel()); + const int16_t* frame1_data = frame1.data(); + const int16_t* frame2_data = frame2.data(); + for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels(); + i++) { + EXPECT_EQ(frame1_data[i], frame2_data[i]); + } + EXPECT_EQ(frame1.muted(), frame2.muted()); +} + +} // namespace + +// Test all possible layout conversions can be constructed and mixed. Don't +// care about the actual content, simply run through all mixing combinations +// and ensure that nothing fails. 
+TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast<ChannelLayout>(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast<ChannelLayout>(output_layout + 1)) { + // DISCRETE, BITSTREAM can't be tested here based on the current approach. + // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable. + // Stereo down mix should never be the output layout. + if (input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_BITSTREAM || + output_layout == CHANNEL_LAYOUT_DISCRETE || + output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + + rtc::StringBuilder ss; + ss << "Input Layout: " << input_layout + << ", Output Layout: " << output_layout; + SCOPED_TRACE(ss.str()); + ChannelMixer mixer(input_layout, output_layout); + + frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz, + AudioFrame::kNormalSpeech, AudioFrame::kVadActive, + ChannelLayoutToChannelCount(input_layout)); + EXPECT_TRUE(frame_.muted()); + mixer.Transform(&frame_); + } + } +} + +// Ensure that the audio frame is untouched when input and output channel +// layouts are identical, i.e., the transformation should have no effect. +// Exclude invalid mixing combinations. +TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast<ChannelLayout>(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast<ChannelLayout>(output_layout + 1)) { + if (input_layout != output_layout || + input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + ChannelMixer mixer(input_layout, output_layout); + frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout); + SetFrameData(99, &frame_); + mixer.Transform(&frame_); + EXPECT_EQ(ChannelLayoutToChannelCount(input_layout), + static_cast<int>(frame_.num_channels())); + EXPECT_TRUE(AllSamplesEquals(99, &frame_)); + } + } +} + +TEST_F(ChannelMixerTest, StereoToMono) { + ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + // + // Input: stereo + // LEFT RIGHT + // Output: mono CENTER 0.5 0.5 + // + SetStereoData(7, 3, &frame_); + EXPECT_EQ(2u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetMonoData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); + + SetStereoData(-32768, -32768, &frame_); + EXPECT_EQ(2u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + SetMonoData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(ChannelMixerTest, StereoToMonoMuted) { + ASSERT_TRUE(frame_.muted()); + ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + 
+  mixer.Transform(&frame_);
+  EXPECT_EQ(1u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
+  ASSERT_TRUE(frame_.muted());
+  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
+  mixer.Transform(&frame_);
+  EXPECT_EQ(8u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(ChannelMixerTest, FiveOneToMono) {
+  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
+  //
+  // Input: 5.1
+  //                         LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
+  // Output: mono  CENTER    0.707  0.707  1       0.707  0.707      0.707
+  //
+  // a = [10, 20, 15, 2, 5, 5]
+  // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
+  // a * b (dot product) = 44.69848480983499,
+  // which is truncated to 44 in the 16-bit integer representation.
+  //
+  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(1u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
+
+  AudioFrame mono_frame;
+  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
+  SetMonoData(44, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, frame_);
+
+  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(1u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
+  SetMonoData(-32768, &mono_frame);
+  VerifyFramesAreEqual(mono_frame, frame_);
+}
+
+TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
+  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
+  //
+  // Input: 5.1
+  //                            LEFT  RIGHT  CENTER  LFE  SIDE_LEFT  SIDE_RIGHT
+  // Output: 7.1  LEFT          1     0      0       0    0          0
+  //              RIGHT         0     1      0       0    0          0
+  //              CENTER        0     0      1       0    0          0
+  //              LFE           0     0      0       1    0          0
+  //              SIDE_LEFT     0     0      0       0    1          0
+  //              SIDE_RIGHT    0     0      0       0    0          1
+  //              BACK_LEFT     0     0      0       0    0          0
+  //              BACK_RIGHT    0     0      0       0    0          0
+  //
+  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(8u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
+
+  AudioFrame seven_one_frame;
+  seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
+  SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
+  VerifyFramesAreEqual(seven_one_frame, frame_);
+
+  SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(8u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
+  SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
+                  &seven_one_frame);
+  VerifyFramesAreEqual(seven_one_frame, frame_);
+}
+
+TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
+  ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
+  //
+  // Input: 5.1 back
+  //                          LEFT  RIGHT  CENTER  LFE    BACK_LEFT  BACK_RIGHT
+  // Output: stereo  LEFT     1     0      0.707   0.707  0.707      0
+  //                 RIGHT    0     1      0.707   0.707  0          0.707
+  //
+  SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(2u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
+
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
+  SetStereoData(35, 45, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, frame_);
+
+  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(2u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
+  SetStereoData(-32768, -32768, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, frame_);
+}
+
+TEST_F(ChannelMixerTest, MonoToStereo) {
+  ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
+  //
+  // Input: mono
+  //                          CENTER
+  // Output: stereo  LEFT     1
+  //                 RIGHT    1
+  //
+  SetMonoData(44, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(2u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
+
+  AudioFrame stereo_frame;
+  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
+  SetStereoData(44, 44, &stereo_frame);
+  VerifyFramesAreEqual(stereo_frame, frame_);
+}
+
+TEST_F(ChannelMixerTest, StereoToFiveOne) {
+  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
+  //
+  // Input: stereo
+  //                          LEFT  RIGHT
+  // Output: 5.1  LEFT        1     0
+  //              RIGHT       0     1
+  //              CENTER      0     0
+  //              LFE         0     0
+  //              SIDE_LEFT   0     0
+  //              SIDE_RIGHT  0     0
+  //
+  SetStereoData(50, 60, &frame_);
+  EXPECT_EQ(2u, frame_.num_channels());
+  mixer.Transform(&frame_);
+  EXPECT_EQ(6u, frame_.num_channels());
+  EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());
+
+  AudioFrame five_one_frame;
+  five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
+  SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
+  VerifyFramesAreEqual(five_one_frame, frame_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc
new file mode 100644
index 0000000000..1244653f63
--- /dev/null
+++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.cc
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/utility/channel_mixing_matrix.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "audio/utility/channel_mixer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+namespace {
+
+// Selects the default usage of VoIP channel mapping adjustments.
+bool UseChannelMappingAdjustmentsByDefault() {
+  return !field_trial::IsEnabled(
+      "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch");
+}
+
+}  // namespace
+
+static void ValidateLayout(ChannelLayout layout) {
+  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_NONE);
+  RTC_CHECK_LE(layout, CHANNEL_LAYOUT_MAX);
+  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_UNSUPPORTED);
+  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_DISCRETE);
+  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC);
+
+  // Verify there's at least one channel. Should always be true here by virtue
+  // of not being one of the invalid layouts, but let's double-check to be
+  // sure.
+  int channel_count = ChannelLayoutToChannelCount(layout);
+  RTC_DCHECK_GT(channel_count, 0);
+
+  // If we have more than one channel, verify a symmetric layout for sanity.
+  // The unit test will verify all possible layouts, so this can be a DCHECK.
+  // Symmetry allows simplifying the matrix building code by allowing us to
+  // assume that if one channel of a pair exists, the other will too.
+  if (channel_count > 1) {
+    // Assert that LEFT exists if and only if RIGHT exists, and so on.
+    RTC_DCHECK_EQ(ChannelOrder(layout, LEFT) >= 0,
+                  ChannelOrder(layout, RIGHT) >= 0);
+    RTC_DCHECK_EQ(ChannelOrder(layout, SIDE_LEFT) >= 0,
+                  ChannelOrder(layout, SIDE_RIGHT) >= 0);
+    RTC_DCHECK_EQ(ChannelOrder(layout, BACK_LEFT) >= 0,
+                  ChannelOrder(layout, BACK_RIGHT) >= 0);
+    RTC_DCHECK_EQ(ChannelOrder(layout, LEFT_OF_CENTER) >= 0,
+                  ChannelOrder(layout, RIGHT_OF_CENTER) >= 0);
+  } else {
+    RTC_DCHECK_EQ(layout, CHANNEL_LAYOUT_MONO);
+  }
+}
+
+ChannelMixingMatrix::ChannelMixingMatrix(ChannelLayout input_layout,
+                                         int input_channels,
+                                         ChannelLayout output_layout,
+                                         int output_channels)
+    : use_voip_channel_mapping_adjustments_(
+          UseChannelMappingAdjustmentsByDefault()),
+      input_layout_(input_layout),
+      input_channels_(input_channels),
+      output_layout_(output_layout),
+      output_channels_(output_channels) {
+  // Stereo down mix should never be the output layout.
+  RTC_CHECK_NE(output_layout, CHANNEL_LAYOUT_STEREO_DOWNMIX);
+
+  // Verify that the layouts are supported.
+  if (input_layout != CHANNEL_LAYOUT_DISCRETE)
+    ValidateLayout(input_layout);
+  if (output_layout != CHANNEL_LAYOUT_DISCRETE)
+    ValidateLayout(output_layout);
+
+  // Special case for 5.0, 5.1 with back channels when upmixed to 7.0, 7.1,
+  // which should map the back LR to side LR.
+  if (input_layout_ == CHANNEL_LAYOUT_5_0_BACK &&
+      output_layout_ == CHANNEL_LAYOUT_7_0) {
+    input_layout_ = CHANNEL_LAYOUT_5_0;
+  } else if (input_layout_ == CHANNEL_LAYOUT_5_1_BACK &&
+             output_layout_ == CHANNEL_LAYOUT_7_1) {
+    input_layout_ = CHANNEL_LAYOUT_5_1;
+  }
+}
+
+ChannelMixingMatrix::~ChannelMixingMatrix() = default;
+
+bool ChannelMixingMatrix::CreateTransformationMatrix(
+    std::vector<std::vector<float>>* matrix) {
+  matrix_ = matrix;
+
+  // Size out the initial matrix.
+  matrix_->reserve(output_channels_);
+  for (int output_ch = 0; output_ch < output_channels_; ++output_ch)
+    matrix_->push_back(std::vector<float>(input_channels_, 0));
+
+  // First check for the discrete case.
+  if (input_layout_ == CHANNEL_LAYOUT_DISCRETE ||
+      output_layout_ == CHANNEL_LAYOUT_DISCRETE) {
+    // If the number of input channels is more than output channels, then
+    // copy as many as we can, then drop the remaining input channels.
+    // If the number of input channels is less than output channels, then
+    // copy them all, then zero out the remaining output channels.
+    int passthrough_channels = std::min(input_channels_, output_channels_);
+    for (int i = 0; i < passthrough_channels; ++i)
+      (*matrix_)[i][i] = 1;
+
+    return true;
+  }
+
+  // If specified, use adjusted channel mapping for the VoIP scenario.
+  if (use_voip_channel_mapping_adjustments_ &&
+      input_layout_ == CHANNEL_LAYOUT_MONO &&
+      ChannelLayoutToChannelCount(output_layout_) >= 2) {
+    // Only place the mono input in the front left and right channels.
+    (*matrix_)[0][0] = 1.f;
+    (*matrix_)[1][0] = 1.f;
+
+    for (size_t output_ch = 2; output_ch < matrix_->size(); ++output_ch) {
+      (*matrix_)[output_ch][0] = 0.f;
+    }
+    return true;
+  }
+
+  // Route matching channels and figure out which ones aren't accounted for.
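+  // For example, with stereo input and 5.1 output, LEFT and RIGHT route
+  // directly to their 5.1 counterparts below, no inputs remain unaccounted,
+  // and the resulting matrix is a pure remapping (see the unit tests).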
+  for (Channels ch = LEFT; ch < CHANNELS_MAX + 1;
+       ch = static_cast<Channels>(ch + 1)) {
+    int input_ch_index = ChannelOrder(input_layout_, ch);
+    if (input_ch_index < 0)
+      continue;
+
+    int output_ch_index = ChannelOrder(output_layout_, ch);
+    if (output_ch_index < 0) {
+      unaccounted_inputs_.push_back(ch);
+      continue;
+    }
+
+    RTC_DCHECK_LT(static_cast<size_t>(output_ch_index), matrix_->size());
+    RTC_DCHECK_LT(static_cast<size_t>(input_ch_index),
+                  (*matrix_)[output_ch_index].size());
+    (*matrix_)[output_ch_index][input_ch_index] = 1;
+  }
+
+  // If all input channels are accounted for, there's nothing left to do.
+  if (unaccounted_inputs_.empty()) {
+    // Since all output channels map directly to inputs we can optimize.
+    return true;
+  }
+
+  // Mix front LR into center.
+  if (IsUnaccounted(LEFT)) {
+    // When down mixing to mono from stereo, we need to be careful of
+    // full-scale stereo mixes. Scaling by 1 / sqrt(2) here will likely lead
+    // to clipping, so we use 1 / 2 instead.
+    float scale =
+        (output_layout_ == CHANNEL_LAYOUT_MONO && input_channels_ == 2)
+            ? 0.5
+            : ChannelMixer::kHalfPower;
+    Mix(LEFT, CENTER, scale);
+    Mix(RIGHT, CENTER, scale);
+  }
+
+  // Mix center into front LR.
+  if (IsUnaccounted(CENTER)) {
+    // When up mixing from mono, just do a copy to front LR.
+    float scale =
+        (input_layout_ == CHANNEL_LAYOUT_MONO) ? 1 : ChannelMixer::kHalfPower;
+    MixWithoutAccounting(CENTER, LEFT, scale);
+    Mix(CENTER, RIGHT, scale);
+  }
+
+  // Mix back LR into: side LR || back center || front LR || front center.
+  if (IsUnaccounted(BACK_LEFT)) {
+    if (HasOutputChannel(SIDE_LEFT)) {
+      // If the input also has side LR, mix back LR into side LR at half
+      // power; otherwise (the output has side LR but the input does not)
+      // copy back LR to side LR unscaled.
+      float scale = HasInputChannel(SIDE_LEFT) ? ChannelMixer::kHalfPower : 1;
+      Mix(BACK_LEFT, SIDE_LEFT, scale);
+      Mix(BACK_RIGHT, SIDE_RIGHT, scale);
+    } else if (HasOutputChannel(BACK_CENTER)) {
+      // Mix back LR into back center.
+      Mix(BACK_LEFT, BACK_CENTER, ChannelMixer::kHalfPower);
+      Mix(BACK_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower);
+    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
+      // Mix back LR into front LR.
+      Mix(BACK_LEFT, LEFT, ChannelMixer::kHalfPower);
+      Mix(BACK_RIGHT, RIGHT, ChannelMixer::kHalfPower);
+    } else {
+      // Mix back LR into front center.
+      Mix(BACK_LEFT, CENTER, ChannelMixer::kHalfPower);
+      Mix(BACK_RIGHT, CENTER, ChannelMixer::kHalfPower);
+    }
+  }
+
+  // Mix side LR into: back LR || back center || front LR || front center.
+  if (IsUnaccounted(SIDE_LEFT)) {
+    if (HasOutputChannel(BACK_LEFT)) {
+      // If the input also has back LR, mix side LR into back LR at half
+      // power; otherwise (the output has back LR but the input does not)
+      // copy side LR to back LR unscaled.
+      float scale = HasInputChannel(BACK_LEFT) ? ChannelMixer::kHalfPower : 1;
+      Mix(SIDE_LEFT, BACK_LEFT, scale);
+      Mix(SIDE_RIGHT, BACK_RIGHT, scale);
+    } else if (HasOutputChannel(BACK_CENTER)) {
+      // Mix side LR into back center.
+      Mix(SIDE_LEFT, BACK_CENTER, ChannelMixer::kHalfPower);
+      Mix(SIDE_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower);
+    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
+      // Mix side LR into front LR.
+      Mix(SIDE_LEFT, LEFT, ChannelMixer::kHalfPower);
+      Mix(SIDE_RIGHT, RIGHT, ChannelMixer::kHalfPower);
+    } else {
+      // Mix side LR into front center.
+      Mix(SIDE_LEFT, CENTER, ChannelMixer::kHalfPower);
+      Mix(SIDE_RIGHT, CENTER, ChannelMixer::kHalfPower);
+    }
+  }
+
+  // Mix back center into: back LR || side LR || front LR || front center.
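+  // (kHalfPower is 1/sqrt(2), about 0.707, i.e. a 3 dB attenuation: a channel
+  // redistributed into its neighbors at this gain keeps roughly constant
+  // acoustic power instead of doubling in loudness.)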
+  if (IsUnaccounted(BACK_CENTER)) {
+    if (HasOutputChannel(BACK_LEFT)) {
+      // Mix back center into back LR.
+      MixWithoutAccounting(BACK_CENTER, BACK_LEFT, ChannelMixer::kHalfPower);
+      Mix(BACK_CENTER, BACK_RIGHT, ChannelMixer::kHalfPower);
+    } else if (HasOutputChannel(SIDE_LEFT)) {
+      // Mix back center into side LR.
+      MixWithoutAccounting(BACK_CENTER, SIDE_LEFT, ChannelMixer::kHalfPower);
+      Mix(BACK_CENTER, SIDE_RIGHT, ChannelMixer::kHalfPower);
+    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
+      // Mix back center into front LR.
+      // TODO(dalecurtis): Not sure about these values?
+      MixWithoutAccounting(BACK_CENTER, LEFT, ChannelMixer::kHalfPower);
+      Mix(BACK_CENTER, RIGHT, ChannelMixer::kHalfPower);
+    } else {
+      // Mix back center into front center.
+      // TODO(dalecurtis): Not sure about these values?
+      Mix(BACK_CENTER, CENTER, ChannelMixer::kHalfPower);
+    }
+  }
+
+  // Mix LR of center into: front LR || front center.
+  if (IsUnaccounted(LEFT_OF_CENTER)) {
+    if (HasOutputChannel(LEFT)) {
+      // Mix LR of center into front LR.
+      Mix(LEFT_OF_CENTER, LEFT, ChannelMixer::kHalfPower);
+      Mix(RIGHT_OF_CENTER, RIGHT, ChannelMixer::kHalfPower);
+    } else {
+      // Mix LR of center into front center.
+      Mix(LEFT_OF_CENTER, CENTER, ChannelMixer::kHalfPower);
+      Mix(RIGHT_OF_CENTER, CENTER, ChannelMixer::kHalfPower);
+    }
+  }
+
+  // Mix LFE into: front center || front LR.
+  if (IsUnaccounted(LFE)) {
+    if (!HasOutputChannel(CENTER)) {
+      // Mix LFE into front LR.
+      MixWithoutAccounting(LFE, LEFT, ChannelMixer::kHalfPower);
+      Mix(LFE, RIGHT, ChannelMixer::kHalfPower);
+    } else {
+      // Mix LFE into front center.
+      Mix(LFE, CENTER, ChannelMixer::kHalfPower);
+    }
+  }
+
+  // All channels should now be accounted for.
+  RTC_DCHECK(unaccounted_inputs_.empty());
+
+  // See if the output `matrix_` is simply a remapping matrix. If each input
+  // channel maps to a single output channel we can simply remap. Doing this
+  // programmatically is less fragile than logic checks on channel mappings.
+  for (int output_ch = 0; output_ch < output_channels_; ++output_ch) {
+    int input_mappings = 0;
+    for (int input_ch = 0; input_ch < input_channels_; ++input_ch) {
+      // We can only remap if each row contains a single scale of 1. I.e.,
+      // each output channel is mapped from a single unscaled input channel.
+      if ((*matrix_)[output_ch][input_ch] != 1 || ++input_mappings > 1)
+        return false;
+    }
+  }
+
+  // If we've gotten here, `matrix_` is simply a remapping.
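+  // (For example, mono-to-stereo yields the 2x1 matrix [1, 1]^T and
+  // 5.1-to-7.1 an 8x6 matrix with at most a single 1 per row; both are
+  // verified as remappings by the unit tests.)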
+  return true;
+}
+
+void ChannelMixingMatrix::AccountFor(Channels ch) {
+  unaccounted_inputs_.erase(
+      std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), ch));
+}
+
+bool ChannelMixingMatrix::IsUnaccounted(Channels ch) const {
+  return std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(),
+                   ch) != unaccounted_inputs_.end();
+}
+
+bool ChannelMixingMatrix::HasInputChannel(Channels ch) const {
+  return ChannelOrder(input_layout_, ch) >= 0;
+}
+
+bool ChannelMixingMatrix::HasOutputChannel(Channels ch) const {
+  return ChannelOrder(output_layout_, ch) >= 0;
+}
+
+void ChannelMixingMatrix::Mix(Channels input_ch,
+                              Channels output_ch,
+                              float scale) {
+  MixWithoutAccounting(input_ch, output_ch, scale);
+  AccountFor(input_ch);
+}
+
+void ChannelMixingMatrix::MixWithoutAccounting(Channels input_ch,
+                                               Channels output_ch,
+                                               float scale) {
+  int input_ch_index = ChannelOrder(input_layout_, input_ch);
+  int output_ch_index = ChannelOrder(output_layout_, output_ch);
+
+  RTC_DCHECK(IsUnaccounted(input_ch));
+  RTC_DCHECK_GE(input_ch_index, 0);
+  RTC_DCHECK_GE(output_ch_index, 0);
+
+  RTC_DCHECK_EQ((*matrix_)[output_ch_index][input_ch_index], 0);
+  (*matrix_)[output_ch_index][input_ch_index] = scale;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h
new file mode 100644
index 0000000000..ee00860846
--- /dev/null
+++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_
+#define AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_
+
+#include <vector>
+
+#include "api/audio/channel_layout.h"
+
+namespace webrtc {
+
+class ChannelMixingMatrix {
+ public:
+  ChannelMixingMatrix(ChannelLayout input_layout,
+                      int input_channels,
+                      ChannelLayout output_layout,
+                      int output_channels);
+
+  ~ChannelMixingMatrix();
+
+  // Create the transformation matrix of input channels to output channels.
+  // Updates the empty matrix with the transformation, and returns true
+  // if the transformation is just a remapping of channels (no mixing).
+  // The size of `matrix` is `output_channels` x `input_channels`, i.e., the
+  // number of rows equals the number of output channels and the number of
+  // columns corresponds to the number of input channels.
+  // This file is derived from Chromium's media/base/channel_mixing_matrix.h.
+  bool CreateTransformationMatrix(std::vector<std::vector<float>>* matrix);
+
+ private:
+  const bool use_voip_channel_mapping_adjustments_;
+
+  // Result transformation of input channels to output channels.
+  std::vector<std::vector<float>>* matrix_;
+
+  // Input and output channel layout provided during construction.
+  ChannelLayout input_layout_;
+  int input_channels_;
+  ChannelLayout output_layout_;
+  int output_channels_;
+
+  // Helper variable for tracking which inputs are currently unaccounted;
+  // it should be empty after construction completes.
+  std::vector<Channels> unaccounted_inputs_;
+
+  // Helper methods for managing unaccounted input channels.
+  void AccountFor(Channels ch);
+  bool IsUnaccounted(Channels ch) const;
+
+  // Helper methods for checking if `ch` exists in either `input_layout_` or
+  // `output_layout_` respectively.
+  bool HasInputChannel(Channels ch) const;
+  bool HasOutputChannel(Channels ch) const;
+
+  // Helper methods for updating `matrix_` with the proper value for
+  // mixing `input_ch` into `output_ch`. MixWithoutAccounting() does not
+  // remove the channel from `unaccounted_inputs_`.
+  void Mix(Channels input_ch, Channels output_ch, float scale);
+  void MixWithoutAccounting(Channels input_ch,
+                            Channels output_ch,
+                            float scale);
+
+  // Delete the copy constructor and assignment operator.
+  ChannelMixingMatrix(const ChannelMixingMatrix& other) = delete;
+  ChannelMixingMatrix& operator=(const ChannelMixingMatrix& other) = delete;
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_
diff --git a/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc b/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc
new file mode 100644
index 0000000000..a4efb4fd38
--- /dev/null
+++ b/third_party/libwebrtc/audio/utility/channel_mixing_matrix_unittest.cc
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/utility/channel_mixing_matrix.h"
+
+#include <stddef.h>
+
+#include "audio/utility/channel_mixer.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+// Test that all possible layout conversions can be constructed and mixed.
+// Also ensure that the channel matrix fulfills certain conditions when
+// remapping is supported.
+TEST(ChannelMixingMatrixTest, ConstructAllPossibleLayouts) {
+  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+       input_layout <= CHANNEL_LAYOUT_MAX;
+       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
+    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
+         output_layout <= CHANNEL_LAYOUT_MAX;
+         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
+      // DISCRETE, BITSTREAM can't be tested here based on the current
+      // approach.
+      // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
+      // Stereo down mix should never be the output layout.
+      if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
+          input_layout == CHANNEL_LAYOUT_DISCRETE ||
+          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
+          output_layout == CHANNEL_LAYOUT_BITSTREAM ||
+          output_layout == CHANNEL_LAYOUT_DISCRETE ||
+          output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
+          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
+        continue;
+      }
+
+      rtc::StringBuilder ss;
+      ss << "Input Layout: " << input_layout
+         << ", Output Layout: " << output_layout;
+      SCOPED_TRACE(ss.str());
+      ChannelMixingMatrix matrix_builder(
+          input_layout, ChannelLayoutToChannelCount(input_layout),
+          output_layout, ChannelLayoutToChannelCount(output_layout));
+      const int input_channels = ChannelLayoutToChannelCount(input_layout);
+      const int output_channels = ChannelLayoutToChannelCount(output_layout);
+      std::vector<std::vector<float>> matrix;
+      bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+      if (remapping) {
+        // Also ensure that (when remapping can take place), a maximum of one
+        // input channel is included per output. This knowledge will simplify
+        // the channel mixing algorithm since it allows us to find the only
+        // scale factor which equals 1.0 and copy that input to its
+        // corresponding output. If no such factor can be found, the
+        // corresponding output can be set to zero.
+        for (int i = 0; i < output_channels; i++) {
+          EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size());
+          int num_input_channels_accounted_for_per_output = 0;
+          for (int j = 0; j < input_channels; j++) {
+            float scale = matrix[i][j];
+            if (scale > 0) {
+              EXPECT_EQ(scale, 1.0f);
+              num_input_channels_accounted_for_per_output++;
+            }
+          }
+          // Each output channel shall contain a contribution from at most
+          // one input channel.
+          EXPECT_LE(num_input_channels_accounted_for_per_output, 1);
+        }
+      }
+    }
+  }
+}
+
+// Verify channels are mixed and scaled correctly.
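+// As a numeric cross-check (illustrative, not from the source): with
+// full-scale stereo input (32767, 32767), the 0.5 factors below sum to
+// exactly 32767, whereas equal-power 1/sqrt(2) scaling would sum to about
+// 46340 and clip. This is why stereo-to-mono uses 0.5 instead of kHalfPower.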
+TEST(ChannelMixingMatrixTest, StereoToMono) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
+  ChannelMixingMatrix matrix_builder(
+      input_layout, ChannelLayoutToChannelCount(input_layout), output_layout,
+      ChannelLayoutToChannelCount(output_layout));
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: stereo
+  //                         LEFT  RIGHT
+  // Output: mono  CENTER    0.5   0.5
+  //
+  EXPECT_FALSE(remapping);
+  EXPECT_EQ(1u, matrix.size());
+  EXPECT_EQ(2u, matrix[0].size());
+  EXPECT_EQ(0.5f, matrix[0][0]);
+  EXPECT_EQ(0.5f, matrix[0][1]);
+}
+
+TEST(ChannelMixingMatrixTest, MonoToStereo) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO;
+  ChannelMixingMatrix matrix_builder(
+      input_layout, ChannelLayoutToChannelCount(input_layout), output_layout,
+      ChannelLayoutToChannelCount(output_layout));
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                          CENTER
+  // Output: stereo  LEFT     1
+  //                 RIGHT    1
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(2u, matrix.size());
+  EXPECT_EQ(1u, matrix[0].size());
+  EXPECT_EQ(1.0f, matrix[0][0]);
+  EXPECT_EQ(1u, matrix[1].size());
+  EXPECT_EQ(1.0f, matrix[1][0]);
+}
+
+TEST(ChannelMixingMatrixTest, MonoToTwoOneWithoutVoIPAdjustments) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/");
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_2_1;
+  ChannelMixingMatrix matrix_builder(
+      input_layout, ChannelLayoutToChannelCount(input_layout), output_layout,
+      ChannelLayoutToChannelCount(output_layout));
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                             CENTER
+  // Output: 2.1  FRONT_LEFT     1
+  //              FRONT_RIGHT    1
+  //              BACK_CENTER    0
+  //
+  EXPECT_FALSE(remapping);
+  EXPECT_EQ(3u, matrix.size());
+  EXPECT_EQ(1u, matrix[0].size());
+  EXPECT_EQ(1.0f, matrix[0][0]);
+  EXPECT_EQ(1.0f, matrix[1][0]);
+  EXPECT_EQ(0.0f, matrix[2][0]);
+}
+
+TEST(ChannelMixingMatrixTest, MonoToTwoOneWithVoIPAdjustments) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_2_1;
+  ChannelMixingMatrix matrix_builder(
+      input_layout, ChannelLayoutToChannelCount(input_layout), output_layout,
+      ChannelLayoutToChannelCount(output_layout));
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                             CENTER
+  // Output: 2.1  FRONT_LEFT     1
+  //              FRONT_RIGHT    1
+  //              BACK_CENTER    0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(3u, matrix.size());
+  EXPECT_EQ(1u, matrix[0].size());
+  EXPECT_EQ(1.0f, matrix[0][0]);
+  EXPECT_EQ(1.0f, matrix[1][0]);
+  EXPECT_EQ(0.0f, matrix[2][0]);
+}
+
+TEST(ChannelMixingMatrixTest, MonoToFiveOneWithoutVoIPAdjustments) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/");
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_5_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                            CENTER
+  // Output: 5.1  LEFT          0
+  //              RIGHT         0
+  //              CENTER        1
+  //              LFE           0
+  //              SIDE_LEFT     0
+  //              SIDE_RIGHT    0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int n = 0; n < output_channels; n++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size());
+    if (n == CENTER) {
+      EXPECT_EQ(1.0f, matrix[CENTER][0]);
+    } else {
+      EXPECT_EQ(0.0f, matrix[n][0]);
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, MonoToFiveOneWithVoIPAdjustments) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_5_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                            CENTER
+  // Output: 5.1  LEFT          1
+  //              RIGHT         1
+  //              CENTER        0
+  //              LFE           0
+  //              SIDE_LEFT     0
+  //              SIDE_RIGHT    0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int n = 0; n < output_channels; n++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size());
+    if (n == LEFT || n == RIGHT) {
+      EXPECT_EQ(1.0f, matrix[n][0]);
+    } else {
+      EXPECT_EQ(0.0f, matrix[n][0]);
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, MonoToSevenOneWithoutVoIPAdjustments) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch/Enabled/");
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_7_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                            CENTER
+  // Output: 7.1  LEFT          0
+  //              RIGHT         0
+  //              CENTER        1
+  //              LFE           0
+  //              SIDE_LEFT     0
+  //              SIDE_RIGHT    0
+  //              BACK_LEFT     0
+  //              BACK_RIGHT    0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int n = 0; n < output_channels; n++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size());
+    if (n == CENTER) {
+      EXPECT_EQ(1.0f, matrix[CENTER][0]);
+    } else {
+      EXPECT_EQ(0.0f, matrix[n][0]);
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, MonoToSevenOneWithVoIPAdjustments) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_7_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: mono
+  //                            CENTER
+  // Output: 7.1  LEFT          1
+  //              RIGHT         1
+  //              CENTER        0
+  //              LFE           0
+  //              SIDE_LEFT     0
+  //              SIDE_RIGHT    0
+  //              BACK_LEFT     0
+  //              BACK_RIGHT    0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int n = 0; n < output_channels; n++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size());
+    if (n == LEFT || n == RIGHT) {
+      EXPECT_EQ(1.0f, matrix[n][0]);
+    } else {
+      EXPECT_EQ(0.0f, matrix[n][0]);
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, FiveOneToMono) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_5_1;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
+  ChannelMixingMatrix matrix_builder(
+      input_layout, ChannelLayoutToChannelCount(input_layout), output_layout,
+      ChannelLayoutToChannelCount(output_layout));
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Note: 1/sqrt(2) is shown as 0.707.
+  //
+  // Input: 5.1
+  //                         LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
+  // Output: mono  CENTER    0.707  0.707  1       0.707  0.707      0.707
+  //
+  EXPECT_FALSE(remapping);
+  EXPECT_EQ(1u, matrix.size());
+  EXPECT_EQ(6u, matrix[0].size());
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][0]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][1]);
+  // The center channel will be mixed at scale 1.
+  EXPECT_EQ(1.0f, matrix[0][2]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][3]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][4]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][5]);
+}
+
+TEST(ChannelMixingMatrixTest, FiveOneBackToStereo) {
+  // Front L, Front R, Front C, LFE, Back L, Back R
+  ChannelLayout input_layout = CHANNEL_LAYOUT_5_1_BACK;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Note: 1/sqrt(2) is shown as 0.707.
+  // Note: The Channels enumerator is given by {LEFT = 0, RIGHT, CENTER, LFE,
+  // BACK_LEFT, BACK_RIGHT,...}, hence we can use the enumerator values as
+  // indexes in the matrix when verifying the scaling factors.
+  //
+  // Input: 5.1 back
+  //                          LEFT  RIGHT  CENTER  LFE    BACK_LEFT  BACK_RIGHT
+  // Output: stereo  LEFT     1     0      0.707   0.707  0.707      0
+  //                 RIGHT    0     1      0.707   0.707  0          0.707
+  //
+  EXPECT_FALSE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  EXPECT_EQ(static_cast<size_t>(input_channels), matrix[LEFT].size());
+  EXPECT_EQ(static_cast<size_t>(input_channels), matrix[RIGHT].size());
+  EXPECT_EQ(1.0f, matrix[LEFT][LEFT]);
+  EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]);
+  EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]);
+  EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]);
+  EXPECT_EQ(0.0f, matrix[LEFT][BACK_RIGHT]);
+  EXPECT_EQ(0.0f, matrix[RIGHT][BACK_LEFT]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][CENTER]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][LFE]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][BACK_LEFT]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][CENTER]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][LFE]);
+  EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][BACK_RIGHT]);
+}
+
+TEST(ChannelMixingMatrixTest, FiveOneToSevenOne) {
+  // Front L, Front R, Front C, LFE, Side L, Side R
+  ChannelLayout input_layout = CHANNEL_LAYOUT_5_1;
+  // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
+  ChannelLayout output_layout = CHANNEL_LAYOUT_7_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: 5.1
+  //                            LEFT  RIGHT  CENTER  LFE  SIDE_LEFT  SIDE_RIGHT
+  // Output: 7.1  LEFT          1     0      0       0    0          0
+  //              RIGHT         0     1      0       0    0          0
+  //              CENTER        0     0      1       0    0          0
+  //              LFE           0     0      0       1    0          0
+  //              SIDE_LEFT     0     0      0       0    1          0
+  //              SIDE_RIGHT    0     0      0       0    0          1
+  //              BACK_LEFT     0     0      0       0    0          0
+  //              BACK_RIGHT    0     0      0       0    0          0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int i = 0; i < output_channels; i++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size());
+    for (int j = 0; j < input_channels; j++) {
+      if (i == j) {
+        EXPECT_EQ(1.0f, matrix[i][j]);
+      } else {
+        EXPECT_EQ(0.0f, matrix[i][j]);
+      }
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, StereoToFiveOne) {
+  ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO;
+  ChannelLayout output_layout = CHANNEL_LAYOUT_5_1;
+  const int input_channels = ChannelLayoutToChannelCount(input_layout);
+  const int output_channels = ChannelLayoutToChannelCount(output_layout);
+  ChannelMixingMatrix matrix_builder(input_layout, input_channels,
+                                     output_layout, output_channels);
+  std::vector<std::vector<float>> matrix;
+  bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+
+  // Input: stereo
+  //                            LEFT  RIGHT
+  // Output: 5.1  LEFT          1     0
+  //              RIGHT         0     1
+  //              CENTER        0     0
+  //              LFE           0     0
+  //              SIDE_LEFT     0     0
+  //              SIDE_RIGHT    0     0
+  //
+  EXPECT_TRUE(remapping);
+  EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+  for (int n = 0; n < output_channels; n++) {
+    EXPECT_EQ(static_cast<size_t>(input_channels), matrix[n].size());
+    if (n == LEFT) {
+      EXPECT_EQ(1.0f, matrix[LEFT][LEFT]);
+      EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]);
+    } else if (n == RIGHT) {
+      EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]);
+      EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]);
+    } else {
+      EXPECT_EQ(0.0f, matrix[n][LEFT]);
+      EXPECT_EQ(0.0f, matrix[n][RIGHT]);
+    }
+  }
+}
+
+TEST(ChannelMixingMatrixTest, DiscreteToDiscrete) {
+  const struct {
+    int input_channels;
+    int output_channels;
+  } test_case[] = {
+      {2, 2},
+      {2, 5},
+      {5, 2},
+  };
+
+  for (size_t n = 0; n < arraysize(test_case); n++) {
+    int input_channels = test_case[n].input_channels;
+    int output_channels = test_case[n].output_channels;
+    ChannelMixingMatrix matrix_builder(CHANNEL_LAYOUT_DISCRETE,
+                                       input_channels,
+                                       CHANNEL_LAYOUT_DISCRETE,
+                                       output_channels);
+    std::vector<std::vector<float>> matrix;
+    bool remapping = matrix_builder.CreateTransformationMatrix(&matrix);
+    EXPECT_TRUE(remapping);
+    EXPECT_EQ(static_cast<size_t>(output_channels), matrix.size());
+    for (int i = 0; i < output_channels; i++) {
+      EXPECT_EQ(static_cast<size_t>(input_channels), matrix[i].size());
+      for (int j = 0; j < input_channels; j++) {
+        if (i == j) {
+          EXPECT_EQ(1.0f, matrix[i][j]);
+        } else {
+          EXPECT_EQ(0.0f, matrix[i][j]);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
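For orientation, here is a minimal usage sketch of the two classes this patch adds. This is hypothetical caller code, not part of the diff; it assumes only the APIs visible above (the ChannelMixer(input_layout, output_layout) constructor with Transform(AudioFrame*), and ChannelMixingMatrix with CreateTransformationMatrix()), exactly as exercised by the unit tests.

#include <vector>

#include "api/audio/audio_frame.h"
#include "audio/utility/channel_mixer.h"
#include "audio/utility/channel_mixing_matrix.h"

// Down-mix an interleaved stereo AudioFrame to mono in place.
void DownmixToMono(webrtc::AudioFrame* frame) {
  webrtc::ChannelMixer mixer(webrtc::CHANNEL_LAYOUT_STEREO,
                             webrtc::CHANNEL_LAYOUT_MONO);
  mixer.Transform(frame);  // frame->num_channels() is 1 afterwards.
}

// Inspect the mixing weights directly, as the unit tests do. For
// stereo-to-mono this yields matrix == {{0.5f, 0.5f}} and returns false
// (a true mix, not a pure channel remapping).
bool BuildStereoToMonoMatrix(std::vector<std::vector<float>>* matrix) {
  webrtc::ChannelMixingMatrix builder(webrtc::CHANNEL_LAYOUT_STEREO, 2,
                                      webrtc::CHANNEL_LAYOUT_MONO, 1);
  return builder.CreateTransformationMatrix(matrix);
}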