diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/AudioConverter.cpp | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/AudioConverter.cpp')
-rw-r--r-- | dom/media/AudioConverter.cpp | 480 |
1 files changed, 480 insertions, 0 deletions
diff --git a/dom/media/AudioConverter.cpp b/dom/media/AudioConverter.cpp new file mode 100644 index 0000000000..1f58608043 --- /dev/null +++ b/dom/media/AudioConverter.cpp @@ -0,0 +1,480 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioConverter.h" +#include <speex/speex_resampler.h> +#include <string.h> +#include <cmath> + +/* + * Parts derived from MythTV AudioConvert Class + * Created by Jean-Yves Avenard. + * + * Copyright (C) Bubblestuff Pty Ltd 2013 + * Copyright (C) foobum@gmail.com 2010 + */ + +namespace mozilla { + +AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut) + : mIn(aIn), mOut(aOut), mResampler(nullptr) { + MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut), + "The conversion is not supported"); + mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap); + if (aIn.Rate() != aOut.Rate()) { + RecreateResampler(); + } +} + +AudioConverter::~AudioConverter() { + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } +} + +bool AudioConverter::CanConvert(const AudioConfig& aIn, + const AudioConfig& aOut) { + if (aIn.Format() != aOut.Format() || + aIn.Interleaved() != aOut.Interleaved()) { + NS_WARNING("No format conversion is supported at this stage"); + return false; + } + if (aIn.Channels() != aOut.Channels() && aOut.Channels() > 2) { + NS_WARNING( + "Only down/upmixing to mono or stereo is supported at this stage"); + return false; + } + if (!aOut.Interleaved()) { + NS_WARNING("planar audio format not supported"); + return false; + } + return true; +} + +bool AudioConverter::CanWorkInPlace() const { + bool needDownmix = mIn.Channels() > mOut.Channels(); + bool needUpmix = mIn.Channels() < mOut.Channels(); + bool canDownmixInPlace = + mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >= + mOut.Channels() * AudioConfig::SampleSize(mOut.Format()); + bool needResample = mIn.Rate() != mOut.Rate(); + bool canResampleInPlace = mIn.Rate() >= mOut.Rate(); + // We should be able to work in place if 1s of audio input takes less space + // than 1s of audio output. However, as we downmix before resampling we can't + // perform any upsampling in place (e.g. if incoming rate >= outgoing rate) + return !needUpmix && (!needDownmix || canDownmixInPlace) && + (!needResample || canResampleInPlace); +} + +size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn, + size_t aFrames) { + if (!aFrames) { + return 0; + } + if (mIn.Channels() > mOut.Channels()) { + return DownmixAudio(aOut, aIn, aFrames); + } else if (mIn.Channels() < mOut.Channels()) { + return UpmixAudio(aOut, aIn, aFrames); + } else if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) { + ReOrderInterleavedChannels(aOut, aIn, aFrames); + } else if (aIn != aOut) { + memmove(aOut, aIn, FramesOutToBytes(aFrames)); + } + return aFrames; +} + +// Reorder interleaved channels. +// Can work in place (e.g aOut == aIn). +template <class AudioDataType> +void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn, + uint32_t aFrames, uint32_t aChannels, + const uint8_t* aChannelOrderMap) { + MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS); + AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS]; + for (uint32_t i = 0; i < aFrames; i++) { + for (uint32_t j = 0; j < aChannels; j++) { + val[j] = aIn[aChannelOrderMap[j]]; + } + for (uint32_t j = 0; j < aChannels; j++) { + aOut[j] = val[j]; + } + aOut += aChannels; + aIn += aChannels; + } +} + +void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn, + size_t aFrames) const { + MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels()); + MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio()); + + if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 || + mOut.Layout() == mIn.Layout()) { + // If channel count is 1, planar and non-planar formats are the same or + // there's nothing to reorder, or if we don't know how to re-order. + if (aOut != aIn) { + memmove(aOut, aIn, FramesOutToBytes(aFrames)); + } + return; + } + + uint32_t bits = AudioConfig::FormatToBits(mOut.Format()); + switch (bits) { + case 8: + _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn, aFrames, + mIn.Channels(), mChannelOrderMap.Elements()); + break; + case 16: + _ReOrderInterleavedChannels((int16_t*)aOut, (const int16_t*)aIn, aFrames, + mIn.Channels(), mChannelOrderMap.Elements()); + break; + default: + MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4); + _ReOrderInterleavedChannels((int32_t*)aOut, (const int32_t*)aIn, aFrames, + mIn.Channels(), mChannelOrderMap.Elements()); + break; + } +} + +static inline int16_t clipTo15(int32_t aX) { + return aX < -32768 ? -32768 : aX <= 32767 ? aX : 32767; +} + +template <typename TYPE> +static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn, + int32_t aInChannels, int32_t aFrames) { + if (aIn == aOut) { + return; + } + int32_t commonChannels = std::min(aInChannels, aOutChannels); + + for (int32_t i = 0; i < aFrames; i++) { + for (int32_t j = 0; j < commonChannels; j++) { + aOut[i * aOutChannels + j] = aIn[i * aInChannels + j]; + } + if (aOutChannels > aInChannels) { + for (int32_t j = 0; j < aInChannels - aOutChannels; j++) { + aOut[i * aOutChannels + j] = 0; + } + } + } +} + +size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn, + size_t aFrames) const { + MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 || + mIn.Format() == AudioConfig::FORMAT_FLT); + MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels()); + MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) || + mOut.Layout() == AudioConfig::ChannelLayout(1)); + + uint32_t inChannels = mIn.Channels(); + uint32_t outChannels = mOut.Channels(); + + if (inChannels == outChannels) { + if (aOut != aIn) { + memmove(aOut, aIn, FramesOutToBytes(aFrames)); + } + return aFrames; + } + + if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) { + // Dumb copy dropping extra channels. + if (mIn.Format() == AudioConfig::FORMAT_FLT) { + dumbUpDownMix(static_cast<float*>(aOut), outChannels, + static_cast<const float*>(aIn), inChannels, aFrames); + } else if (mIn.Format() == AudioConfig::FORMAT_S16) { + dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels, + static_cast<const int16_t*>(aIn), inChannels, aFrames); + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + } + return aFrames; + } + + MOZ_ASSERT( + mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()), + "Can only downmix input data in SMPTE layout"); + if (inChannels > 2) { + if (mIn.Format() == AudioConfig::FORMAT_FLT) { + // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows + // 5-8. + static const float dmatrix[6][8][2] = { + /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}}, + /*4*/ + {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}}, + /*5*/ + {{0.6510f, 0}, + {0, 0.6510f}, + {0.4600f, 0.4600f}, + {0.5636f, 0.3254f}, + {0.3254f, 0.5636f}}, + /*6*/ + {{0.5290f, 0}, + {0, 0.5290f}, + {0.3741f, 0.3741f}, + {0.3741f, 0.3741f}, + {0.4582f, 0.2645f}, + {0.2645f, 0.4582f}}, + /*7*/ + {{0.4553f, 0}, + {0, 0.4553f}, + {0.3220f, 0.3220f}, + {0.3220f, 0.3220f}, + {0.2788f, 0.2788f}, + {0.3943f, 0.2277f}, + {0.2277f, 0.3943f}}, + /*8*/ + {{0.3886f, 0}, + {0, 0.3886f}, + {0.2748f, 0.2748f}, + {0.2748f, 0.2748f}, + {0.3366f, 0.1943f}, + {0.1943f, 0.3366f}, + {0.3366f, 0.1943f}, + {0.1943f, 0.3366f}}, + }; + // Re-write the buffer with downmixed data + const float* in = static_cast<const float*>(aIn); + float* out = static_cast<float*>(aOut); + for (uint32_t i = 0; i < aFrames; i++) { + float sampL = 0.0; + float sampR = 0.0; + for (uint32_t j = 0; j < inChannels; j++) { + sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0]; + sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1]; + } + if (outChannels == 2) { + *out++ = sampL; + *out++ = sampR; + } else { + *out++ = (sampL + sampR) * 0.5; + } + } + } else if (mIn.Format() == AudioConfig::FORMAT_S16) { + // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows + // 5-8. Coefficients in Q14. + static const int16_t dmatrix[6][8][2] = { + /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}}, + /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}}, + /*5*/ + {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}}, + /*6*/ + {{8668, 0}, + {0, 8668}, + {6129, 6129}, + {6129, 6129}, + {7507, 4335}, + {4335, 7507}}, + /*7*/ + {{7459, 0}, + {0, 7459}, + {5275, 5275}, + {5275, 5275}, + {4568, 4568}, + {6460, 3731}, + {3731, 6460}}, + /*8*/ + {{6368, 0}, + {0, 6368}, + {4502, 4502}, + {4502, 4502}, + {5514, 3184}, + {3184, 5514}, + {5514, 3184}, + {3184, 5514}}}; + // Re-write the buffer with downmixed data + const int16_t* in = static_cast<const int16_t*>(aIn); + int16_t* out = static_cast<int16_t*>(aOut); + for (uint32_t i = 0; i < aFrames; i++) { + int32_t sampL = 0; + int32_t sampR = 0; + for (uint32_t j = 0; j < inChannels; j++) { + sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0]; + sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1]; + } + sampL = clipTo15((sampL + 8192) >> 14); + sampR = clipTo15((sampR + 8192) >> 14); + if (outChannels == 2) { + *out++ = sampL; + *out++ = sampR; + } else { + *out++ = (sampL + sampR) * 0.5; + } + } + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + } + return aFrames; + } + + MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1); + if (mIn.Format() == AudioConfig::FORMAT_FLT) { + const float* in = static_cast<const float*>(aIn); + float* out = static_cast<float*>(aOut); + for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { + float sample = 0.0; + // The sample of the buffer would be interleaved. + sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5; + *out++ = sample; + } + } else if (mIn.Format() == AudioConfig::FORMAT_S16) { + const int16_t* in = static_cast<const int16_t*>(aIn); + int16_t* out = static_cast<int16_t*>(aOut); + for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { + int32_t sample = 0.0; + // The sample of the buffer would be interleaved. + sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5; + *out++ = sample; + } + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + } + return aFrames; +} + +size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn, + size_t aFrames) { + if (!mResampler) { + return 0; + } + uint32_t outframes = ResampleRecipientFrames(aFrames); + uint32_t inframes = aFrames; + + int error; + if (mOut.Format() == AudioConfig::FORMAT_FLT) { + const float* in = reinterpret_cast<const float*>(aIn); + float* out = reinterpret_cast<float*>(aOut); + error = speex_resampler_process_interleaved_float(mResampler, in, &inframes, + out, &outframes); + } else if (mOut.Format() == AudioConfig::FORMAT_S16) { + const int16_t* in = reinterpret_cast<const int16_t*>(aIn); + int16_t* out = reinterpret_cast<int16_t*>(aOut); + error = speex_resampler_process_interleaved_int(mResampler, in, &inframes, + out, &outframes); + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + error = RESAMPLER_ERR_ALLOC_FAILED; + } + MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS); + if (error != RESAMPLER_ERR_SUCCESS) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + return 0; + } + MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped"); + return outframes; +} + +void AudioConverter::RecreateResampler() { + if (mResampler) { + speex_resampler_destroy(mResampler); + } + int error; + mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(), + SPEEX_RESAMPLER_QUALITY_DEFAULT, &error); + + if (error == RESAMPLER_ERR_SUCCESS) { + speex_resampler_skip_zeros(mResampler); + } else { + NS_WARNING("Failed to initialize resampler."); + mResampler = nullptr; + } +} + +size_t AudioConverter::DrainResampler(void* aOut) { + if (!mResampler) { + return 0; + } + int frames = speex_resampler_get_input_latency(mResampler); + AlignedByteBuffer buffer(FramesOutToBytes(frames)); + if (!buffer) { + // OOM + return 0; + } + frames = ResampleAudio(aOut, buffer.Data(), frames); + // Tore down the resampler as it's easier than handling follow-up. + RecreateResampler(); + return frames; +} + +size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn, + size_t aFrames) const { + MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 || + mIn.Format() == AudioConfig::FORMAT_FLT); + MOZ_ASSERT(mIn.Channels() < mOut.Channels()); + MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now"); + MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now"); + + if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() || + mOut.Channels() != 2) { + // Dumb copy the channels and insert silence for the extra channels. + if (mIn.Format() == AudioConfig::FORMAT_FLT) { + dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(), + static_cast<const float*>(aIn), mIn.Channels(), aFrames); + } else if (mIn.Format() == AudioConfig::FORMAT_S16) { + dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(), + static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames); + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + } + return aFrames; + } + + // Upmix mono to stereo. + // This is a very dumb mono to stereo upmixing, power levels are preserved + // following the calculation: left = right = -3dB*mono. + if (mIn.Format() == AudioConfig::FORMAT_FLT) { + const float m3db = std::sqrt(0.5); // -3dB = sqrt(1/2) + const float* in = static_cast<const float*>(aIn); + float* out = static_cast<float*>(aOut); + for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { + float sample = in[fIdx] * m3db; + // The samples of the buffer would be interleaved. + *out++ = sample; + *out++ = sample; + } + } else if (mIn.Format() == AudioConfig::FORMAT_S16) { + const int16_t* in = static_cast<const int16_t*>(aIn); + int16_t* out = static_cast<int16_t*>(aOut); + for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { + int16_t sample = + ((int32_t)in[fIdx] * 11585) >> 14; // close enough to i*sqrt(0.5) + // The samples of the buffer would be interleaved. + *out++ = sample; + *out++ = sample; + } + } else { + MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type"); + } + + return aFrames; +} + +size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const { + if (!aFrames && mIn.Rate() != mOut.Rate()) { + if (!mResampler) { + return 0; + } + // We drain by pushing in get_input_latency() samples of 0 + aFrames = speex_resampler_get_input_latency(mResampler); + } + return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1; +} + +size_t AudioConverter::FramesOutToSamples(size_t aFrames) const { + return aFrames * mOut.Channels(); +} + +size_t AudioConverter::SamplesInToFrames(size_t aSamples) const { + return aSamples / mIn.Channels(); +} + +size_t AudioConverter::FramesOutToBytes(size_t aFrames) const { + return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format()); +} +} // namespace mozilla |