diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/webaudio/ConvolverNode.cpp | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/webaudio/ConvolverNode.cpp')
-rw-r--r-- | dom/media/webaudio/ConvolverNode.cpp | 479 |
1 files changed, 479 insertions, 0 deletions
diff --git a/dom/media/webaudio/ConvolverNode.cpp b/dom/media/webaudio/ConvolverNode.cpp new file mode 100644 index 0000000000..65562ae6d0 --- /dev/null +++ b/dom/media/webaudio/ConvolverNode.cpp @@ -0,0 +1,479 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ConvolverNode.h" +#include "mozilla/dom/ConvolverNodeBinding.h" +#include "AlignmentUtils.h" +#include "AudioNodeEngine.h" +#include "AudioNodeTrack.h" +#include "blink/Reverb.h" +#include "PlayingRefChangeHandler.h" +#include "Tracing.h" + +namespace mozilla::dom { + +NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer) + +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode) +NS_INTERFACE_MAP_END_INHERITING(AudioNode) + +NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode) +NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode) + +class ConvolverNodeEngine final : public AudioNodeEngine { + typedef PlayingRefChangeHandler PlayingRefChanged; + + public: + ConvolverNodeEngine(AudioNode* aNode, bool aNormalize) + : AudioNodeEngine(aNode) {} + + // Indicates how the right output channel is generated. + enum class RightConvolverMode { + // A right convolver is always used when there is more than one impulse + // response channel. + Always, + // With a single response channel, the mode may be either Direct or + // Difference. The decision on which to use is made when stereo input is + // received. Once the right convolver is in use, convolver state is + // suitable only for the selected mode, and so the mode cannot change + // until the right convolver contains only silent history. + // + // With Direct mode, each convolver processes a corresponding channel. + // This mode is selected when input is initially stereo or + // channelInterpretation is "discrete" at the time or starting the right + // convolver when input changes from non-silent mono to stereo. + Direct, + // Difference mode is selected if channelInterpretation is "speakers" at + // the time starting the right convolver when the input changes from mono + // to stereo. + // + // When non-silent input is initially mono, with a single response + // channel, the right output channel is not produced until input becomes + // stereo. Only a single convolver is used for mono processing. When + // stereo input arrives after mono input, output must be as if the mono + // signal remaining in the left convolver is up-mixed, but the right + // convolver has not been initialized with the history of the mono input. + // Copying the state of the left convolver into the right convolver is not + // desirable, because there is considerable state to copy, and the + // different convolvers are intended to process out of phase, which means + // that state from one convolver would not directly map to state in + // another convolver. + // + // Instead the distributive property of convolution is used to generate + // the right output channel using information in the left output channel. + // Using l and r to denote the left and right channel input signals, g the + // impulse response, and * convolution, the convolution of the right + // channel can be given by + // + // r * g = (l + (r - l)) * g + // = l * g + (r - l) * g + // + // The left convolver continues to process the left channel l to produce + // l * g. The right convolver processes the difference of input channel + // signals r - l to produce (r - l) * g. The outputs of the two + // convolvers are added to generate the right channel output r * g. + // + // The benefit of doing this is that the history of the r - l input for a + // "speakers" up-mixed mono signal is zero, and so an empty convolver + // already has exactly the right history for mixing the previous mono + // signal with the new stereo signal. + Difference + }; + + void SetReverb(WebCore::Reverb* aReverb, + uint32_t aImpulseChannelCount) override { + mRemainingLeftOutput = INT32_MIN; + mRemainingRightOutput = 0; + mRemainingRightHistory = 0; + + // Assume for now that convolution of channel difference is not required. + // Direct may change to Difference during processing. + if (aReverb) { + mRightConvolverMode = aImpulseChannelCount == 1 + ? RightConvolverMode::Direct + : RightConvolverMode::Always; + } else { + mRightConvolverMode = RightConvolverMode::Always; + } + + mReverb.reset(aReverb); + } + + void AllocateReverbInput(const AudioBlock& aInput, + uint32_t aTotalChannelCount) { + uint32_t inputChannelCount = aInput.ChannelCount(); + MOZ_ASSERT(inputChannelCount <= aTotalChannelCount); + mReverbInput.AllocateChannels(aTotalChannelCount); + // Pre-multiply the input's volume + for (uint32_t i = 0; i < inputChannelCount; ++i) { + const float* src = static_cast<const float*>(aInput.mChannelData[i]); + float* dest = mReverbInput.ChannelFloatsForWrite(i); + AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest); + } + // Fill remaining channels with silence + for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) { + float* dest = mReverbInput.ChannelFloatsForWrite(i); + std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f); + } + } + + void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom, + const AudioBlock& aInput, AudioBlock* aOutput, + bool* aFinished) override; + + bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; } + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override { + size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf); + + amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false); + + if (mReverb) { + amount += mReverb->sizeOfIncludingThis(aMallocSizeOf); + } + + return amount; + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + private: + // Keeping mReverbInput across process calls avoids unnecessary reallocation. + AudioBlock mReverbInput; + UniquePtr<WebCore::Reverb> mReverb; + // Tracks samples of the tail remaining to be output. INT32_MIN is a + // special value to indicate that the end of any previous tail has been + // handled. + int32_t mRemainingLeftOutput = INT32_MIN; + // mRemainingRightOutput and mRemainingRightHistory are only used when + // mRightOutputMode != Always. There is no special handling required at the + // end of tail times and so INT32_MIN is not used. + // mRemainingRightOutput tracks how much longer this node needs to continue + // to produce a right output channel. + int32_t mRemainingRightOutput = 0; + // mRemainingRightHistory tracks how much silent input would be required to + // drain the right convolver, which may sometimes be longer than the period + // a right output channel is required. + int32_t mRemainingRightHistory = 0; + RightConvolverMode mRightConvolverMode = RightConvolverMode::Always; +}; + +static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) { + const float* left = static_cast<const float*>(aBlock->mChannelData[0]); + float* right = aBlock->ChannelFloatsForWrite(1); + AudioBlockAddChannelWithScale(left, aScale, right); +} + +void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom, + const AudioBlock& aInput, + AudioBlock* aOutput, bool* aFinished) { + TRACE("ConvolverNodeEngine::ProcessBlock"); + if (!mReverb) { + aOutput->SetNull(WEBAUDIO_BLOCK_SIZE); + return; + } + + uint32_t inputChannelCount = aInput.ChannelCount(); + if (aInput.IsNull()) { + if (mRemainingLeftOutput > 0) { + mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE; + AllocateReverbInput(aInput, 1); // floats for silence + } else { + if (mRemainingLeftOutput != INT32_MIN) { + mRemainingLeftOutput = INT32_MIN; + MOZ_ASSERT(mRemainingRightOutput <= 0); + MOZ_ASSERT(mRemainingRightHistory <= 0); + aTrack->ScheduleCheckForInactive(); + RefPtr<PlayingRefChanged> refchanged = + new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE); + aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget()); + } + aOutput->SetNull(WEBAUDIO_BLOCK_SIZE); + return; + } + } else { + if (mRemainingLeftOutput <= 0) { + RefPtr<PlayingRefChanged> refchanged = + new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF); + aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget()); + } + + // Use mVolume as a flag to detect whether AllocateReverbInput() gets + // called. + mReverbInput.mVolume = 0.0f; + + // Special handling of input channel count changes is used when there is + // only a single impulse response channel. See RightConvolverMode. + if (mRightConvolverMode != RightConvolverMode::Always) { + ChannelInterpretation channelInterpretation = + aTrack->GetChannelInterpretation(); + if (inputChannelCount == 2) { + if (mRemainingRightHistory <= 0) { + // Will start the second convolver. Choose to convolve the right + // channel directly if there is no left tail to up-mix or up-mixing + // is "discrete". + mRightConvolverMode = + (mRemainingLeftOutput <= 0 || + channelInterpretation == ChannelInterpretation::Discrete) + ? RightConvolverMode::Direct + : RightConvolverMode::Difference; + } + // The extra WEBAUDIO_BLOCK_SIZE is subtracted below. + mRemainingRightOutput = + mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE; + mRemainingRightHistory = mRemainingRightOutput; + if (mRightConvolverMode == RightConvolverMode::Difference) { + AllocateReverbInput(aInput, 2); + // Subtract left from right. + AddScaledLeftToRight(&mReverbInput, -1.0f); + } + } else if (mRemainingRightHistory > 0) { + // There is one channel of input, but a second convolver also + // requires input. Up-mix appropriately for the second convolver. + if ((mRightConvolverMode == RightConvolverMode::Difference) ^ + (channelInterpretation == ChannelInterpretation::Discrete)) { + MOZ_ASSERT( + (mRightConvolverMode == RightConvolverMode::Difference && + channelInterpretation == ChannelInterpretation::Speakers) || + (mRightConvolverMode == RightConvolverMode::Direct && + channelInterpretation == ChannelInterpretation::Discrete)); + // The state is one of the following combinations: + // 1) Difference and speakers. + // Up-mixing gives r = l. + // The input to the second convolver is r - l. + // 2) Direct and discrete. + // Up-mixing gives r = 0. + // The input to the second convolver is r. + // + // In each case the input for the second convolver is silence, which + // will drain the convolver. + AllocateReverbInput(aInput, 2); + } else { + if (channelInterpretation == ChannelInterpretation::Discrete) { + MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference); + // channelInterpretation has changed since the second convolver + // was added. "discrete" up-mixing of input would produce a + // silent right channel r = 0, but the second convolver needs + // r - l for RightConvolverMode::Difference. + AllocateReverbInput(aInput, 2); + AddScaledLeftToRight(&mReverbInput, -1.0f); + } else { + MOZ_ASSERT(channelInterpretation == + ChannelInterpretation::Speakers); + MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct); + // The Reverb will essentially up-mix the single input channel by + // feeding it into both convolvers. + } + // The second convolver does not have silent input, and so it will + // not drain. It will need to continue processing up-mixed input + // because the next input block may be stereo, which would be mixed + // with the signal remaining in the convolvers. + // The extra WEBAUDIO_BLOCK_SIZE is subtracted below. + mRemainingRightHistory = + mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE; + } + } + } + + if (mReverbInput.mVolume == 0.0f) { // not yet set + if (aInput.mVolume != 1.0f) { + AllocateReverbInput(aInput, inputChannelCount); // pre-multiply + } else { + mReverbInput = aInput; + } + } + + mRemainingLeftOutput = mReverb->impulseResponseLength(); + MOZ_ASSERT(mRemainingLeftOutput > 0); + } + + // "The ConvolverNode produces a mono output only in the single case where + // there is a single input channel and a single-channel buffer." + uint32_t outputChannelCount = 2; + uint32_t reverbOutputChannelCount = 2; + if (mRightConvolverMode != RightConvolverMode::Always) { + // When the input changes from stereo to mono, the output continues to be + // stereo for the length of the tail time, during which the two channels + // may differ. + if (mRemainingRightOutput > 0) { + MOZ_ASSERT(mRemainingRightHistory > 0); + mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE; + } else { + outputChannelCount = 1; + } + // The second convolver keeps processing until it drains. + if (mRemainingRightHistory > 0) { + mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE; + } else { + reverbOutputChannelCount = 1; + } + } + + // If there are two convolvers, then they each need an output buffer, even + // if the second convolver is only processing to keep history of up-mixed + // input. + aOutput->AllocateChannels(reverbOutputChannelCount); + + mReverb->process(&mReverbInput, aOutput); + + if (mRightConvolverMode == RightConvolverMode::Difference && + outputChannelCount == 2) { + // Add left to right. + AddScaledLeftToRight(aOutput, 1.0f); + } else { + // Trim if outputChannelCount < reverbOutputChannelCount + aOutput->mChannelData.TruncateLength(outputChannelCount); + } +} + +ConvolverNode::ConvolverNode(AudioContext* aContext) + : AudioNode(aContext, 2, ChannelCountMode::Clamped_max, + ChannelInterpretation::Speakers), + mNormalize(true) { + ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize); + mTrack = AudioNodeTrack::Create( + aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph()); +} + +/* static */ +already_AddRefed<ConvolverNode> ConvolverNode::Create( + JSContext* aCx, AudioContext& aAudioContext, + const ConvolverOptions& aOptions, ErrorResult& aRv) { + RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext); + + audioNode->Initialize(aOptions, aRv); + if (NS_WARN_IF(aRv.Failed())) { + return nullptr; + } + + // This must be done before setting the buffer. + audioNode->SetNormalize(!aOptions.mDisableNormalization); + + if (aOptions.mBuffer.WasPassed()) { + MOZ_ASSERT(aCx); + audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv); + if (NS_WARN_IF(aRv.Failed())) { + return nullptr; + } + } + + return audioNode.forget(); +} + +size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf); + if (mBuffer) { + // NB: mBuffer might be shared with the associated engine, by convention + // the AudioNode will report. + amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf); + } + return amount; +} + +size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); +} + +JSObject* ConvolverNode::WrapObject(JSContext* aCx, + JS::Handle<JSObject*> aGivenProto) { + return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto); +} + +void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer, + ErrorResult& aRv) { + if (aBuffer) { + switch (aBuffer->NumberOfChannels()) { + case 1: + case 2: + case 4: + // Supported number of channels + break; + default: + aRv.ThrowNotSupportedError( + nsPrintfCString("%u is not a supported number of channels", + aBuffer->NumberOfChannels())); + return; + } + } + + if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) { + aRv.ThrowNotSupportedError(nsPrintfCString( + "Buffer sample rate (%g) does not match AudioContext sample rate (%g)", + aBuffer->SampleRate(), Context()->SampleRate())); + return; + } + + // Send the buffer to the track + AudioNodeTrack* ns = mTrack; + MOZ_ASSERT(ns, "Why don't we have a track here?"); + if (aBuffer) { + AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx); + if (data.mBufferFormat == AUDIO_FORMAT_S16) { + // Reverb expects data in float format. + // Convert on the main thread so as to minimize allocations on the audio + // thread. + // Reverb will dispose of the buffer once initialized, so convert here + // and leave the smaller arrays in the AudioBuffer. + // There is currently no value in providing 16/32-byte aligned data + // because PadAndMakeScaledDFT() will copy the data (without SIMD + // instructions) to aligned arrays for the FFT. + CheckedInt<size_t> bufferSize(sizeof(float)); + bufferSize *= data.mDuration; + bufferSize *= data.ChannelCount(); + RefPtr<SharedBuffer> floatBuffer = + SharedBuffer::Create(bufferSize, fallible); + if (!floatBuffer) { + aRv.Throw(NS_ERROR_OUT_OF_MEMORY); + return; + } + auto floatData = static_cast<float*>(floatBuffer->Data()); + for (size_t i = 0; i < data.ChannelCount(); ++i) { + ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData, + data.mDuration); + data.mChannelData[i] = floatData; + floatData += data.mDuration; + } + data.mBuffer = std::move(floatBuffer); + data.mBufferFormat = AUDIO_FORMAT_FLOAT32; + } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) { + // This is valid, but a signal convolved by a silent signal is silent, set + // the reverb to nullptr and return. + ns->SetReverb(nullptr, 0); + mBuffer = aBuffer; + return; + } + + // Note about empirical tuning (this is copied from Blink) + // The maximum FFT size affects reverb performance and accuracy. + // If the reverb is single-threaded and processes entirely in the real-time + // audio thread, it's important not to make this too high. In this case + // 8192 is a good value. But, the Reverb object is multi-threaded, so we + // want this as high as possible without losing too much accuracy. Very + // large FFTs will have worse phase errors. Given these constraints 32768 is + // a good compromise. + const size_t MaxFFTSize = 32768; + + bool allocationFailure = false; + UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb( + data, MaxFFTSize, !Context()->IsOffline(), mNormalize, + aBuffer->SampleRate(), &allocationFailure)); + if (!allocationFailure) { + ns->SetReverb(reverb.release(), data.ChannelCount()); + } else { + aRv.Throw(NS_ERROR_OUT_OF_MEMORY); + return; + } + } else { + ns->SetReverb(nullptr, 0); + } + mBuffer = aBuffer; +} + +void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; } + +} // namespace mozilla::dom |