/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ConvolverNode.h"
#include "mozilla/dom/ConvolverNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeTrack.h"
#include "blink/Reverb.h"
#include "PlayingRefChangeHandler.h"

namespace mozilla::dom {

NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
NS_INTERFACE_MAP_END_INHERITING(AudioNode)

NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)

// Engine that performs the convolution for a ConvolverNode.  It feeds input
// blocks through a WebCore::Reverb and keeps track of how much of the reverb
// tail still has to be produced after the input becomes silent or changes
// channel count.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is accepted but is not used by the engine; normalization is
  // passed to the WebCore::Reverb constructor in ConvolverNode::SetBuffer().
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time of starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time of starting the right convolver when the input changes from
    // mono to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs aReverb as the reverb used for processing, taking ownership of
  // it, and resets all tail bookkeeping.  aReverb may be null, in which case
  // ProcessBlock() produces null output.  aImpulseChannelCount is the number
  // of channels in the impulse response and selects the initial
  // RightConvolverMode.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Fills mReverbInput with aTotalChannelCount channels.  The first
  // aInput.ChannelCount() channels are copies of the corresponding input
  // channels scaled by aInput.mVolume; any remaining channels are zeroed.
  // aTotalChannelCount must not be less than the input channel count.
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // The engine is active from the first non-null input until the end of the
  // resulting tail has been handled (see mRemainingLeftOutput).
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  // Memory reporting: base engine + the reusable input block + the reverb.
  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightConvolverMode != Always.  There is no special handling required at
  // the end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};

// Adds channel 0 of aBlock, scaled by aScale, into channel 1 of aBlock.
// Used with -1.0f to form r - l and with 1.0f to recover r * g from
// l * g + (r - l) * g.  aBlock must have at least two channels.
static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
  const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
  float* right = aBlock->ChannelFloatsForWrite(1);
  AudioBlockAddChannelWithScale(left, aScale, right);
}

// Convolves one block of input with the current impulse response.
//
// With no reverb set, or once the tail following null input has finished,
// aOutput is set to null.  While a tail remains after null input, silence is
// fed to the reverb.  Non-null input (re)starts the tail counters and, when
// there is a single impulse response channel, selects how the right channel
// is produced; see RightConvolverMode.  PlayingRefChanged runnables are
// dispatched with ADDREF when processing starts from an inactive state and
// with RELEASE when the tail ends.  aFrom and aFinished are not used.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    if (mRemainingLeftOutput > 0) {
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    if (mRemainingLeftOutput <= 0) {
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        mReverbInput = aInput;
      }
    }

    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}

// Constructs a node with channelCount 2, channelCountMode "clamped-max" and
// channelInterpretation "speakers", normalization enabled, and creates the
// engine and track that process its audio.
ConvolverNode::ConvolverNode(AudioContext* aContext)
    : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
                ChannelInterpretation::Speakers),
      mNormalize(true) {
  ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);
  mTrack = AudioNodeTrack::Create(
      aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());
}

// Creates a ConvolverNode configured from aOptions.  Returns nullptr with
// aRv set if initialization or setting the buffer fails.  aCx must be
// non-null when aOptions provides a buffer.
/* static */
already_AddRefed<ConvolverNode> ConvolverNode::Create(
    JSContext* aCx, AudioContext& aAudioContext,
    const ConvolverOptions& aOptions, ErrorResult& aRv) {
  RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);

  audioNode->Initialize(aOptions, aRv);
  if (NS_WARN_IF(aRv.Failed())) {
    return nullptr;
  }

  // This must be done before setting the buffer.
  audioNode->SetNormalize(!aOptions.mDisableNormalization);

  if (aOptions.mBuffer.WasPassed()) {
    MOZ_ASSERT(aCx);
    audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
    if (NS_WARN_IF(aRv.Failed())) {
      return nullptr;
    }
  }

  return audioNode.forget();
}

size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
  if (mBuffer) {
    // NB: mBuffer might be shared with the associated engine, by convention
    //     the AudioNode will report.
    amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
  }
  return amount;
}

size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}

JSObject* ConvolverNode::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
}

// Sets the impulse response buffer and hands a matching Reverb to the track.
//
// aBuffer may be null, which clears the reverb.  A non-null buffer must have
// 1, 2 or 4 channels and the same sample rate as the AudioContext; otherwise
// a NotSupportedError is thrown on aRv.  NS_ERROR_OUT_OF_MEMORY is thrown if
// the float conversion buffer or the Reverb cannot be allocated.  mBuffer is
// only updated when no error is thrown.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // Channels are laid out back to back in the single float buffer.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value.  But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy.  Very
    // large FFTs will have worse phase errors.  Given these constraints 32768 is
    // a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      // Ownership of the Reverb passes to the track's engine.
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}

// Only records the flag; it is read when a Reverb is next constructed in
// SetBuffer(), so it does not affect a reverb that has already been set.
void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }

}  // namespace mozilla::dom