summaryrefslogtreecommitdiffstats
path: root/dom/media/webaudio/ConvolverNode.cpp
blob: 65562ae6d00790ab3df583d50f719953e45456d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ConvolverNode.h"
#include "mozilla/dom/ConvolverNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeTrack.h"
#include "blink/Reverb.h"
#include "PlayingRefChangeHandler.h"
#include "Tracing.h"

namespace mozilla::dom {

// Cycle-collection and refcounting boilerplate: in addition to everything
// AudioNode already traverses/unlinks, ConvolverNode holds mBuffer (the
// impulse-response AudioBuffer), which must participate in cycle collection.
NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
NS_INTERFACE_MAP_END_INHERITING(AudioNode)

NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)

// Graph-thread engine for ConvolverNode.  Owns the Reverb instance and the
// per-block input staging buffer, and tracks how much "tail" output remains
// after the input goes silent or drops from stereo to mono.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is unused here: normalization is applied when the Reverb is
  // constructed on the main thread (see ConvolverNode::SetBuffer()).
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time or starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time starting the right convolver when the input changes from mono
    // to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs a new (possibly null) Reverb, taking ownership of aReverb, and
  // resets all tail-tracking state so any previous tail is treated as
  // already handled.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    // INT32_MIN marks "no tail pending"; see mRemainingLeftOutput below.
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Copies aInput into mReverbInput with aInput.mVolume pre-multiplied,
  // zero-filling channels [aInput.ChannelCount(), aTotalChannelCount).
  // NOTE(review): ProcessBlock() relies on this call leaving
  // mReverbInput.mVolume non-zero (presumably via AllocateChannels()) so it
  // can detect whether this was called — confirm against AudioBlock.
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // The engine stays active while a left-channel tail remains to be emitted.
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightOutputMode != Always.  There is no special handling required at the
  // end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};

// In-place stereo mix helper: right += aScale * left.  Used both to form the
// l - r difference signal (aScale = -1) and to reconstruct the right output
// channel (aScale = +1) in Difference mode.
static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
  const float* leftChannel =
      static_cast<const float*>(aBlock->mChannelData[0]);
  float* rightChannel = aBlock->ChannelFloatsForWrite(1);
  AudioBlockAddChannelWithScale(leftChannel, aScale, rightChannel);
}

// Renders one 128-frame block of convolved output.  Handles three concerns:
// (1) emitting the reverb tail after input goes silent and releasing the
// self-reference that kept the node alive, (2) the single-impulse-channel
// mono/stereo transition machinery described on RightConvolverMode, and
// (3) choosing output channel counts so the tail of a stereo signal remains
// stereo for its full duration.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  TRACE("ConvolverNodeEngine::ProcessBlock");
  // No impulse response installed (or it was silent): output silence.
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    // Silent input: keep feeding silence into the convolver while a left
    // tail remains; once it is exhausted, release resources and go inactive.
    if (mRemainingLeftOutput > 0) {
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        // Tail fully emitted: mark it handled and drop the self-reference
        // taken when non-silent input started.
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    // Non-silent input after silence (or at start): hold a self-reference
    // for the duration of processing plus tail.
    if (mRemainingLeftOutput <= 0) {
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        // Volume is exactly 1: share the input buffers directly, no copy.
        mReverbInput = aInput;
      }
    }

    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}

// A ConvolverNode defaults to 2 channels, "clamped-max" counting and
// "speakers" interpretation, with normalization enabled, per the Web Audio
// spec.  The engine is handed to an AudioNodeTrack that runs on the graph.
ConvolverNode::ConvolverNode(AudioContext* aContext)
    : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
                ChannelInterpretation::Speakers),
      mNormalize(true) {
  auto* convolverEngine = new ConvolverNodeEngine(this, mNormalize);
  mTrack = AudioNodeTrack::Create(aContext, convolverEngine,
                                  AudioNodeTrack::NO_TRACK_FLAGS,
                                  aContext->Graph());
}

/* static */
// WebIDL constructor entry point: builds the node, applies AudioNodeOptions,
// then normalization, then (optionally) the impulse-response buffer.
already_AddRefed<ConvolverNode> ConvolverNode::Create(
    JSContext* aCx, AudioContext& aAudioContext,
    const ConvolverOptions& aOptions, ErrorResult& aRv) {
  RefPtr<ConvolverNode> node = new ConvolverNode(&aAudioContext);

  node->Initialize(aOptions, aRv);
  if (NS_WARN_IF(aRv.Failed())) {
    return nullptr;
  }

  // Normalization must be configured before any buffer is applied, because
  // SetBuffer() consults mNormalize when constructing the Reverb.
  node->SetNormalize(!aOptions.mDisableNormalization);

  if (aOptions.mBuffer.WasPassed()) {
    MOZ_ASSERT(aCx);
    node->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
    if (NS_WARN_IF(aRv.Failed())) {
      return nullptr;
    }
  }

  return node.forget();
}

// Memory reporter hook.  NB: mBuffer might be shared with the associated
// engine; by convention the AudioNode reports it.
size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t total = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
  if (mBuffer) {
    total += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
  }
  return total;
}

// Memory reporter hook: this object itself plus everything it owns.
size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t selfSize = aMallocSizeOf(this);
  return selfSize + SizeOfExcludingThis(aMallocSizeOf);
}

// Creates the WebIDL JS reflector for this node.
JSObject* ConvolverNode::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
}

// Installs aBuffer as the impulse response (the `buffer` attribute setter).
// Throws NotSupportedError for channel counts other than 1, 2 or 4, or when
// the buffer's sample rate differs from the context's.  Int16 sample data is
// converted to float here on the main thread; a silent buffer just clears
// the engine's Reverb.  A null aBuffer also clears the Reverb.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // All channels are packed consecutively into one float allocation;
      // mChannelData pointers are repointed into it.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value. But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy. Very
    // large FFTs will have worse phase errors. Given these constraints 32768 is
    // a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      // SetReverb takes ownership of the raw pointer on the graph thread.
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}

// Records whether impulse responses should be normalized.  Takes effect only
// on subsequent SetBuffer() calls, which pass mNormalize to the Reverb ctor.
void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }

}  // namespace mozilla::dom