firefox/dom/media/webaudio/blink/ReverbConvolver.cpp

/*
 * Copyright (C) 2010 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "ReverbConvolver.h"
#include "ReverbConvolverStage.h"

using namespace mozilla;

namespace WebCore {

// Size passed to m_inputBuffer when a ReverbConvolver is constructed
// (8 * 16384 = 131072).
// NOTE(review): presumably a count of sample frames -- confirm against the
// input buffer class.
const int InputBufferSize = 8 * 16384;

// Only the leading portion of the impulse response is processed in the
// real-time thread: when background threads are in use, every stage whose
// offset into the impulse response is greater than this limit is handed to the
// background thread instead.  As a consequence the background thread has about
// 278msec of scheduling slop.  Empirically, this has been found to be a good
// compromise between giving enough time for scheduling slop, while still
// minimizing the amount of processing done in the primary (high-priority)
// thread.  This was found to be a good value on Mac OS X, and may work well on
// other platforms as well, assuming the very rough scheduling latencies are
// similar on these time-scales.  Of course, this code may need to be tuned for
// individual platforms if this assumption is found to be incorrect.
const size_t RealtimeFrameLimit = 8192 + 4096  // ~278msec @ 44.1KHz
                                  - WEBAUDIO_BLOCK_SIZE;
// FFT size of the first stage.  Successive stages double in size each time
// until they hit the applicable maximum.  It is also the smallest FFT size
// considered when the final, possibly shortened, stage is sized.
const size_t MinFFTSize = 256;
// If we are using background threads then don't exceed this FFT size for the
// stages which run in the real-time thread.  This avoids having only one or
// two large stages (size 16384 or so) at the end which take a lot of time
// every several processing slices.  This way we amortize the cost over more
// processing slices.
const size_t MaxRealtimeFFTSize = 4096;

// Splits the impulse response into a sequence of FFT convolution stages and,
// if requested and needed, starts the background worker thread.
//
// impulseResponseData / impulseResponseLength: the impulse response that the
//     stages are built from.
// maxFFTSize: upper bound applied to the FFT size of stages that are not
//     already capped by MaxRealtimeFFTSize.
// convolverRenderPhase: base phase added to every stage's own phase.
// useBackgroundThreads: when true, stages whose offset lies beyond
//     RealtimeFrameLimit are queued for the background thread.
// aAllocationFailure: out-parameter, set to true when the accumulation buffer
//     could not be allocated; in that case no stage is created.
ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
                                 size_t impulseResponseLength,
                                 size_t maxFFTSize, size_t convolverRenderPhase,
                                 bool useBackgroundThreads,
                                 bool* aAllocationFailure)
    : m_impulseResponseLength(impulseResponseLength),
      m_inputBuffer(InputBufferSize),
      m_backgroundThread("ConvolverWorker"),
      m_backgroundThreadMonitor("ConvolverMonitor"),
      m_useBackgroundThreads(useBackgroundThreads),
      m_wantsToExit(false),
      m_moreInputBuffered(false) {
  const bool allocated = m_accumulationBuffer.allocate(
      impulseResponseLength + WEBAUDIO_BLOCK_SIZE);
  *aAllocationFailure = !allocated;
  if (!allocated) {
    return;
  }

  // For now, using background threads is our only hint that we run under a
  // real-time constraint.  Without them we assume an offline caller such as
  // a command-line tool.
  const bool realtimeConstrained = useBackgroundThreads;

  // Zero latency is reported to every stage: the first FFT stage is small
  // enough to return output in the first block.
  const size_t totalLatency = 0;

  size_t offset = 0;  // Position of the next stage in the impulse response.
  size_t phase = 0;   // Stagger applied to the next stage's FFT timing.
  size_t fftSize = MinFFTSize;
  while (offset < impulseResponseLength) {
    size_t length = fftSize / 2;

    // The last stage may straddle the end of the impulse response.  Trim it
    // to what is left and pick the smallest FFT that still covers it.
    if (offset + length > impulseResponseLength) {
      length = impulseResponseLength - offset;
      fftSize = MinFFTSize;
      while (fftSize < length * 2) {
        fftSize *= 2;
      }
    }

    // Offsetting each stage's phase "staggers" the FFTs so that they don't
    // all happen at the same time.
    const int renderPhase = convolverRenderPhase + phase;

    UniquePtr<ReverbConvolverStage> stage(new ReverbConvolverStage(
        impulseResponseData, impulseResponseLength, totalLatency, offset,
        length, fftSize, renderPhase, &m_accumulationBuffer));

    // Stages far enough into the response are left to the background thread.
    // NB: the accessor needs `this->` because the constructor parameter of
    // the same name shadows it.
    const bool runsInBackground =
        this->useBackgroundThreads() && offset > RealtimeFrameLimit;
    if (runsInBackground) {
      m_backgroundStages.AppendElement(std::move(stage));
    } else {
      m_stages.AppendElement(std::move(stage));
    }

    // Advance to the next stage: double the FFT size and move past the part
    // of the response just covered.
    fftSize *= 2;
    offset += length;

    if (realtimeConstrained && !runsInBackground &&
        fftSize > MaxRealtimeFFTSize) {
      fftSize = MaxRealtimeFFTSize;
      // Custom phase positions for all but the first of the realtime
      // stages of largest size.  These spread out the work of the larger
      // realtime stages.  None of the FFTs of size 1024, 2048 or 4096 are
      // performed when processing the same block.  The first
      // MaxRealtimeFFTSize = 4096 stage, at the end of the doubling,
      // performs its FFT at block 7.  The FFTs of size 2048 are performed
      // in blocks 3 + 8 * n and size 1024 at 1 + 4 * n.
      const uint32_t phaseLookup[] = {14, 0, 10, 4};
      phase = WEBAUDIO_BLOCK_SIZE *
              phaseLookup[m_stages.Length() % std::size(phaseLookup)];
    } else if (fftSize > maxFFTSize) {
      fftSize = maxFFTSize;
      // A prime offset spreads out FFTs in a way that all available phase
      // positions will be used if there are sufficient stages.
      phase += 5 * WEBAUDIO_BLOCK_SIZE;
    } else if (length > WEBAUDIO_BLOCK_SIZE) {
      // While the stages are still doubling in size, the next stage's FFT
      // occurs mid-way between the FFTs of this stage.
      phase = length - WEBAUDIO_BLOCK_SIZE;
    }
  }

  // The worker thread is only needed if some stage was queued for it.
  if (!this->useBackgroundThreads() || m_backgroundStages.Length() == 0) {
    return;
  }

  // FIXME: would be better to up the thread priority here.  It doesn't need to
  // be real-time, but higher than the default...
  if (!m_backgroundThread.Start()) {
    NS_WARNING("Cannot start convolver thread.");
    return;
  }
  m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod(
      "WebCore::ReverbConvolver::backgroundThreadEntry", this,
      &ReverbConvolver::backgroundThreadEntry));
}

// Asks the background thread (if one is running) to exit and waits for it to
// stop before the object is torn down.
ReverbConvolver::~ReverbConvolver() {
  // Nothing to shut down unless a background thread is actually running.
  if (!useBackgroundThreads() || !m_backgroundThread.IsRunning()) {
    return;
  }

  m_wantsToExit = true;

  // The worker may be blocked waiting on the monitor; wake it up so that it
  // can observe the exit request and return.
  {
    MonitorAutoLock lock(m_backgroundThreadMonitor);
    m_moreInputBuffered = true;
    m_backgroundThreadMonitor.Notify();
  }

  m_backgroundThread.Stop();
}

// Reports the memory attributed to this convolver: the object itself, both
// stage arrays together with every non-null stage they hold, and the
// accumulation and input buffers.
size_t ReverbConvolver::sizeOfIncludingThis(
    mozilla::MallocSizeOf aMallocSizeOf) const {
  // Measures one stage array: its own storage plus each non-null stage.
  const auto measureStages = [&](const auto& stages) {
    size_t bytes = stages.ShallowSizeOfExcludingThis(aMallocSizeOf);
    for (size_t i = 0; i < stages.Length(); ++i) {
      if (stages[i]) {
        bytes += stages[i]->sizeOfIncludingThis(aMallocSizeOf);
      }
    }
    return bytes;
  };

  size_t total = aMallocSizeOf(this);
  total += measureStages(m_stages);
  total += measureStages(m_backgroundStages);

  // NB: The buffer sizes are static, so even though the buffers might be
  //     accessed in another thread it's safe to measure them.
  total += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf);
  total += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf);

  // Not measured (possible future additions):
  // - m_backgroundThread
  // - m_backgroundThreadMonitor
  return total;
}

// Body of the background worker: repeatedly sleeps until more input has been
// signalled (or exit is requested), then runs the background stages until
// they have caught up with the input buffer's write index.
void ReverbConvolver::backgroundThreadEntry() {
  while (!m_wantsToExit) {
    // Reset the flag, then block until the realtime thread hands us more
    // input or shutdown is requested.
    m_moreInputBuffered = false;
    {
      MonitorAutoLock lock(m_backgroundThreadMonitor);
      while (!(m_moreInputBuffered || m_wantsToExit)) {
        m_backgroundThreadMonitor.Wait();
      }
    }

    // Snapshot of how far the input has been written; the stages are run
    // until their read position reaches it.
    const int writeIndex = m_inputBuffer.writeIndex();

    // Every stage keeps its own read index (in case we ever want to run in
    // more than one background thread); the first stage's index is used as
    // the progress marker here.
    for (;;) {
      const int readIndex = m_backgroundStages[0]->inputReadIndex();
      if (readIndex == writeIndex) {
        break;  // FIXME: do better to detect buffer overrun...
      }

      // Accumulate one more round of contributions from each stage.
      for (size_t i = 0; i < m_backgroundStages.Length(); ++i) {
        m_backgroundStages[i]->processInBackground(this);
      }
    }
  }
}

// Processes one block of WEBAUDIO_BLOCK_SIZE frames: buffers the input, runs
// the realtime stages, reads the accumulated result into the destination and
// then tries to wake the background thread.  Does nothing (after asserting)
// if either pointer is null.
void ReverbConvolver::process(const float* sourceChannelData,
                              float* destinationChannelData) {
  const bool isDataSafe = sourceChannelData && destinationChannelData;
  MOZ_ASSERT(isDataSafe);
  if (!isDataSafe) {
    return;
  }

  // Make the new input available to every thread via the shared input buffer.
  m_inputBuffer.write(sourceChannelData, WEBAUDIO_BLOCK_SIZE);

  // Let each realtime stage add its contribution.
  for (size_t i = 0; i < m_stages.Length(); ++i) {
    m_stages[i]->process(sourceChannelData);
  }

  // The output for this block is whatever has been accumulated so far.
  m_accumulationBuffer.readAndClear(destinationChannelData,
                                    WEBAUDIO_BLOCK_SIZE);

  // More input is buffered now, so wake up the background thread.
  //
  // TryLock() is used rather than a MonitorAutoLock on purpose: this runs on
  // the real-time thread, where a contended lock is a disaster (it causes
  // audio glitching).  Failing to signal from time to time is OK because we
  // are called repeatedly and frequently (around every 3ms) and will signal
  // on a later call; the background thread is processing well into the
  // future and has a considerable amount of leeway.
  if (!m_backgroundThreadMonitor.TryLock()) {
    return;
  }
  m_moreInputBuffered = true;
  m_backgroundThreadMonitor.Notify();
  m_backgroundThreadMonitor.Unlock();
}

}  // namespace WebCore