369 lines
14 KiB
C++
369 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
|
|
#define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
|
|
|
|
#include "AudioRingBuffer.h"
|
|
#include "AudioSegment.h"
|
|
#include "TimeUnits.h"
|
|
#include "WavDumper.h"
|
|
|
|
#include <speex/speex_resampler.h>
|
|
|
|
namespace mozilla {
|
|
|
|
// Number of channels in a stereo stream. Also used below to size the
// per-channel arrays held by DynamicResampler.
constexpr uint32_t STEREO = 2;
|
|
|
|
/**
|
|
* DynamicResampler allows updating on the fly the output sample rate and the
|
|
* number of channels. In addition to that, it maintains an internal buffer for
|
|
* the input data and allows pre-buffering as well. The Resample() method
|
|
* strives to provide the requested number of output frames by using the input
|
|
* data including any pre-buffering. If there are fewer frames in the internal
|
|
* buffer than is requested, the internal buffer is padded with enough silence
|
|
* to allow the requested frames to be resampled and returned.
|
|
*
|
|
* Input data buffering makes use of the AudioRingBuffer. The capacity of the
|
|
* buffer is initially 100ms of audio and it is pre-allocated during
|
|
* SetSampleFormat(). Should the input data grow beyond that, the input buffer
|
|
* is re-allocated on the fly. In addition to that, due to a special feature of
|
|
* AudioRingBuffer, no extra copies take place when the input data is fed to the
|
|
* resampler.
|
|
*
|
|
* The sample format must be set before using any method.
|
|
*
|
|
* The DynamicResampler is not thread-safe, so all the methods apart from the
|
|
* constructor must be called on the same thread.
|
|
*/
|
|
class DynamicResampler final {
|
|
public:
|
|
/**
|
|
* Provide the initial input and output rate and the amount of pre-buffering.
|
|
* The channel count will be set to stereo. Memory allocation will take
|
|
* place. The input buffer is non-interleaved.
|
|
*/
|
|
DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
|
|
uint32_t aInputPreBufferFrameCount = 0);
|
|
~DynamicResampler();
|
|
|
|
/**
|
|
* Set the sample format type to float or short.
|
|
*/
|
|
void SetSampleFormat(AudioSampleFormat aFormat);
|
|
/**
 * Return the input sample rate currently stored in mInRate.
 */
uint32_t GetInRate() const { return mInRate; }
/**
 * Return the channel count currently stored in mChannels.
 */
uint32_t GetChannels() const { return mChannels; }
|
|
|
|
/**
|
|
* Append `aInFrames` number of frames from `aInBuffer` to the internal input
|
|
* buffer. Memory copy/move takes place.
|
|
*/
|
|
void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);
|
|
void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);
|
|
/**
|
|
* Append `aInFrames` number of frames of silence to the internal input
|
|
* buffer. Memory copy/move takes place.
|
|
*/
|
|
void AppendInputSilence(const uint32_t aInFrames);
|
|
/**
|
|
* Return the number of frames the internal input buffer can store.
|
|
*/
|
|
uint32_t InFramesBufferSize() const;
|
|
/**
|
|
* Return the number of frames stored in the internal input buffer.
|
|
*/
|
|
uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
|
|
|
|
/**
|
|
* Prepends existing input data with a silent pre-buffer if not already done.
|
|
* Data will be prepended so that after resampling aDuration of data,
|
|
* the buffering level will be as close as possible to
|
|
* mInputPreBufferFrameCount, which is the desired buffering level.
|
|
*/
|
|
void EnsurePreBuffer(media::TimeUnit aDuration);
|
|
|
|
/**
|
|
* Set the number of frames that should be used for input pre-buffering.
|
|
*/
|
|
void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount);
|
|
|
|
/**
|
|
* Resample as many frames as needed from the internal input buffer to the
|
|
* `aOutBuffer` in order to provide all `aOutFrames`.
|
|
*
|
|
* On first call, prepends the input buffer with silence so that after
|
|
* resampling aOutFrames frames of data, the input buffer holds data as close
|
|
* as possible to the configured pre-buffer size.
|
|
*
|
|
* If there are not enough input frames to provide the requested output
|
|
* frames, the input buffer is padded with enough silence to allow the
|
|
* requested frames to be resampled, and the pre-buffer is reset so that the
|
|
* next call will be treated as the first.
|
|
*
|
|
* Returns true if the internal input buffer underran and had to be padded
|
|
* with silence, otherwise false.
|
|
*/
|
|
bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);
|
|
bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,
|
|
uint32_t aChannelIndex);
|
|
|
|
/**
|
|
* Update the input rate and/or the channel count. If a value is not updated
|
|
* compared to the current one nothing happens. Changing the `aInRate`
|
|
* results in recalculation in the resampler. Changing `aChannels` results in
|
|
* the reallocation of the internal input buffer with the exception of
|
|
* changes between mono to stereo and vice versa where no reallocation takes
|
|
* place. A stereo internal input buffer is always maintained even if the
|
|
* sound is mono.
|
|
*/
|
|
void UpdateResampler(uint32_t aInRate, uint32_t aChannels);
|
|
|
|
private:
|
|
template <typename T>
|
|
void AppendInputInternal(Span<const T* const>& aInBuffer,
|
|
uint32_t aInFrames) {
|
|
MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
|
|
for (uint32_t i = 0; i < mChannels; ++i) {
|
|
PushInFrames(aInBuffer[i], aInFrames, i);
|
|
}
|
|
}
|
|
|
|
void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
|
|
float* aOutBuffer, uint32_t* aOutFrames,
|
|
uint32_t aChannelIndex);
|
|
void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
|
|
int16_t* aOutBuffer, uint32_t* aOutFrames,
|
|
uint32_t aChannelIndex);
|
|
|
|
template <typename T>
|
|
bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,
|
|
uint32_t aChannelIndex) {
|
|
MOZ_ASSERT(mInRate);
|
|
MOZ_ASSERT(mOutRate);
|
|
MOZ_ASSERT(mChannels);
|
|
MOZ_ASSERT(aChannelIndex < mChannels);
|
|
MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
|
|
MOZ_ASSERT(aOutFrames);
|
|
|
|
uint32_t outFramesNeeded = aOutFrames;
|
|
T* nextOutFrame = aOutBuffer;
|
|
if (mInRate == mOutRate) {
|
|
if (!mResamplerIsBypassed) {
|
|
uint32_t latency = speex_resampler_get_input_latency(mResampler);
|
|
mInternalInBuffer[aChannelIndex].ReadNoCopy(
|
|
[&](const Span<const T>& aInBuffer) -> uint32_t {
|
|
// Although unlikely with the sample rates used with this class,
|
|
// the resampler input latency may temporarily be higher than
|
|
// indicated, after a change in resampling rate that reduces the
|
|
// indicated latency. The resampler's "magic" samples cause
|
|
// this. All frames in the resampler are extracted when
|
|
// `latency` output frames have been extracted.
|
|
uint32_t outFramesResampled = std::min(outFramesNeeded, latency);
|
|
uint32_t inFrames = aInBuffer.Length();
|
|
ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame,
|
|
&outFramesResampled, aChannelIndex);
|
|
nextOutFrame += outFramesResampled;
|
|
outFramesNeeded -= outFramesResampled;
|
|
if (outFramesResampled == latency) {
|
|
mResamplerIsBypassed = true;
|
|
// The last `latency` frames of input to the resampler will not
|
|
// be extracted from the resampler. Leave them in
|
|
// mInternalInBuffer to be copied directly to nextOutFrame.
|
|
MOZ_ASSERT(inFrames >= latency);
|
|
return inFrames - latency;
|
|
}
|
|
return inFrames;
|
|
});
|
|
}
|
|
bool underrun = false;
|
|
if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();
|
|
buffered < outFramesNeeded) {
|
|
underrun = true;
|
|
mIsPreBufferSet = false;
|
|
mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded -
|
|
buffered);
|
|
}
|
|
DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read(
|
|
Span(nextOutFrame, outFramesNeeded));
|
|
MOZ_ASSERT(numFramesRead == outFramesNeeded);
|
|
// Workaround to avoid discontinuity when the speex resampler operates
|
|
// again. Feed it with the last 20 frames to warm up the internal memory
|
|
// of the resampler and then skip memory equals to resampler's input
|
|
// latency.
|
|
mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);
|
|
if (aChannelIndex == 0 && !mIsWarmingUp) {
|
|
mInputStreamFile.Write(nextOutFrame, outFramesNeeded);
|
|
mOutputStreamFile.Write(nextOutFrame, outFramesNeeded);
|
|
}
|
|
return underrun;
|
|
}
|
|
|
|
auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t {
|
|
uint32_t outFramesResampled = outFramesNeeded;
|
|
uint32_t inFrames = aInLength;
|
|
ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled,
|
|
aChannelIndex);
|
|
nextOutFrame += outFramesResampled;
|
|
outFramesNeeded -= outFramesResampled;
|
|
mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames);
|
|
return inFrames;
|
|
};
|
|
|
|
MOZ_ASSERT(!mResamplerIsBypassed);
|
|
mInternalInBuffer[aChannelIndex].ReadNoCopy(
|
|
[&](const Span<const T>& aInBuffer) -> uint32_t {
|
|
if (!outFramesNeeded) {
|
|
return 0;
|
|
}
|
|
return resample(aInBuffer.Elements(), aInBuffer.Length());
|
|
});
|
|
|
|
if (outFramesNeeded == 0) {
|
|
return false;
|
|
}
|
|
|
|
while (outFramesNeeded > 0) {
|
|
MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);
|
|
// Round up.
|
|
uint32_t totalInFramesNeeded =
|
|
((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate)
|
|
.value();
|
|
resample(nullptr, totalInFramesNeeded);
|
|
}
|
|
mIsPreBufferSet = false;
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
|
|
uint32_t aChannelIndex) {
|
|
MOZ_ASSERT(aInBuffer);
|
|
MOZ_ASSERT(aInFrames);
|
|
MOZ_ASSERT(mChannels);
|
|
MOZ_ASSERT(aChannelIndex < mChannels);
|
|
MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
|
|
EnsureInputBufferSizeInFrames(
|
|
mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames);
|
|
mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
|
|
}
|
|
|
|
void WarmUpResampler(bool aSkipLatency);
|
|
|
|
bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) {
|
|
uint32_t sampleSize = 0;
|
|
if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {
|
|
sampleSize = sizeof(float);
|
|
} else if (mSampleFormat == AUDIO_FORMAT_S16) {
|
|
sampleSize = sizeof(short);
|
|
}
|
|
|
|
if (sampleSize == 0) {
|
|
// No sample format set, we wouldn't know how many bytes to allocate.
|
|
return true;
|
|
}
|
|
|
|
uint32_t sizeInFrames = InFramesBufferSize();
|
|
if (aSizeInFrames <= sizeInFrames) {
|
|
// Buffer size is sufficient.
|
|
return true; // no reallocation necessary
|
|
}
|
|
|
|
// 5 second cap.
|
|
const uint32_t cap = 5 * mInRate;
|
|
if (sizeInFrames >= cap) {
|
|
// Already at the cap.
|
|
return false;
|
|
}
|
|
|
|
// As a backoff strategy, at least double the previous size.
|
|
sizeInFrames *= 2;
|
|
|
|
if (aSizeInFrames > sizeInFrames) {
|
|
// A larger buffer than the normal backoff strategy provides is needed, or
|
|
// this is the first time setting the buffer size. Add another 50ms, as
|
|
// some jitter is expected.
|
|
sizeInFrames = aSizeInFrames + mInRate / 20;
|
|
}
|
|
|
|
// mInputPreBufferFrameCount is an indication of the desired average
|
|
// buffering. Provide for at least twice this.
|
|
sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2);
|
|
|
|
sizeInFrames = std::min(cap, sizeInFrames);
|
|
|
|
bool success = true;
|
|
for (auto& b : mInternalInBuffer) {
|
|
success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames);
|
|
}
|
|
|
|
if (success) {
|
|
// All buffers have the new size.
|
|
return true;
|
|
}
|
|
|
|
// Allocating an input buffer failed. We stick with the old buffer size.
|
|
NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "
|
|
"frames). Expect glitches.",
|
|
sampleSize * sizeInFrames, sizeInFrames)
|
|
.get());
|
|
return false;
|
|
}
|
|
|
|
public:
|
|
const uint32_t mOutRate;
|
|
|
|
private:
|
|
bool mIsPreBufferSet = false;
|
|
bool mIsWarmingUp = false;
|
|
// The resampler can be bypassed when the input and output rates match and
|
|
// any frames buffered in the resampler have been extracted. This initial
|
|
// value is reset on construction by UpdateResampler() if the rates differ.
|
|
bool mResamplerIsBypassed = true;
|
|
uint32_t mInputPreBufferFrameCount;
|
|
uint32_t mChannels = 0;
|
|
uint32_t mInRate;
|
|
|
|
AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
|
|
|
|
SpeexResamplerState* mResampler = nullptr;
|
|
AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
|
|
|
|
class TailBuffer {
|
|
public:
|
|
template <typename T>
|
|
T* Buffer() {
|
|
return reinterpret_cast<T*>(mBuffer);
|
|
}
|
|
/* Store the MAXSIZE last elements of the buffer. */
|
|
template <typename T>
|
|
void StoreTail(const Span<const T>& aInBuffer) {
|
|
StoreTail(aInBuffer.data(), aInBuffer.size());
|
|
}
|
|
template <typename T>
|
|
void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
|
|
const T* inBuffer = aInBuffer;
|
|
mSize = std::min(aInFrames, MAXSIZE);
|
|
if (inBuffer) {
|
|
PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize);
|
|
} else {
|
|
std::fill_n(Buffer<T>(), mSize, static_cast<T>(0));
|
|
}
|
|
}
|
|
uint32_t Length() { return mSize; }
|
|
static constexpr uint32_t MAXSIZE = 20;
|
|
|
|
private:
|
|
float mBuffer[MAXSIZE] = {};
|
|
uint32_t mSize = 0;
|
|
};
|
|
AutoTArray<TailBuffer, STEREO> mInputTail;
|
|
|
|
WavDumper mInputStreamFile;
|
|
WavDumper mOutputStreamFile;
|
|
};
|
|
|
|
} // namespace mozilla
|
|
|
|
#endif // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
|