diff options
Diffstat (limited to 'dom/media/driftcontrol/DynamicResampler.h')
-rw-r--r-- | dom/media/driftcontrol/DynamicResampler.h | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/dom/media/driftcontrol/DynamicResampler.h b/dom/media/driftcontrol/DynamicResampler.h new file mode 100644 index 0000000000..c1b9000aa0 --- /dev/null +++ b/dom/media/driftcontrol/DynamicResampler.h @@ -0,0 +1,350 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ +#define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ + +#include "AudioRingBuffer.h" +#include "AudioSegment.h" +#include "TimeUnits.h" +#include "WavDumper.h" + +#include <speex/speex_resampler.h> + +namespace mozilla { + +const uint32_t STEREO = 2; + +/** + * DynamicResampler allows updating on the fly the output sample rate and the + * number of channels. In addition to that, it maintains an internal buffer for + * the input data and allows pre-buffering as well. The Resample() method + * strives to provide the requested number of output frames by using the input + * data including any pre-buffering. If there are fewer frames in the internal + * buffer than is requested, the internal buffer is padded with enough silence + * to allow the requested to be resampled and returned. + * + * Input data buffering makes use of the AudioRingBuffer. The capacity of the + * buffer is initially 100ms of float audio and it is pre-allocated at the + * constructor. Should the input data grow beyond that, the input buffer is + * re-allocated on the fly. In addition to that, due to special feature of + * AudioRingBuffer, no extra copies take place when the input data is fed to the + * resampler. + * + * The sample format must be set before using any method. If the provided sample + * format is of type short the pre-allocated capacity of the input buffer + * becomes 200ms of short audio. + * + * The DynamicResampler is not thread-safe, so all the methods appart from the + * constructor must be called on the same thread. + */ +class DynamicResampler final { + public: + /** + * Provide the initial input and output rate and the amount of pre-buffering. + * The channel count will be set to stereo. Memory allocation will take + * place. The input buffer is non-interleaved. + */ + DynamicResampler( + uint32_t aInRate, uint32_t aOutRate, + media::TimeUnit aPreBufferDuration = media::TimeUnit::Zero()); + ~DynamicResampler(); + + /** + * Set the sample format type to float or short. + */ + void SetSampleFormat(AudioSampleFormat aFormat); + uint32_t GetOutRate() const { return mOutRate; } + uint32_t GetChannels() const { return mChannels; } + + /** + * Append `aInFrames` number of frames from `aInBuffer` to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames); + void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames); + /** + * Append `aInFrames` number of frames of silence to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInputSilence(const uint32_t aInFrames); + /** + * Return the number of frames the internal input buffer can store. + */ + uint32_t InFramesBufferSize() const; + /** + * Return the number of frames stored in the internal input buffer. + */ + uint32_t InFramesBuffered(uint32_t aChannelIndex) const; + + /** + * Prepends existing input data with a silent pre-buffer if not already done. + * Data will be prepended so that after resampling aOutFrames worth of output + * data, the buffering level will be as close as possible to + * mPreBufferDuration, which is the desired buffering level. + */ + void EnsurePreBuffer(media::TimeUnit aDuration); + + /** + * Set the duration that should be used for pre-buffering. + */ + void SetPreBufferDuration(media::TimeUnit aDuration); + + /* + * Resample as much frames as needed from the internal input buffer to the + * `aOutBuffer` in order to provide all `aOutFrames`. + * + * On first call, prepends the input buffer with silence so that after + * resampling aOutFrames frames of data, the input buffer holds data as close + * as possible to the configured pre-buffer size. + * + * If there are not enough input frames to provide the requested output + * frames, the input buffer is padded with enough silence to allow the + * requested frames to be resampled, and the pre-buffer is reset so that the + * next call will be treated as the first. + * + * Returns true if the internal input buffer underran and had to be padded + * with silence, otherwise false. + */ + bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex); + bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex); + + /** + * Update the output rate or/and the channel count. If a value is not updated + * compared to the current one nothing happens. Changing the `aOutRate` + * results in recalculation in the resampler. Changing `aChannels` results in + * the reallocation of the internal input buffer with the exception of + * changes between mono to stereo and vice versa where no reallocation takes + * place. A stereo internal input buffer is always maintained even if the + * sound is mono. + */ + void UpdateResampler(uint32_t aOutRate, uint32_t aChannels); + + private: + template <typename T> + void AppendInputInternal(Span<const T* const>& aInBuffer, + uint32_t aInFrames) { + MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels); + for (uint32_t i = 0; i < mChannels; ++i) { + PushInFrames(aInBuffer[i], aInFrames, i); + } + } + + void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames, + float* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames, + int16_t* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + + template <typename T> + bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mInRate); + MOZ_ASSERT(mOutRate); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex < mChannels); + MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); + MOZ_ASSERT(aOutFrames); + + if (mInRate == mOutRate) { + bool underrun = false; + if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead(); + buffered < aOutFrames) { + underrun = true; + mIsPreBufferSet = false; + mInternalInBuffer[aChannelIndex].WriteSilence(aOutFrames - buffered); + } + DebugOnly<uint32_t> numFramesRead = + mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, aOutFrames)); + MOZ_ASSERT(numFramesRead == aOutFrames); + // Workaround to avoid discontinuity when the speex resampler operates + // again. Feed it with the last 20 frames to warm up the internal memory + // of the resampler and then skip memory equals to resampler's input + // latency. + mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames); + if (aChannelIndex == 0 && !mIsWarmingUp) { + mInputStreamFile.Write(aOutBuffer, aOutFrames); + mOutputStreamFile.Write(aOutBuffer, aOutFrames); + } + return underrun; + } + + uint32_t totalOutFramesNeeded = aOutFrames; + auto resample = [&] { + mInternalInBuffer[aChannelIndex].ReadNoCopy( + [&](const Span<const T>& aInBuffer) -> uint32_t { + if (!totalOutFramesNeeded) { + return 0; + } + uint32_t outFramesResampled = totalOutFramesNeeded; + uint32_t inFrames = aInBuffer.Length(); + ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer, + &outFramesResampled, aChannelIndex); + aOutBuffer += outFramesResampled; + totalOutFramesNeeded -= outFramesResampled; + mInputTail[aChannelIndex].StoreTail<T>(aInBuffer.To(inFrames)); + return inFrames; + }); + }; + + resample(); + + if (totalOutFramesNeeded == 0) { + return false; + } + + while (totalOutFramesNeeded > 0) { + MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0); + // Round up. + uint32_t totalInFramesNeeded = + ((CheckedUint32(totalOutFramesNeeded) * mInRate + mOutRate - 1) / + mOutRate) + .value(); + mInternalInBuffer[aChannelIndex].WriteSilence(totalInFramesNeeded); + resample(); + } + mIsPreBufferSet = false; + return true; + } + + template <typename T> + void PushInFrames(const T* aInBuffer, const uint32_t aInFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(aInBuffer); + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex < mChannels); + MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); + EnsureInputBufferDuration(media::TimeUnit( + CheckedInt64(mInternalInBuffer[aChannelIndex].AvailableRead()) + + aInFrames, + mInRate)); + mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames)); + } + + void WarmUpResampler(bool aSkipLatency); + + media::TimeUnit CalculateInputBufferDuration() const { + // Pre-allocate something big, twice the pre-buffer, or at least 100ms. + return std::max(mPreBufferDuration * 2, media::TimeUnit::FromSeconds(0.1)); + } + + bool EnsureInputBufferDuration(media::TimeUnit aDuration) { + if (aDuration <= mSetBufferDuration) { + // Buffer size is sufficient. + return true; + } + + // 5 second cap. + const media::TimeUnit cap = media::TimeUnit::FromSeconds(5); + if (mSetBufferDuration == cap) { + // Already at the cap. + return false; + } + + uint32_t sampleSize = 0; + if (mSampleFormat == AUDIO_FORMAT_FLOAT32) { + sampleSize = sizeof(float); + } else if (mSampleFormat == AUDIO_FORMAT_S16) { + sampleSize = sizeof(short); + } + + if (sampleSize == 0) { + // No sample format set, we wouldn't know how many bytes to allocate. + return true; + } + + // As a backoff strategy, at least double the previous size. + media::TimeUnit duration = mSetBufferDuration * 2; + + if (aDuration > duration) { + // A larger buffer than the normal backoff strategy provides is needed, or + // this is the first time setting the buffer size. Round up to the nearest + // 100ms, some jitter is expected. + duration = aDuration.ToBase<media::TimeUnit::CeilingPolicy>(10); + } + + duration = std::min(cap, duration); + + bool success = true; + for (auto& b : mInternalInBuffer) { + success = success && + b.SetLengthBytes(sampleSize * duration.ToTicksAtRate(mInRate)); + } + + if (success) { + // All buffers have the new size. + mSetBufferDuration = duration; + return true; + } + + const uint32_t sizeInFrames = + static_cast<uint32_t>(mSetBufferDuration.ToTicksAtRate(mInRate)); + // Allocating an input buffer failed. We stick with the old buffer size. + NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u " + "frames). Expect glitches.", + sampleSize * sizeInFrames, sizeInFrames) + .get()); + for (auto& b : mInternalInBuffer) { + MOZ_ALWAYS_TRUE(b.SetLengthBytes(sampleSize * sizeInFrames)); + } + return false; + } + + public: + const uint32_t mInRate; + + private: + bool mIsPreBufferSet = false; + bool mIsWarmingUp = false; + media::TimeUnit mPreBufferDuration; + media::TimeUnit mSetBufferDuration = media::TimeUnit::Zero(); + uint32_t mChannels = 0; + uint32_t mOutRate; + + AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer; + + SpeexResamplerState* mResampler = nullptr; + AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; + + class TailBuffer { + public: + template <typename T> + T* Buffer() { + return reinterpret_cast<T*>(mBuffer); + } + /* Store the MAXSIZE last elements of the buffer. */ + template <typename T> + void StoreTail(const Span<const T>& aInBuffer) { + StoreTail(aInBuffer.data(), aInBuffer.size()); + } + template <typename T> + void StoreTail(const T* aInBuffer, uint32_t aInFrames) { + if (aInFrames >= MAXSIZE) { + PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE); + mSize = MAXSIZE; + } else { + PodCopy(Buffer<T>(), aInBuffer, aInFrames); + mSize = aInFrames; + } + } + uint32_t Length() { return mSize; } + static const uint32_t MAXSIZE = 20; + + private: + float mBuffer[MAXSIZE] = {}; + uint32_t mSize = 0; + }; + AutoTArray<TailBuffer, STEREO> mInputTail; + + WavDumper mInputStreamFile; + WavDumper mOutputStreamFile; +}; + +} // namespace mozilla + +#endif // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ |