diff options
Diffstat (limited to 'dom/media/DynamicResampler.h')
-rw-r--r-- | dom/media/DynamicResampler.h | 409 |
1 files changed, 409 insertions, 0 deletions
diff --git a/dom/media/DynamicResampler.h b/dom/media/DynamicResampler.h new file mode 100644 index 0000000000..f8c5aff0e4 --- /dev/null +++ b/dom/media/DynamicResampler.h @@ -0,0 +1,409 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZILLA_DYNAMIC_RESAMPLER_H_ +#define MOZILLA_DYNAMIC_RESAMPLER_H_ + +#include "AudioRingBuffer.h" +#include "AudioSegment.h" + +#include <speex/speex_resampler.h> + +namespace mozilla { + +const uint32_t STEREO = 2; + +/** + * DynamicResampler allows updating on the fly the output sample rate and the + * number of channels. In addition to that, it maintains an internal buffer for + * the input data and allows pre-buffering as well. The Resample() method + * strives to provide the requested number of output frames by using the input + * data including any pre-buffering. If this is not possible then it will not + * attempt to resample and it will return failure. + * + * Input data buffering makes use of the AudioRingBuffer. The capacity of the + * buffer is 100ms of float audio and it is pre-allocated at the constructor. + * No extra allocations take place when the input is appended. In addition to + * that, due to special feature of AudioRingBuffer, no extra copies take place + * when the input data is fed to the resampler. + * + * The sample format must be set before using any method. If the provided sample + * format is of type short the pre-allocated capacity of the input buffer + * becomes 200ms of short audio. + * + * The DynamicResampler is not thread-safe, so all the methods appart from the + * constructor must be called on the same thread. + */ +class DynamicResampler final { + public: + /** + * Provide the initial input and output rate and the amount of pre-buffering. + * The channel count will be set to stereo. Memory allocation will take + * place. The input buffer is non-interleaved. + */ + DynamicResampler(uint32_t aInRate, uint32_t aOutRate, + uint32_t aPreBufferFrames = 0); + ~DynamicResampler(); + + /** + * Set the sample format type to float or short. + */ + void SetSampleFormat(AudioSampleFormat aFormat); + uint32_t GetOutRate() const { return mOutRate; } + uint32_t GetChannels() const { return mChannels; } + + /** + * Append `aInFrames` number of frames from `aInBuffer` to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames); + void AppendInput(const nsTArray<const int16_t*>& aInBuffer, + uint32_t aInFrames); + /** + * Append `aInFrames` number of frames of silence to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInputSilence(const uint32_t aInFrames); + /** + * Return the number of frames stored in the internal input buffer. + */ + uint32_t InFramesBuffered(uint32_t aChannelIndex) const; + /** + * Return the number of frames left to store in the internal input buffer. + */ + uint32_t InFramesLeftToBuffer(uint32_t aChannelIndex) const; + + /* + * Resampler as much frame is needed from the internal input buffer to the + * `aOutBuffer` in order to provide all `aOutFrames` and return true. If there + * not enough input frames to provide the requested output frames no + * resampling is attempted and false is returned. + */ + bool Resample(float* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + + /** + * Update the output rate or/and the channel count. If a value is not updated + * compared to the current one nothing happens. Changing the `aOutRate` + * results in recalculation in the resampler. Changing `aChannels` results in + * the reallocation of the internal input buffer with the exception of + * changes between mono to stereo and vice versa where no reallocation takes + * place. A stereo internal input buffer is always maintained even if the + * sound is mono. + */ + void UpdateResampler(uint32_t aOutRate, uint32_t aChannels); + + /** + * Returns true if the resampler has enough input data to provide to the + * output of the `Resample()` method `aOutFrames` number of frames. This is a + * way to know in advance if the `Resampler` method will return true or false + * given that nothing changes in between. + */ + bool CanResample(uint32_t aOutFrames) const; + + private: + template <typename T> + void AppendInputInternal(const nsTArray<const T*>& aInBuffer, + uint32_t aInFrames) { + MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels); + for (uint32_t i = 0; i < mChannels; ++i) { + PushInFrames(aInBuffer[i], aInFrames, i); + } + } + + void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames, + float* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames, + int16_t* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + + template <typename T> + bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mInRate); + MOZ_ASSERT(mOutRate); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex <= mChannels); + MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length()); + MOZ_ASSERT(aOutFrames); + MOZ_ASSERT(*aOutFrames); + + // Not enough input, don't do anything + if (!EnoughInFrames(*aOutFrames, aChannelIndex)) { + *aOutFrames = 0; + return false; + } + + if (mInRate == mOutRate) { + mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, *aOutFrames)); + // Workaround to avoid discontinuity when the speex resampler operates + // again. Feed it with the last 20 frames to warm up the internal memory + // of the resampler and then skip memory equals to resampler's input + // latency. + mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames); + return true; + } + + uint32_t totalOutFramesNeeded = *aOutFrames; + + mInternalInBuffer[aChannelIndex].ReadNoCopy( + [this, &aOutBuffer, &totalOutFramesNeeded, + aChannelIndex](const Span<const T>& aInBuffer) -> uint32_t { + if (!totalOutFramesNeeded) { + return 0; + } + uint32_t outFramesResampled = totalOutFramesNeeded; + uint32_t inFrames = aInBuffer.Length(); + ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer, + &outFramesResampled, aChannelIndex); + aOutBuffer += outFramesResampled; + totalOutFramesNeeded -= outFramesResampled; + mInputTail[aChannelIndex].StoreTail<T>(aInBuffer); + return inFrames; + }); + + MOZ_ASSERT(totalOutFramesNeeded == 0); + return true; + } + + bool EnoughInFrames(uint32_t aOutFrames, uint32_t aChannelIndex) const; + + template <typename T> + void PushInFrames(const T* aInBuffer, const uint32_t aInFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(aInBuffer); + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex <= mChannels); + MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length()); + mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames)); + } + + void WarmUpResampler(bool aSkipLatency); + + public: + const uint32_t mInRate; + const uint32_t mPreBufferFrames; + + private: + uint32_t mChannels = 0; + uint32_t mOutRate; + + AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer; + + SpeexResamplerState* mResampler = nullptr; + AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; + + class TailBuffer { + public: + template <typename T> + T* Buffer() { + return reinterpret_cast<T*>(mBuffer); + } + /* Store the MAXSIZE last elements of the buffer. */ + template <typename T> + void StoreTail(const Span<const T>& aInBuffer) { + StoreTail(aInBuffer.data(), aInBuffer.size()); + } + template <typename T> + void StoreTail(const T* aInBuffer, uint32_t aInFrames) { + if (aInFrames >= MAXSIZE) { + PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE); + mSize = MAXSIZE; + } else { + PodCopy(Buffer<T>(), aInBuffer, aInFrames); + mSize = aInFrames; + } + } + uint32_t Length() { return mSize; } + static const uint32_t MAXSIZE = 20; + + private: + float mBuffer[MAXSIZE] = {}; + uint32_t mSize = 0; + }; + AutoTArray<TailBuffer, STEREO> mInputTail; +}; + +/** + * AudioChunkList provides a way to have preallocated audio buffers in + * AudioSegment. The idea is that the amount of AudioChunks is created in + * advance. Each AudioChunk is able to hold a specific amount of audio + * (capacity). The total capacity of AudioChunkList is specified by the number + * of AudioChunks. The important aspect of the AudioChunkList is that + * preallocates everything and reuse the same chunks similar to a ring buffer. + * + * Why the whole AudioChunk is preallocated and not some raw memory buffer? This + * is due to the limitations of MediaTrackGraph. The way that MTG works depends + * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists + * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the + * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio + * data was in a different form, the only way to use it from the audio thread + * would be to create the AudioChunk there. That would result in a copy + * operation (not very important) and most of all an allocation of the audio + * buffer in the audio thread. This happens in many places inside MTG it's a bad + * practice, though, and it has been avoided due to the AudioChunkList. + * + * After construction the sample format must be set, when it is available. It + * can be set in the audio thread. Before setting the sample format is not + * possible to use any method of AudioChunkList. + * + * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128 + * frames of float audio. Nevertheless, the sample format is not available at + * that point. Thus if the sample format is set to short, the capacity of each + * chunk changes to 256 number of frames, and the total duration becomes twice + * big. There are methods to get the chunk capacity and total capacity in frames + * and must always be used. + * + * Two things to note. First, when the channel count changes everything is + * recreated which means reallocations. Second, the total capacity might differs + * from the requested total capacity for two reasons. First, if the sample + * format is set to short and second because the number of chunks in the list + * divides exactly the final total capacity. The corresponding method must + * always be used to query the total capacity. + */ +class AudioChunkList { + public: + /** + * Constructor, the final total duration might be different from the requested + * `aTotalDuration`. Memory allocation takes place. + */ + AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels, + const PrincipalHandle& aPrincipalHandle); + AudioChunkList(const AudioChunkList&) = delete; + AudioChunkList(AudioChunkList&&) = delete; + ~AudioChunkList() = default; + + /** + * Set sample format. It must be done before any other method being used. + */ + void SetSampleFormat(AudioSampleFormat aFormat); + /** + * Get the next available AudioChunk. The duration of the chunk will be zero + * and the volume 1.0. However, the buffers will be there ready to be written. + * Please note, that a reference of the preallocated chunk is returned. Thus + * it _must not be consumed_ directly. If the chunk needs to be consumed it + * must be copied to a temporary chunk first. For example: + * ``` + * AudioChunk& chunk = audioChunklist.GetNext(); + * // Set up the chunk + * AudioChunk tmp = chunk; + * audioSegment.AppendAndConsumeChunk(std::move(tmp)); + * ``` + * This way no memory allocation or copy, takes place. + */ + AudioChunk& GetNext(); + + /** + * Get the capacity of each individual AudioChunk in the list. + */ + uint32_t ChunkCapacity() const { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + return mChunkCapacity; + } + /** + * Get the total capacity of AudioChunkList. + */ + uint32_t TotalCapacity() const { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + return CheckedInt<uint32_t>(mChunkCapacity * mChunks.Length()).value(); + } + + /** + * Update the channel count of the AudioChunkList. Memory allocation is + * taking place. + */ + void Update(uint32_t aChannels); + + private: + void IncrementIndex() { + ++mIndex; + mIndex = CheckedInt<uint32_t>(mIndex % mChunks.Length()).value(); + } + void CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels); + void UpdateToMonoOrStereo(uint32_t aChannels); + + private: + const PrincipalHandle mPrincipalHandle; + nsTArray<AudioChunk> mChunks; + uint32_t mIndex = 0; + uint32_t mChunkCapacity = WEBAUDIO_BLOCK_SIZE; + AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; +}; + +/** + * Audio Resampler is a resampler able to change the output rate and channels + * count on the fly. The API is simple and it is based in AudioSegment in order + * to be used MTG. All memory allocations, for input and output buffers, happen + * in the constructor and when channel count changes. The memory is recycled in + * order to avoid reallocations. It also supports prebuffering of silence. It + * consists of DynamicResampler and AudioChunkList so please read their + * documentation if you are interested in more details. + * + * The output buffer is preallocated and returned in the form of AudioSegment. + * The intention is to be used directly in a MediaTrack. Since an AudioChunk + * must no be "shared" in order to be written, the AudioSegment returned by + * resampler method must be cleaned up in order to be able for the `AudioChunk`s + * that it consists of to be reused. For `MediaTrack::mSegment` this happens + * every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus + * memory capacity of 100ms has been preallocated for internal input and output + * buffering. + */ +class AudioResampler final { + public: + AudioResampler(uint32_t aInRate, uint32_t aOutRate, uint32_t aPreBufferFrames, + const PrincipalHandle& aPrincipalHandle); + + /** + * Append input data into the resampler internal buffer. Copy/move of the + * memory is taking place. Also, the channel count will change according to + * the channel count of the chunks. + */ + void AppendInput(const AudioSegment& aInSegment); + /** + * Get the number of frames that can be read from the internal input buffer + * before it becomes empty. + */ + uint32_t InputReadableFrames() const; + /** + * Get the number of frames that can be written to the internal input buffer + * before it becomes full. + */ + uint32_t InputWritableFrames() const; + + /* + * Reguest `aOutFrames` of audio in the output sample rate. The internal + * buffered input is used. If there is no enough input for that amount of + * output and empty AudioSegment is returned + */ + AudioSegment Resample(uint32_t aOutFrames); + + /* + * Updates the output rate that will be used by the resampler. + */ + void UpdateOutRate(uint32_t aOutRate) { + Update(aOutRate, mResampler.GetChannels()); + } + + private: + void UpdateChannels(uint32_t aChannels) { + Update(mResampler.GetOutRate(), aChannels); + } + void Update(uint32_t aOutRate, uint32_t aChannels); + + private: + DynamicResampler mResampler; + AudioChunkList mOutputChunks; + bool mIsSampleFormatSet = false; +}; + +} // namespace mozilla + +#endif // MOZILLA_DYNAMIC_RESAMPLER_H_ |