summaryrefslogtreecommitdiffstats
path: root/dom/media/DynamicResampler.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/DynamicResampler.h
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/DynamicResampler.h')
-rw-r--r--dom/media/DynamicResampler.h409
1 files changed, 409 insertions, 0 deletions
diff --git a/dom/media/DynamicResampler.h b/dom/media/DynamicResampler.h
new file mode 100644
index 0000000000..f8c5aff0e4
--- /dev/null
+++ b/dom/media/DynamicResampler.h
@@ -0,0 +1,409 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_DYNAMIC_RESAMPLER_H_
+#define MOZILLA_DYNAMIC_RESAMPLER_H_
+
+#include "AudioRingBuffer.h"
+#include "AudioSegment.h"
+
+#include <speex/speex_resampler.h>
+
+namespace mozilla {
+
+// Number of channels in a stereo layout. Used in this header as the size
+// argument of the per-channel AutoTArray members of DynamicResampler.
+constexpr uint32_t STEREO = 2;
+
+/**
+ * DynamicResampler allows updating on the fly the output sample rate and the
+ * number of channels. In addition to that, it maintains an internal buffer for
+ * the input data and allows pre-buffering as well. The Resample() method
+ * strives to provide the requested number of output frames by using the input
+ * data including any pre-buffering. If this is not possible then it will not
+ * attempt to resample and it will return failure.
+ *
+ * Input data buffering makes use of the AudioRingBuffer. The capacity of the
+ * buffer is 100ms of float audio and it is pre-allocated at the constructor.
+ * No extra allocations take place when the input is appended. In addition to
+ * that, due to a special feature of AudioRingBuffer, no extra copies take
+ * place when the input data is fed to the resampler.
+ *
+ * The sample format must be set before using any method. If the provided sample
+ * format is of type short the pre-allocated capacity of the input buffer
+ * becomes 200ms of short audio.
+ *
+ * The DynamicResampler is not thread-safe, so all the methods apart from the
+ * constructor must be called on the same thread.
+ */
+class DynamicResampler final {
+ public:
+  /**
+   * Provide the initial input and output rate and the amount of pre-buffering.
+   * The channel count will be set to stereo. Memory allocation will take
+   * place. The input buffer is non-interleaved.
+   */
+  DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
+                   uint32_t aPreBufferFrames = 0);
+  ~DynamicResampler();
+
+  /**
+   * Set the sample format type to float or short.
+   */
+  void SetSampleFormat(AudioSampleFormat aFormat);
+  /** Return the current output sample rate. */
+  uint32_t GetOutRate() const { return mOutRate; }
+  /** Return the current channel count. */
+  uint32_t GetChannels() const { return mChannels; }
+
+  /**
+   * Append `aInFrames` number of frames from `aInBuffer` to the internal input
+   * buffer. Memory copy/move takes place. `aInBuffer` must hold exactly one
+   * pointer per channel.
+   */
+  void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames);
+  void AppendInput(const nsTArray<const int16_t*>& aInBuffer,
+                   uint32_t aInFrames);
+  /**
+   * Append `aInFrames` number of frames of silence to the internal input
+   * buffer. Memory copy/move takes place.
+   */
+  void AppendInputSilence(const uint32_t aInFrames);
+  /**
+   * Return the number of frames stored in the internal input buffer.
+   */
+  uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
+  /**
+   * Return the number of frames left to store in the internal input buffer.
+   */
+  uint32_t InFramesLeftToBuffer(uint32_t aChannelIndex) const;
+
+  /*
+   * Resample as many frames as needed from the internal input buffer to the
+   * `aOutBuffer` in order to provide all `aOutFrames` and return true. If
+   * there are not enough input frames to provide the requested output frames
+   * no resampling is attempted and false is returned.
+   */
+  bool Resample(float* aOutBuffer, uint32_t* aOutFrames,
+                uint32_t aChannelIndex);
+  bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames,
+                uint32_t aChannelIndex);
+
+  /**
+   * Update the output rate or/and the channel count. If a value is not updated
+   * compared to the current one nothing happens. Changing the `aOutRate`
+   * results in recalculation in the resampler. Changing `aChannels` results in
+   * the reallocation of the internal input buffer with the exception of
+   * changes between mono to stereo and vice versa where no reallocation takes
+   * place. A stereo internal input buffer is always maintained even if the
+   * sound is mono.
+   */
+  void UpdateResampler(uint32_t aOutRate, uint32_t aChannels);
+
+  /**
+   * Returns true if the resampler has enough input data to provide to the
+   * output of the `Resample()` method `aOutFrames` number of frames. This is a
+   * way to know in advance if the `Resample()` method will return true or
+   * false given that nothing changes in between.
+   */
+  bool CanResample(uint32_t aOutFrames) const;
+
+ private:
+  /**
+   * Push `aInFrames` frames of every channel in `aInBuffer` into the matching
+   * per-channel internal input buffer. `aInBuffer` is expected to contain one
+   * pointer per channel (`mChannels` entries).
+   */
+  template <typename T>
+  void AppendInputInternal(const nsTArray<const T*>& aInBuffer,
+                           uint32_t aInFrames) {
+    MOZ_ASSERT(aInBuffer.Length() == mChannels);
+    for (uint32_t i = 0; i < mChannels; ++i) {
+      PushInFrames(aInBuffer[i], aInFrames, i);
+    }
+  }
+
+  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
+                        float* aOutBuffer, uint32_t* aOutFrames,
+                        uint32_t aChannelIndex);
+  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
+                        int16_t* aOutBuffer, uint32_t* aOutFrames,
+                        uint32_t aChannelIndex);
+
+  /**
+   * Produce `*aOutFrames` frames for channel `aChannelIndex` into
+   * `aOutBuffer`, reading from that channel's internal input buffer.
+   *
+   * Returns false, and sets `*aOutFrames` to 0, when `EnoughInFrames()`
+   * reports that the buffered input is insufficient; nothing is read in that
+   * case. Otherwise returns true.
+   */
+  template <typename T>
+  bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames,
+                        uint32_t aChannelIndex) {
+    MOZ_ASSERT(mInRate);
+    MOZ_ASSERT(mOutRate);
+    MOZ_ASSERT(mChannels);
+    // `aChannelIndex` is used as an index below, so it must be strictly less
+    // than both the channel count and the number of internal buffers.
+    MOZ_ASSERT(aChannelIndex < mChannels);
+    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
+    MOZ_ASSERT(aOutFrames);
+    MOZ_ASSERT(*aOutFrames);
+
+    // Not enough input, don't do anything
+    if (!EnoughInFrames(*aOutFrames, aChannelIndex)) {
+      *aOutFrames = 0;
+      return false;
+    }
+
+    if (mInRate == mOutRate) {
+      // Same rate: plain copy out of the ring buffer, no resampling.
+      mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, *aOutFrames));
+      // Workaround to avoid discontinuity when the speex resampler operates
+      // again. Feed it with the last 20 frames to warm up the internal memory
+      // of the resampler and then skip memory equals to resampler's input
+      // latency.
+      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames);
+      return true;
+    }
+
+    uint32_t totalOutFramesNeeded = *aOutFrames;
+
+    // The callback may be invoked more than once (presumably once per
+    // contiguous region of the ring buffer -- confirm in AudioRingBuffer). It
+    // advances `aOutBuffer` by what was produced and returns `inFrames` as
+    // updated by the typed ResampleInternal() overload.
+    mInternalInBuffer[aChannelIndex].ReadNoCopy(
+        [this, &aOutBuffer, &totalOutFramesNeeded,
+         aChannelIndex](const Span<const T>& aInBuffer) -> uint32_t {
+          if (!totalOutFramesNeeded) {
+            return 0;
+          }
+          uint32_t outFramesResampled = totalOutFramesNeeded;
+          uint32_t inFrames = aInBuffer.Length();
+          ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
+                           &outFramesResampled, aChannelIndex);
+          aOutBuffer += outFramesResampled;
+          totalOutFramesNeeded -= outFramesResampled;
+          mInputTail[aChannelIndex].StoreTail<T>(aInBuffer);
+          return inFrames;
+        });
+
+    MOZ_ASSERT(totalOutFramesNeeded == 0);
+    return true;
+  }
+
+  bool EnoughInFrames(uint32_t aOutFrames, uint32_t aChannelIndex) const;
+
+  /**
+   * Write `aInFrames` frames from `aInBuffer` into the internal input buffer
+   * of channel `aChannelIndex`.
+   */
+  template <typename T>
+  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
+                    uint32_t aChannelIndex) {
+    MOZ_ASSERT(aInBuffer);
+    MOZ_ASSERT(aInFrames);
+    MOZ_ASSERT(mChannels);
+    // Strict comparison: `aChannelIndex` indexes `mInternalInBuffer` below.
+    MOZ_ASSERT(aChannelIndex < mChannels);
+    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
+    mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
+  }
+
+  void WarmUpResampler(bool aSkipLatency);
+
+ public:
+  const uint32_t mInRate;
+  const uint32_t mPreBufferFrames;
+
+ private:
+  uint32_t mChannels = 0;
+  uint32_t mOutRate;
+
+  // One ring buffer per channel.
+  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
+
+  SpeexResamplerState* mResampler = nullptr;
+  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
+
+  /**
+   * Small fixed-size store that keeps the last (at most MAXSIZE) samples
+   * handed to StoreTail(). The storage is declared as float and reinterpreted
+   * as the requested sample type.
+   */
+  class TailBuffer {
+   public:
+    /* View the storage as an array of T. */
+    template <typename T>
+    T* Buffer() {
+      return reinterpret_cast<T*>(mBuffer);
+    }
+    /* Store the MAXSIZE last elements of the buffer. */
+    template <typename T>
+    void StoreTail(const Span<const T>& aInBuffer) {
+      StoreTail(aInBuffer.data(), aInBuffer.size());
+    }
+    template <typename T>
+    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
+      // MAXSIZE elements of T are written into storage sized for MAXSIZE
+      // floats, so T must not be wider than float.
+      static_assert(sizeof(T) <= sizeof(float),
+                    "Sample type does not fit in the tail storage");
+      if (aInFrames >= MAXSIZE) {
+        // Keep only the trailing MAXSIZE frames.
+        PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
+        mSize = MAXSIZE;
+      } else {
+        PodCopy(Buffer<T>(), aInBuffer, aInFrames);
+        mSize = aInFrames;
+      }
+    }
+    /* Number of elements stored by the last StoreTail() call. */
+    uint32_t Length() const { return mSize; }
+    static const uint32_t MAXSIZE = 20;
+
+   private:
+    float mBuffer[MAXSIZE] = {};
+    uint32_t mSize = 0;
+  };
+  // One tail store per channel.
+  AutoTArray<TailBuffer, STEREO> mInputTail;
+};
+
+/**
+ * AudioChunkList provides a way to have preallocated audio buffers in
+ * AudioSegment. The idea is that the amount of AudioChunks is created in
+ * advance. Each AudioChunk is able to hold a specific amount of audio
+ * (capacity). The total capacity of AudioChunkList is specified by the number
+ * of AudioChunks. The important aspect of the AudioChunkList is that it
+ * preallocates everything and reuses the same chunks, similar to a ring buffer.
+ *
+ * Why the whole AudioChunk is preallocated and not some raw memory buffer? This
+ * is due to the limitations of MediaTrackGraph. The way that MTG works depends
+ * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists
+ * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the
+ * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio
+ * data was in a different form, the only way to use it from the audio thread
+ * would be to create the AudioChunk there. That would result in a copy
+ * operation (not very important) and most of all an allocation of the audio
+ * buffer in the audio thread. This happens in many places inside MTG; it is a
+ * bad practice, though, and it is avoided here thanks to AudioChunkList.
+ *
+ * After construction the sample format must be set, when it is available. It
+ * can be set in the audio thread. Before the sample format is set, it is not
+ * possible to use any method of AudioChunkList.
+ *
+ * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128
+ * frames of float audio. Nevertheless, the sample format is not available at
+ * that point. Thus if the sample format is set to short, the capacity of each
+ * chunk changes to 256 frames, and the total duration becomes twice as big.
+ * There are methods to get the chunk capacity and total capacity in frames,
+ * and they must always be used.
+ *
+ * Two things to note. First, when the channel count changes everything is
+ * recreated which means reallocations. Second, the total capacity might differ
+ * from the requested total capacity for two reasons. First, if the sample
+ * format is set to short and second because the number of chunks in the list
+ * divides exactly the final total capacity. The corresponding method must
+ * always be used to query the total capacity.
+ */
+class AudioChunkList {
+ public:
+  /**
+   * Constructor, the final total duration might be different from the requested
+   * `aTotalDuration`. Memory allocation takes place.
+   */
+  AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels,
+                 const PrincipalHandle& aPrincipalHandle);
+  AudioChunkList(const AudioChunkList&) = delete;
+  AudioChunkList(AudioChunkList&&) = delete;
+  ~AudioChunkList() = default;
+
+  /**
+   * Set sample format. It must be done before any other method being used.
+   */
+  void SetSampleFormat(AudioSampleFormat aFormat);
+  /**
+   * Get the next available AudioChunk. The duration of the chunk will be zero
+   * and the volume 1.0. However, the buffers will be there ready to be written.
+   * Please note, that a reference of the preallocated chunk is returned. Thus
+   * it _must not be consumed_ directly. If the chunk needs to be consumed it
+   * must be copied to a temporary chunk first. For example:
+   * ```
+   * AudioChunk& chunk = audioChunklist.GetNext();
+   * // Set up the chunk
+   * AudioChunk tmp = chunk;
+   * audioSegment.AppendAndConsumeChunk(std::move(tmp));
+   * ```
+   * This way no memory allocation or copy takes place.
+   */
+  AudioChunk& GetNext();
+
+  /**
+   * Get the capacity of each individual AudioChunk in the list. The sample
+   * format must already be set to short or float.
+   */
+  uint32_t ChunkCapacity() const {
+    MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
+               mSampleFormat == AUDIO_FORMAT_FLOAT32);
+    return mChunkCapacity;
+  }
+  /**
+   * Get the total capacity of AudioChunkList, i.e. the per-chunk capacity
+   * multiplied by the number of chunks. The sample format must already be set
+   * to short or float.
+   */
+  uint32_t TotalCapacity() const {
+    MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
+               mSampleFormat == AUDIO_FORMAT_FLOAT32);
+    // Multiply inside the CheckedInt domain so that an overflow is detected
+    // instead of wrapping in the native type before the check is applied.
+    return (CheckedInt<uint32_t>(mChunkCapacity) * mChunks.Length()).value();
+  }
+
+  /**
+   * Update the channel count of the AudioChunkList. Memory allocation is
+   * taking place.
+   */
+  void Update(uint32_t aChannels);
+
+ private:
+  /** Advance `mIndex` to the next chunk, wrapping around at the end. */
+  void IncrementIndex() {
+    // Guard the modulo below against an empty list.
+    MOZ_ASSERT(!mChunks.IsEmpty());
+    ++mIndex;
+    mIndex = CheckedInt<uint32_t>(mIndex % mChunks.Length()).value();
+  }
+  void CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels);
+  void UpdateToMonoOrStereo(uint32_t aChannels);
+
+ private:
+  const PrincipalHandle mPrincipalHandle;
+  nsTArray<AudioChunk> mChunks;
+  // Index into `mChunks`, advanced by IncrementIndex().
+  uint32_t mIndex = 0;
+  // Capacity of a single chunk, in frames.
+  uint32_t mChunkCapacity = WEBAUDIO_BLOCK_SIZE;
+  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
+};
+
+/**
+ * Audio Resampler is a resampler able to change the output rate and channels
+ * count on the fly. The API is simple and it is based on AudioSegment in order
+ * to be used with MTG. All memory allocations, for input and output buffers,
+ * happen in the constructor and when the channel count changes. The memory is
+ * recycled in order to avoid reallocations. It also supports prebuffering of
+ * silence. It consists of DynamicResampler and AudioChunkList so please read
+ * their documentation if you are interested in more details.
+ *
+ * The output buffer is preallocated and returned in the form of AudioSegment.
+ * The intention is to be used directly in a MediaTrack. Since an AudioChunk
+ * must not be "shared" in order to be written, the AudioSegment returned by
+ * the resampler method must be cleaned up so that the `AudioChunk`s that it
+ * consists of can be reused. For `MediaTrack::mSegment` this happens
+ * every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus
+ * memory capacity of 100ms has been preallocated for internal input and output
+ * buffering.
+ */
+class AudioResampler final {
+ public:
+  AudioResampler(uint32_t aInRate, uint32_t aOutRate, uint32_t aPreBufferFrames,
+                 const PrincipalHandle& aPrincipalHandle);
+
+  /**
+   * Feed the chunks of `aInSegment` into the resampler's internal input
+   * buffer. The data is copied/moved. The channel count follows the channel
+   * count of the appended chunks.
+   */
+  void AppendInput(const AudioSegment& aInSegment);
+  /**
+   * Number of frames that can still be read from the internal input buffer
+   * before it runs empty.
+   */
+  uint32_t InputReadableFrames() const;
+  /**
+   * Number of frames that can still be written to the internal input buffer
+   * before it is full.
+   */
+  uint32_t InputWritableFrames() const;
+
+  /*
+   * Request `aOutFrames` of audio at the output sample rate, produced from
+   * the internally buffered input. If there is not enough input for that
+   * amount of output an empty AudioSegment is returned.
+   */
+  AudioSegment Resample(uint32_t aOutFrames);
+
+  /*
+   * Change the output rate used by the resampler; the channel count is kept
+   * as it currently is.
+   */
+  void UpdateOutRate(uint32_t aOutRate) {
+    const uint32_t currentChannels = mResampler.GetChannels();
+    Update(aOutRate, currentChannels);
+  }
+
+ private:
+  /* Change the channel count; the output rate is kept as it currently is. */
+  void UpdateChannels(uint32_t aChannels) {
+    const uint32_t currentOutRate = mResampler.GetOutRate();
+    Update(currentOutRate, aChannels);
+  }
+  void Update(uint32_t aOutRate, uint32_t aChannels);
+
+ private:
+  DynamicResampler mResampler;
+  AudioChunkList mOutputChunks;
+  bool mIsSampleFormatSet = false;
+};
+
+} // namespace mozilla
+
+#endif // MOZILLA_DYNAMIC_RESAMPLER_H_