diff options
Diffstat (limited to 'dom/media/driftcontrol')
18 files changed, 4250 insertions, 0 deletions
diff --git a/dom/media/driftcontrol/AudioChunkList.cpp b/dom/media/driftcontrol/AudioChunkList.cpp new file mode 100644 index 0000000000..e0010c2ff0 --- /dev/null +++ b/dom/media/driftcontrol/AudioChunkList.cpp @@ -0,0 +1,119 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioChunkList.h" + +namespace mozilla { + +AudioChunkList::AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels, + const PrincipalHandle& aPrincipalHandle) + : mPrincipalHandle(aPrincipalHandle) { + uint32_t numOfChunks = aTotalDuration / mChunkCapacity; + if (aTotalDuration % mChunkCapacity) { + ++numOfChunks; + } + CreateChunks(numOfChunks, aChannels); +} + +void AudioChunkList::CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels) { + MOZ_ASSERT(!mChunks.Length()); + MOZ_ASSERT(aNumOfChunks); + MOZ_ASSERT(aChannels); + mChunks.AppendElements(aNumOfChunks); + + for (AudioChunk& chunk : mChunks) { + AutoTArray<nsTArray<float>, 2> buffer; + buffer.AppendElements(aChannels); + + AutoTArray<const float*, 2> bufferPtrs; + bufferPtrs.AppendElements(aChannels); + + for (uint32_t i = 0; i < aChannels; ++i) { + float* ptr = buffer[i].AppendElements(mChunkCapacity); + bufferPtrs[i] = ptr; + } + + chunk.mBuffer = new mozilla::SharedChannelArrayBuffer(std::move(buffer)); + chunk.mChannelData.AppendElements(aChannels); + for (uint32_t i = 0; i < aChannels; ++i) { + chunk.mChannelData[i] = bufferPtrs[i]; + } + } +} + +void AudioChunkList::UpdateToMonoOrStereo(uint32_t aChannels) { + MOZ_ASSERT(mChunks.Length()); + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + MOZ_ASSERT(aChannels == 1 || aChannels == 2); + + for (AudioChunk& chunk : mChunks) { + MOZ_ASSERT(chunk.ChannelCount() != (uint32_t)aChannels); + 
MOZ_ASSERT(chunk.ChannelCount() == 1 || chunk.ChannelCount() == 2); + chunk.mChannelData.SetLengthAndRetainStorage(aChannels); + if (mSampleFormat == AUDIO_FORMAT_S16) { + SharedChannelArrayBuffer<short>* channelArray = + static_cast<SharedChannelArrayBuffer<short>*>(chunk.mBuffer.get()); + channelArray->mBuffers.SetLengthAndRetainStorage(aChannels); + if (aChannels == 2) { + // This an indirect allocation, unfortunately. + channelArray->mBuffers[1].SetLength(mChunkCapacity); + chunk.mChannelData[1] = channelArray->mBuffers[1].Elements(); + } + } else { + SharedChannelArrayBuffer<float>* channelArray = + static_cast<SharedChannelArrayBuffer<float>*>(chunk.mBuffer.get()); + channelArray->mBuffers.SetLengthAndRetainStorage(aChannels); + if (aChannels == 2) { + // This an indirect allocation, unfortunately. + channelArray->mBuffers[1].SetLength(mChunkCapacity); + chunk.mChannelData[1] = channelArray->mBuffers[1].Elements(); + } + } + } +} + +void AudioChunkList::SetSampleFormat(AudioSampleFormat aFormat) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE); + MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32); + mSampleFormat = aFormat; + if (mSampleFormat == AUDIO_FORMAT_S16) { + mChunkCapacity = 2 * mChunkCapacity; + } +} + +AudioChunk& AudioChunkList::GetNext() { + AudioChunk& chunk = mChunks[mIndex]; + MOZ_ASSERT(!chunk.mChannelData.IsEmpty()); + MOZ_ASSERT(chunk.mBuffer); + MOZ_ASSERT(!chunk.mBuffer->IsShared()); + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + chunk.mDuration = 0; + chunk.mVolume = 1.0f; + chunk.mPrincipalHandle = mPrincipalHandle; + chunk.mBufferFormat = mSampleFormat; + IncrementIndex(); + return chunk; +} + +void AudioChunkList::Update(uint32_t aChannels) { + MOZ_ASSERT(mChunks.Length()); + if (mChunks[0].ChannelCount() == aChannels) { + return; + } + + // Special handling between mono and stereo to avoid reallocations. 
+ if (aChannels <= 2 && mChunks[0].ChannelCount() <= 2) { + UpdateToMonoOrStereo(aChannels); + return; + } + + uint32_t numOfChunks = mChunks.Length(); + mChunks.ClearAndRetainStorage(); + CreateChunks(numOfChunks, aChannels); +} + +} // namespace mozilla diff --git a/dom/media/driftcontrol/AudioChunkList.h b/dom/media/driftcontrol/AudioChunkList.h new file mode 100644 index 0000000000..2c10db942d --- /dev/null +++ b/dom/media/driftcontrol/AudioChunkList.h @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_DRIFTCONTROL_AUDIOCHUNKLIST_H_ +#define DOM_MEDIA_DRIFTCONTROL_AUDIOCHUNKLIST_H_ + +#include "AudioSegment.h" +#include "TimeUnits.h" + +namespace mozilla { + +/** + * AudioChunkList provides a way to have preallocated audio buffers in + * AudioSegment. The idea is that the amount of AudioChunks is created in + * advance. Each AudioChunk is able to hold a specific amount of audio + * (capacity). The total capacity of AudioChunkList is specified by the number + * of AudioChunks. The important aspect of the AudioChunkList is that + * preallocates everything and reuse the same chunks similar to a ring buffer. + * + * Why the whole AudioChunk is preallocated and not some raw memory buffer? This + * is due to the limitations of MediaTrackGraph. The way that MTG works depends + * on `AudioSegment`s to convey the actual audio data. An AudioSegment consists + * of AudioChunks. The AudioChunk is built in a way, that owns and allocates the + * audio buffers. Thus, since the use of AudioSegment is mandatory if the audio + * data was in a different form, the only way to use it from the audio thread + * would be to create the AudioChunk there. 
That would result in a copy + * operation (not very important) and most of all an allocation of the audio + * buffer in the audio thread. This happens in many places inside MTG it's a bad + * practice, though, and it has been avoided due to the AudioChunkList. + * + * After construction the sample format must be set, when it is available. It + * can be set in the audio thread. Before setting the sample format is not + * possible to use any method of AudioChunkList. + * + * Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128 + * frames of float audio. Nevertheless, the sample format is not available at + * that point. Thus if the sample format is set to short, the capacity of each + * chunk changes to 256 number of frames, and the total duration becomes twice + * big. There are methods to get the chunk capacity and total capacity in frames + * and must always be used. + * + * Two things to note. First, when the channel count changes everything is + * recreated which means reallocations. Second, the total capacity might differs + * from the requested total capacity for two reasons. First, if the sample + * format is set to short and second because the number of chunks in the list + * divides exactly the final total capacity. The corresponding method must + * always be used to query the total capacity. + */ +class AudioChunkList { + public: + /** + * Constructor, the final total duration might be different from the requested + * `aTotalDuration`. Memory allocation takes place. + */ + AudioChunkList(uint32_t aTotalDuration, uint32_t aChannels, + const PrincipalHandle& aPrincipalHandle); + AudioChunkList(const AudioChunkList&) = delete; + AudioChunkList(AudioChunkList&&) = delete; + ~AudioChunkList() = default; + + /** + * Set sample format. It must be done before any other method being used. + */ + void SetSampleFormat(AudioSampleFormat aFormat); + /** + * Get the next available AudioChunk. 
The duration of the chunk will be zero + * and the volume 1.0. However, the buffers will be there ready to be written. + * Please note, that a reference of the preallocated chunk is returned. Thus + * it _must not be consumed_ directly. If the chunk needs to be consumed it + * must be copied to a temporary chunk first. For example: + * ``` + * AudioChunk& chunk = audioChunklist.GetNext(); + * // Set up the chunk + * AudioChunk tmp = chunk; + * audioSegment.AppendAndConsumeChunk(std::move(tmp)); + * ``` + * This way no memory allocation or copy, takes place. + */ + AudioChunk& GetNext(); + + /** + * Get the capacity of each individual AudioChunk in the list. + */ + uint32_t ChunkCapacity() const { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + return mChunkCapacity; + } + /** + * Get the total capacity of AudioChunkList. + */ + uint32_t TotalCapacity() const { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32); + return CheckedInt<uint32_t>(mChunkCapacity * mChunks.Length()).value(); + } + + /** + * Update the channel count of the AudioChunkList. Memory allocation is + * taking place. 
+ */ + void Update(uint32_t aChannels); + + private: + void IncrementIndex() { + ++mIndex; + mIndex = CheckedInt<uint32_t>(mIndex % mChunks.Length()).value(); + } + void CreateChunks(uint32_t aNumOfChunks, uint32_t aChannels); + void UpdateToMonoOrStereo(uint32_t aChannels); + + private: + const PrincipalHandle mPrincipalHandle; + nsTArray<AudioChunk> mChunks; + uint32_t mIndex = 0; + uint32_t mChunkCapacity = WEBAUDIO_BLOCK_SIZE; + AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; +}; + +} // namespace mozilla + +#endif // DOM_MEDIA_DRIFTCONTROL_AUDIOCHUNKLIST_H_ diff --git a/dom/media/driftcontrol/AudioDriftCorrection.cpp b/dom/media/driftcontrol/AudioDriftCorrection.cpp new file mode 100644 index 0000000000..e66c435c36 --- /dev/null +++ b/dom/media/driftcontrol/AudioDriftCorrection.cpp @@ -0,0 +1,178 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioDriftCorrection.h" + +#include <cmath> + +#include "AudioResampler.h" +#include "DriftController.h" + +namespace mozilla { + +extern LazyLogModule gMediaTrackGraphLog; + +#define LOG_CONTROLLER(level, controller, format, ...) \ + MOZ_LOG(gMediaTrackGraphLog, level, \ + ("DriftController %p: (plot-id %u) " format, controller, \ + (controller)->mPlotId, ##__VA_ARGS__)) + +static media::TimeUnit DesiredBuffering(media::TimeUnit aSourceLatency) { + constexpr media::TimeUnit kMinBuffer(10, MSECS_PER_S); + constexpr media::TimeUnit kMaxBuffer(2500, MSECS_PER_S); + + const auto clamped = std::clamp(aSourceLatency, kMinBuffer, kMaxBuffer); + + // Ensure the base is the source's sampling rate. 
+ return clamped.ToBase(aSourceLatency); +} + +AudioDriftCorrection::AudioDriftCorrection( + uint32_t aSourceRate, uint32_t aTargetRate, + const PrincipalHandle& aPrincipalHandle) + : mTargetRate(aTargetRate), + mDriftController(MakeUnique<DriftController>(aSourceRate, aTargetRate, + mDesiredBuffering)), + mResampler(MakeUnique<AudioResampler>( + aSourceRate, aTargetRate, mDesiredBuffering, aPrincipalHandle)) {} + +AudioDriftCorrection::~AudioDriftCorrection() = default; + +AudioSegment AudioDriftCorrection::RequestFrames(const AudioSegment& aInput, + uint32_t aOutputFrames) { + const media::TimeUnit inputDuration(aInput.GetDuration(), + mDriftController->mSourceRate); + const media::TimeUnit outputDuration(aOutputFrames, mTargetRate); + + if (inputDuration.IsPositive()) { + if (mDesiredBuffering.IsZero()) { + // Start with the desired buffering at at least 50ms, since the drift is + // still unknown. It may be adjust downward later on, when we have adapted + // to the drift more. + const media::TimeUnit desiredBuffering = DesiredBuffering(std::max( + inputDuration * 11 / 10, media::TimeUnit::FromSeconds(0.05))); + LOG_CONTROLLER(LogLevel::Info, mDriftController.get(), + "Initial desired buffering %.2fms", + desiredBuffering.ToSeconds() * 1000.0); + SetDesiredBuffering(desiredBuffering); + } else if (inputDuration > mDesiredBuffering) { + // Input latency is higher than the desired buffering. Increase the + // desired buffering to try to avoid underruns. + if (inputDuration > mSourceLatency) { + const media::TimeUnit desiredBuffering = + DesiredBuffering(inputDuration * 11 / 10); + LOG_CONTROLLER( + LogLevel::Info, mDriftController.get(), + "High observed input latency %.2fms (%" PRId64 + " frames). 
Increasing desired buffering %.2fms->%.2fms frames", + inputDuration.ToSeconds() * 1000.0, aInput.GetDuration(), + mDesiredBuffering.ToSeconds() * 1000.0, + desiredBuffering.ToSeconds() * 1000.0); + SetDesiredBuffering(desiredBuffering); + } else { + const media::TimeUnit desiredBuffering = + DesiredBuffering(mSourceLatency * 11 / 10); + LOG_CONTROLLER(LogLevel::Info, mDriftController.get(), + "Increasing desired buffering %.2fms->%.2fms, " + "based on reported input-latency %.2fms.", + mDesiredBuffering.ToSeconds() * 1000.0, + desiredBuffering.ToSeconds() * 1000.0, + mSourceLatency.ToSeconds() * 1000.0); + SetDesiredBuffering(desiredBuffering); + } + } + + mIsHandlingUnderrun = false; + // Very important to go first since DynamicResampler will get the sample + // format from the chunk. + mResampler->AppendInput(aInput); + } + bool hasUnderrun = false; + AudioSegment output = mResampler->Resample(aOutputFrames, &hasUnderrun); + mDriftController->UpdateClock(inputDuration, outputDuration, + CurrentBuffering(), BufferSize()); + // Update resampler's rate if there is a new correction. + mResampler->UpdateOutRate(mDriftController->GetCorrectedTargetRate()); + if (hasUnderrun) { + if (!mIsHandlingUnderrun) { + NS_WARNING("Drift-correction: Underrun"); + LOG_CONTROLLER(LogLevel::Info, mDriftController.get(), + "Underrun. Doubling the desired buffering %.2fms->%.2fms", + mDesiredBuffering.ToSeconds() * 1000.0, + (mDesiredBuffering * 2).ToSeconds() * 1000.0); + mIsHandlingUnderrun = true; + ++mNumUnderruns; + SetDesiredBuffering(DesiredBuffering(mDesiredBuffering * 2)); + mDriftController->ResetAfterUnderrun(); + } + } + + if (mDriftController->DurationWithinHysteresis() > + mLatencyReductionTimeLimit && + mDriftController->DurationSinceDesiredBufferingChange() > + mLatencyReductionTimeLimit) { + // We have been stable within hysteresis for a while. Let's reduce the + // desired buffering if we can. 
+ const media::TimeUnit sourceLatency = + mDriftController->MeasuredSourceLatency(); + // We target 30% over the measured source latency, a bit higher than how we + // adapt to high source latency. + const media::TimeUnit targetDesiredBuffering = + DesiredBuffering(sourceLatency * 13 / 10); + if (targetDesiredBuffering < mDesiredBuffering) { + // The new target is lower than the current desired buffering. Proceed by + // reducing the difference by 10%, but do it in 10ms-steps so there is a + // chance of reaching the target (by truncation). + const media::TimeUnit diff = + (mDesiredBuffering - targetDesiredBuffering) / 10; + // Apply the 10%-diff and 2ms-steps, but don't go lower than the + // already-decided desired target. + const media::TimeUnit target = std::max( + targetDesiredBuffering, (mDesiredBuffering - diff).ToBase(500)); + if (target < mDesiredBuffering) { + LOG_CONTROLLER( + LogLevel::Info, mDriftController.get(), + "Reducing desired buffering because the buffering level is stable. " + "%.2fms->%.2fms. 
Measured source latency is %.2fms, ideal target " + "is %.2fms.", + mDesiredBuffering.ToSeconds() * 1000.0, target.ToSeconds() * 1000.0, + sourceLatency.ToSeconds() * 1000.0, + targetDesiredBuffering.ToSeconds() * 1000.0); + SetDesiredBuffering(target); + } + } + } + return output; +} + +uint32_t AudioDriftCorrection::CurrentBuffering() const { + return mResampler->InputReadableFrames(); +} + +uint32_t AudioDriftCorrection::BufferSize() const { + return mResampler->InputCapacityFrames(); +} + +uint32_t AudioDriftCorrection::NumCorrectionChanges() const { + return mDriftController->NumCorrectionChanges(); +} + +void AudioDriftCorrection::SetSourceLatency(media::TimeUnit aSourceLatency) { + LOG_CONTROLLER( + LogLevel::Info, mDriftController.get(), "SetSourceLatency %.2fms->%.2fms", + mSourceLatency.ToSeconds() * 1000.0, aSourceLatency.ToSeconds() * 1000.0); + + mSourceLatency = aSourceLatency; +} + +void AudioDriftCorrection::SetDesiredBuffering( + media::TimeUnit aDesiredBuffering) { + mDesiredBuffering = aDesiredBuffering; + mDriftController->SetDesiredBuffering(mDesiredBuffering); + mResampler->SetPreBufferDuration(mDesiredBuffering); +} +} // namespace mozilla + +#undef LOG_CONTROLLER diff --git a/dom/media/driftcontrol/AudioDriftCorrection.h b/dom/media/driftcontrol/AudioDriftCorrection.h new file mode 100644 index 0000000000..aeb01d3d2b --- /dev/null +++ b/dom/media/driftcontrol/AudioDriftCorrection.h @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef DOM_MEDIA_DRIFTCONTROL_AUDIODRIFTCORRECTION_H_ +#define DOM_MEDIA_DRIFTCONTROL_AUDIODRIFTCORRECTION_H_ + +#include "AudioSegment.h" +#include "TimeUnits.h" + +namespace mozilla { + +class AudioResampler; +class DriftController; + +/** + * Correct the drift between two independent clocks, the source, and the target + * clock. The target clock is the master clock so the correction syncs the drift + * of the source clock to the target. The nominal sampling rates of source and + * target must be provided. + * + * It works with AudioSegment in order to be able to be used from the + * MediaTrackGraph/MediaTrack. The audio buffers are pre-allocated so the only + * new allocation taking place during operation happens if the input buffer + * outgrows the memory allocated. The preallocation capacity is 100ms for input + * and 100ms for output. The class consists of DriftController and + * AudioResampler check there for more details. + * + * The class is not thread-safe. The construction can happen in any thread but + * the member method must be used in a single thread that can be different than + * the construction thread. Appropriate for being used in the high priority + * audio thread. + */ +class AudioDriftCorrection final { + public: + AudioDriftCorrection(uint32_t aSourceRate, uint32_t aTargetRate, + const PrincipalHandle& aPrincipalHandle); + + ~AudioDriftCorrection(); + + /** + * A segment of input data (in the source rate) and a number of requested + * output frames (in the target rate) are provided, and a segment (in the + * target rate) of drift-corrected data is returned. The input is buffered + * internally so some latency exists. The returned AudioSegment may not be + * long-lived because any point in the internal buffer gets reused every + * 100ms. If not enough data is available in the input buffer to produce + * the requested number of output frames, the input buffer is drained and + * a smaller segment than requested is returned. 
+ */ + AudioSegment RequestFrames(const AudioSegment& aInput, + uint32_t aOutputFrames); + + uint32_t CurrentBuffering() const; + + uint32_t BufferSize() const; + + uint32_t NumCorrectionChanges() const; + + uint32_t NumUnderruns() const { return mNumUnderruns; } + + void SetSourceLatency(media::TimeUnit aSourceLatency); + + const uint32_t mTargetRate; + const media::TimeUnit mLatencyReductionTimeLimit = + media::TimeUnit(15, 1).ToBase(mTargetRate); + + private: + void SetDesiredBuffering(media::TimeUnit aDesiredBuffering); + + media::TimeUnit mSourceLatency = media::TimeUnit::Zero(); + media::TimeUnit mDesiredBuffering = media::TimeUnit::Zero(); + uint32_t mNumUnderruns = 0; + bool mIsHandlingUnderrun = false; + const UniquePtr<DriftController> mDriftController; + const UniquePtr<AudioResampler> mResampler; +}; +} // namespace mozilla +#endif // DOM_MEDIA_DRIFTCONTROL_AUDIODRIFTCORRECTION_H_ diff --git a/dom/media/driftcontrol/AudioResampler.cpp b/dom/media/driftcontrol/AudioResampler.cpp new file mode 100644 index 0000000000..ecef033a5c --- /dev/null +++ b/dom/media/driftcontrol/AudioResampler.cpp @@ -0,0 +1,108 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "AudioResampler.h" + +namespace mozilla { + +AudioResampler::AudioResampler(uint32_t aInRate, uint32_t aOutRate, + media::TimeUnit aPreBufferDuration, + const PrincipalHandle& aPrincipalHandle) + : mResampler(aInRate, aOutRate, aPreBufferDuration), + mOutputChunks(aOutRate / 10, STEREO, aPrincipalHandle) {} + +void AudioResampler::AppendInput(const AudioSegment& aInSegment) { + MOZ_ASSERT(aInSegment.GetDuration()); + for (AudioSegment::ConstChunkIterator iter(aInSegment); !iter.IsEnded(); + iter.Next()) { + const AudioChunk& chunk = *iter; + if (!mIsSampleFormatSet) { + // We don't know the format yet and all buffers are empty. + if (chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) { + // Only silence has been received and the format is unkown. Igonre it, + // if Resampler() is called it will return silence too. + continue; + } + // First no silence data, set the format once for lifetime and let it + // continue the rest of the flow. We will not get in here again. + mOutputChunks.SetSampleFormat(chunk.mBufferFormat); + mResampler.SetSampleFormat(chunk.mBufferFormat); + mIsSampleFormatSet = true; + } + MOZ_ASSERT(mIsSampleFormatSet); + if (chunk.IsNull()) { + mResampler.AppendInputSilence(chunk.GetDuration()); + continue; + } + // Make sure the channel is up to date. An AudioSegment can contain chunks + // with different channel count. 
+ UpdateChannels(chunk.mChannelData.Length()); + if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) { + mResampler.AppendInput(chunk.ChannelData<float>(), chunk.GetDuration()); + } else { + mResampler.AppendInput(chunk.ChannelData<int16_t>(), chunk.GetDuration()); + } + } +} + +AudioSegment AudioResampler::Resample(uint32_t aOutFrames, bool* aHasUnderrun) { + MOZ_ASSERT(aHasUnderrun); + + AudioSegment segment; + + // We don't know what to do yet and we only have received silence if any just + // return what they want and leave + if (!mIsSampleFormatSet) { + segment.AppendNullData(aOutFrames); + return segment; + } + + media::TimeUnit outDuration(aOutFrames, mResampler.GetOutRate()); + mResampler.EnsurePreBuffer(outDuration); + + const media::TimeUnit chunkCapacity(mOutputChunks.ChunkCapacity(), + mResampler.GetOutRate()); + + while (!outDuration.IsZero()) { + MOZ_ASSERT(outDuration.IsPositive()); + AudioChunk& chunk = mOutputChunks.GetNext(); + const media::TimeUnit chunkDuration = std::min(outDuration, chunkCapacity); + outDuration -= chunkDuration; + + const uint32_t outFrames = + chunkDuration.ToTicksAtRate(mResampler.GetOutRate()); + for (uint32_t i = 0; i < chunk.ChannelCount(); ++i) { + if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) { + *aHasUnderrun |= mResampler.Resample( + chunk.ChannelDataForWrite<float>(i), outFrames, i); + } else { + *aHasUnderrun |= mResampler.Resample( + chunk.ChannelDataForWrite<int16_t>(i), outFrames, i); + } + } + chunk.mDuration = outFrames; + + // Create a copy in order to consume that copy and not the pre-allocated + // chunk + segment.AppendAndConsumeChunk(AudioChunk(chunk)); + } + + return segment; +} + +void AudioResampler::Update(uint32_t aOutRate, uint32_t aChannels) { + mResampler.UpdateResampler(aOutRate, aChannels); + mOutputChunks.Update(aChannels); +} + +uint32_t AudioResampler::InputCapacityFrames() const { + return mResampler.InFramesBufferSize(); +} + +uint32_t AudioResampler::InputReadableFrames() const { + return 
mResampler.InFramesBuffered(0); +} + +} // namespace mozilla diff --git a/dom/media/driftcontrol/AudioResampler.h b/dom/media/driftcontrol/AudioResampler.h new file mode 100644 index 0000000000..20e4f1051b --- /dev/null +++ b/dom/media/driftcontrol/AudioResampler.h @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_DRIFTCONTROL_AUDIORESAMPLER_H_ +#define DOM_MEDIA_DRIFTCONTROL_AUDIORESAMPLER_H_ + +#include "AudioChunkList.h" +#include "AudioSegment.h" +#include "DynamicResampler.h" +#include "TimeUnits.h" + +namespace mozilla { + +/** + * Audio Resampler is a resampler able to change the output rate and channels + * count on the fly. The API is simple and it is based in AudioSegment in order + * to be used MTG. Memory allocations, for input and output buffers, will happen + * in the constructor, when channel count changes and if the amount of input + * data outgrows the input buffer. The memory is recycled in order to avoid + * reallocations. It also supports prebuffering of silence. It consists of + * DynamicResampler and AudioChunkList so please read their documentation if you + * are interested in more details. + * + * The output buffer is preallocated and returned in the form of AudioSegment. + * The intention is to be used directly in a MediaTrack. Since an AudioChunk + * must no be "shared" in order to be written, the AudioSegment returned by + * resampler method must be cleaned up in order to be able for the `AudioChunk`s + * that it consists of to be reused. For `MediaTrack::mSegment` this happens + * every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus + * memory capacity of 100ms has been preallocated for internal input and output + * buffering. 
Note that the amount of memory used for input buffering may + * increase if needed. + */ +class AudioResampler final { + public: + AudioResampler(uint32_t aInRate, uint32_t aOutRate, + media::TimeUnit aPreBufferDuration, + const PrincipalHandle& aPrincipalHandle); + + /** + * Append input data into the resampler internal buffer. Copy/move of the + * memory is taking place. Also, the channel count will change according to + * the channel count of the chunks. + */ + void AppendInput(const AudioSegment& aInSegment); + /** + * Get the number of frames that the internal input buffer can hold. + */ + uint32_t InputCapacityFrames() const; + /** + * Get the number of frames that can be read from the internal input buffer + * before it becomes empty. + */ + uint32_t InputReadableFrames() const; + + /* + * Reguest `aOutFrames` of audio in the output sample rate. The internal + * buffered input is used. If the input buffer does not have enough data to + * reach `aOutFrames` frames, the input buffer is padded with enough silence + * to allow the requested frames to be resampled and returned, and the + * pre-buffer is reset so that the next call will be treated as the first. + * + * On first call, prepends the internal buffer with silence so that after + * resampling aOutFrames frames of data, the internal buffer holds input + * data as close as possible to the configured pre-buffer size. + */ + AudioSegment Resample(uint32_t aOutFrames, bool* aHasUnderrun); + + /* + * Updates the output rate that will be used by the resampler. + */ + void UpdateOutRate(uint32_t aOutRate) { + Update(aOutRate, mResampler.GetChannels()); + } + + /** + * Set the duration that should be used for pre-buffering. 
+ */ + void SetPreBufferDuration(media::TimeUnit aPreBufferDuration) { + mResampler.SetPreBufferDuration(aPreBufferDuration); + } + + private: + void UpdateChannels(uint32_t aChannels) { + Update(mResampler.GetOutRate(), aChannels); + } + void Update(uint32_t aOutRate, uint32_t aChannels); + + private: + DynamicResampler mResampler; + AudioChunkList mOutputChunks; + bool mIsSampleFormatSet = false; +}; + +} // namespace mozilla + +#endif // DOM_MEDIA_DRIFTCONTROL_AUDIORESAMPLER_H_ diff --git a/dom/media/driftcontrol/DriftController.cpp b/dom/media/driftcontrol/DriftController.cpp new file mode 100644 index 0000000000..b5603f72bb --- /dev/null +++ b/dom/media/driftcontrol/DriftController.cpp @@ -0,0 +1,237 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "DriftController.h" + +#include <atomic> +#include <cmath> +#include <mutex> + +#include "mozilla/CheckedInt.h" +#include "mozilla/Logging.h" + +namespace mozilla { + +LazyLogModule gDriftControllerGraphsLog("DriftControllerGraphs"); +extern LazyLogModule gMediaTrackGraphLog; + +#define LOG_CONTROLLER(level, controller, format, ...) 
\ + MOZ_LOG(gMediaTrackGraphLog, level, \ + ("DriftController %p: (plot-id %u) " format, controller, \ + (controller)->mPlotId, ##__VA_ARGS__)) +#define LOG_PLOT_NAMES() \ + MOZ_LOG( \ + gDriftControllerGraphsLog, LogLevel::Verbose, \ + ("id,t,buffering,desired,buffersize,inlatency,outlatency,inrate," \ + "outrate,hysteresisthreshold,corrected,hysteresiscorrected,configured," \ + "p,i,d,kpp,kii,kdd,control")) +#define LOG_PLOT_VALUES(id, t, buffering, desired, buffersize, inlatency, \ + outlatency, inrate, outrate, hysteresisthreshold, \ + corrected, hysteresiscorrected, configured, p, i, d, \ + kpp, kii, kdd, control) \ + MOZ_LOG( \ + gDriftControllerGraphsLog, LogLevel::Verbose, \ + ("DriftController %u,%.3f,%u,%" PRId64 ",%u,%" PRId64 ",%" PRId64 \ + ",%u,%u,%" PRId64 ",%.5f,%.5f,%ld,%d,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f", \ + id, t, buffering, desired, buffersize, inlatency, outlatency, inrate, \ + outrate, hysteresisthreshold, corrected, hysteresiscorrected, \ + configured, p, i, d, kpp, kii, kdd, control)) + +static uint8_t GenerateId() { + static std::atomic<uint8_t> id{0}; + return ++id; +} + +DriftController::DriftController(uint32_t aSourceRate, uint32_t aTargetRate, + media::TimeUnit aDesiredBuffering) + : mPlotId(GenerateId()), + mSourceRate(aSourceRate), + mTargetRate(aTargetRate), + mDesiredBuffering(aDesiredBuffering), + mCorrectedTargetRate(static_cast<float>(aTargetRate)), + mMeasuredSourceLatency(5), + mMeasuredTargetLatency(5) { + LOG_CONTROLLER( + LogLevel::Info, this, + "Created. Resampling %uHz->%uHz. 
Initial desired buffering: %.2fms.", + mSourceRate, mTargetRate, mDesiredBuffering.ToSeconds() * 1000.0); + static std::once_flag sOnceFlag; + std::call_once(sOnceFlag, [] { LOG_PLOT_NAMES(); }); +} + +void DriftController::SetDesiredBuffering(media::TimeUnit aDesiredBuffering) { + LOG_CONTROLLER(LogLevel::Debug, this, "SetDesiredBuffering %.2fms->%.2fms", + mDesiredBuffering.ToSeconds() * 1000.0, + aDesiredBuffering.ToSeconds() * 1000.0); + mLastDesiredBufferingChangeTime = mTotalTargetClock; + mDesiredBuffering = aDesiredBuffering.ToBase(mSourceRate); +} + +void DriftController::ResetAfterUnderrun() { + mIntegral = 0.0; + mPreviousError = 0.0; + // Trigger a recalculation on the next clock update. + mTargetClock = mAdjustmentInterval; +} + +uint32_t DriftController::GetCorrectedTargetRate() const { + return std::lround(mCorrectedTargetRate); +} + +void DriftController::UpdateClock(media::TimeUnit aSourceDuration, + media::TimeUnit aTargetDuration, + uint32_t aBufferedFrames, + uint32_t aBufferSize) { + mTargetClock += aTargetDuration; + mTotalTargetClock += aTargetDuration; + + mMeasuredTargetLatency.insert(aTargetDuration); + + if (aSourceDuration.IsZero()) { + // Only update the clock after having received input, so input buffering + // estimates are somewhat recent. This helps stabilize the controller + // input (buffering measurements) when the input stream's callback + // interval is much larger than that of the output stream. + return; + } + + mMeasuredSourceLatency.insert(aSourceDuration); + + if (mTargetClock >= mAdjustmentInterval) { + // The adjustment interval has passed. Recalculate. + CalculateCorrection(aBufferedFrames, aBufferSize); + } +} + +void DriftController::CalculateCorrection(uint32_t aBufferedFrames, + uint32_t aBufferSize) { + static constexpr float kProportionalGain = 0.07; + static constexpr float kIntegralGain = 0.006; + static constexpr float kDerivativeGain = 0.12; + + // Maximum 0.1% change per update. 
+ const float cap = static_cast<float>(mTargetRate) / 1000.0f; + + // The integral term can make us grow far outside the cap. Impose a cap on + // it individually that is roughly equivalent to the final cap. + const float integralCap = cap / kIntegralGain; + + int32_t error = CheckedInt32(mDesiredBuffering.ToTicksAtRate(mSourceRate) - + aBufferedFrames) + .value(); + int32_t proportional = error; + // targetClockSec is the number of target clock seconds since last + // correction. + float targetClockSec = static_cast<float>(mTargetClock.ToSeconds()); + // delta-t is targetClockSec. + float integralStep = std::clamp(static_cast<float>(error) * targetClockSec, + -integralCap, integralCap); + mIntegral += integralStep; + float derivative = + static_cast<float>(error - mPreviousError) / targetClockSec; + float controlSignal = kProportionalGain * static_cast<float>(proportional) + + kIntegralGain * mIntegral + + kDerivativeGain * derivative; + float correctedRate = + std::clamp(static_cast<float>(mTargetRate) + controlSignal, + mCorrectedTargetRate - cap, mCorrectedTargetRate + cap); + + // mDesiredBuffering is divided by this to calculate the amount of + // hysteresis to apply. With a denominator of 5, an error within +/- 20% of + // the desired buffering will not make corrections to the target sample + // rate. + static constexpr uint32_t kHysteresisDenominator = 5; // +/- 20% + + // +/- 10ms hysteresis maximum. + const media::TimeUnit hysteresisCap = media::TimeUnit::FromSeconds(0.01); + + // For the minimum desired buffering of 10ms we have a hysteresis threshold + // of +/- 2ms (20%). This goes up to +/- 10ms (clamped) at most for when the + // desired buffering is 50 ms or higher. 
+ const auto hysteresisThreshold = + std::min(hysteresisCap, mDesiredBuffering / kHysteresisDenominator) + .ToTicksAtRate(mSourceRate); + + float hysteresisCorrectedRate = [&] { + uint32_t abserror = std::abs(error); + if (abserror > hysteresisThreshold) { + // The error is outside a hysteresis threshold boundary. + mDurationWithinHysteresis = media::TimeUnit::Zero(); + mIntegralCenterForCap = Nothing(); + mLastHysteresisBoundaryCorrection = Some(error); + return correctedRate; + } + + // The error is within the hysteresis threshold boundaries. + mDurationWithinHysteresis += mTargetClock; + if (!mIntegralCenterForCap) { + mIntegralCenterForCap = Some(mIntegral); + } + + // Would prefer std::signbit, but.. + // https://github.com/microsoft/STL/issues/519. + if (mLastHysteresisBoundaryCorrection && + (*mLastHysteresisBoundaryCorrection < 0) != (error < 0) && + abserror > hysteresisThreshold * 3 / 10) { + // The error came from a boundary and just went 30% past the center line + // (of the distance between center and boundary). Correct now rather + // than when reaching the opposite boundary, so we have a chance of + // finding a stable rate. + mLastHysteresisBoundaryCorrection = Nothing(); + return correctedRate; + } + + return mCorrectedTargetRate; + }(); + + if (mDurationWithinHysteresis > mIntegralCapTimeLimit) { + // Impose a cap on the integral term to not let it grow unboundedly + // while we're within the hysteresis threshold boundaries. Since the + // integral is what finds the drift we center the cap around the integral's + // value when we entered the hysteresis threshold rarther than around 0. We + // impose the cap only after the error has been within the hysteresis + // threshold boundaries for some time, since it would otherwise increase the + // time it takes to reach stability. 
+ mIntegral = std::clamp(mIntegral, *mIntegralCenterForCap - integralCap, + *mIntegralCenterForCap + integralCap); + } + + LOG_CONTROLLER( + LogLevel::Verbose, this, + "Recalculating Correction: Nominal: %uHz->%uHz, Corrected: " + "%uHz->%.2fHz (diff %.2fHz), error: %.2fms (hysteresisThreshold: " + "%.2fms), buffering: %.2fms, desired buffering: %.2fms", + mSourceRate, mTargetRate, mSourceRate, hysteresisCorrectedRate, + hysteresisCorrectedRate - mCorrectedTargetRate, + media::TimeUnit(error, mSourceRate).ToSeconds() * 1000.0, + media::TimeUnit(hysteresisThreshold, mSourceRate).ToSeconds() * 1000.0, + media::TimeUnit(aBufferedFrames, mSourceRate).ToSeconds() * 1000.0, + mDesiredBuffering.ToSeconds() * 1000.0); + LOG_PLOT_VALUES(mPlotId, mTotalTargetClock.ToSeconds(), aBufferedFrames, + mDesiredBuffering.ToTicksAtRate(mSourceRate), aBufferSize, + mMeasuredSourceLatency.mean().ToTicksAtRate(mSourceRate), + mMeasuredTargetLatency.mean().ToTicksAtRate(mTargetRate), + mSourceRate, mTargetRate, hysteresisThreshold, correctedRate, + hysteresisCorrectedRate, std::lround(hysteresisCorrectedRate), + proportional, mIntegral, derivative, + kProportionalGain * proportional, kIntegralGain * mIntegral, + kDerivativeGain * derivative, controlSignal); + + if (std::lround(mCorrectedTargetRate) != + std::lround(hysteresisCorrectedRate)) { + ++mNumCorrectionChanges; + } + + mPreviousError = error; + mCorrectedTargetRate = hysteresisCorrectedRate; + + // Reset the counters to prepare for the next period. 
+ mTargetClock = media::TimeUnit::Zero(); +} +} // namespace mozilla + +#undef LOG_PLOT_VALUES +#undef LOG_PLOT_NAMES +#undef LOG_CONTROLLER diff --git a/dom/media/driftcontrol/DriftController.h b/dom/media/driftcontrol/DriftController.h new file mode 100644 index 0000000000..0bd745c737 --- /dev/null +++ b/dom/media/driftcontrol/DriftController.h @@ -0,0 +1,163 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_DRIFTCONTROL_DRIFTCONTROLLER_H_ +#define DOM_MEDIA_DRIFTCONTROL_DRIFTCONTROLLER_H_ + +#include "TimeUnits.h" +#include "mozilla/RollingMean.h" + +#include <algorithm> +#include <cstdint> + +#include "MediaSegment.h" + +namespace mozilla { + +/** + * DriftController calculates the divergence of the source clock from its + * nominal (provided) rate compared to that of the target clock, which drives + * the calculations. + * + * The DriftController looks at how the current buffering level differs from the + * desired buffering level and sets a corrected target rate. A resampler should + * be configured to resample from the nominal source rate to the corrected + * target rate. It assumes that the resampler is initially configured to + * resample from the nominal source rate to the nominal target rate. + * + * The pref `media.clock drift.buffering` can be used to configure the minimum + * initial desired internal buffering. Right now it is at 50ms. A larger desired + * buffering level will be used if deemed necessary based on input device + * latency, reported or observed. It will also be increased as a response to an + * underrun, since that indicates the buffer was too small. + */ +class DriftController final { + public: + /** + * Provide the nominal source and the target sample rate. 
+ */ + DriftController(uint32_t aSourceRate, uint32_t aTargetRate, + media::TimeUnit aDesiredBuffering); + + /** + * Set the buffering level that the controller should target. + */ + void SetDesiredBuffering(media::TimeUnit aDesiredBuffering); + + /** + * Reset internal PID-controller state in a way that is suitable for handling + * an underrun. + */ + void ResetAfterUnderrun(); + + /** + * Returns the drift-corrected target rate. + */ + uint32_t GetCorrectedTargetRate() const; + + /** + * The number of times mCorrectedTargetRate has been changed to adjust to + * drift. + */ + uint32_t NumCorrectionChanges() const { return mNumCorrectionChanges; } + + /** + * The amount of time the buffering level has been within the hysteresis + * threshold. + */ + media::TimeUnit DurationWithinHysteresis() const { + return mDurationWithinHysteresis; + } + + /** + * The amount of time that has passed since the last time SetDesiredBuffering + * was called. + */ + media::TimeUnit DurationSinceDesiredBufferingChange() const { + return mTotalTargetClock - mLastDesiredBufferingChangeTime; + } + + /** + * A rolling window average measurement of source latency by looking at the + * duration of the source buffer. + */ + media::TimeUnit MeasuredSourceLatency() const { + return mMeasuredSourceLatency.mean(); + } + + /** + * Update the available source frames, target frames, and the current + * buffer, in every iteration. If the conditions are met a new correction is + * calculated. A new correction is calculated every mAdjustmentInterval. In + * addition to that, the correction is clamped so that the output sample rate + * changes by at most 0.1% of its nominal rate each correction. + */ + void UpdateClock(media::TimeUnit aSourceDuration, + media::TimeUnit aTargetDuration, uint32_t aBufferedFrames, + uint32_t aBufferSize); + + private: + // This implements a simple PID controller with feedback. + // Set point: SP = mDesiredBuffering. + // Process value: PV(t) = aBufferedFrames. 
This is the feedback. + // Error: e(t) = mDesiredBuffering - aBufferedFrames. + // Control value: CV(t) = the number to add to the nominal target rate, i.e. + // the corrected target rate = CV(t) + nominal target rate. + // + // Controller: + // Proportional part: The error, p(t) = e(t), multiplied by a gain factor, Kp. + // Integral part: The historic cumulative value of the error, + // i(t+1) = i(t) + e(t+1), multiplied by a gain factor, Ki. + // Derivative part: The error's rate of change, d(t+1) = (e(t+1)-e(t))/1, + // multiplied by a gain factor, Kd. + // Control signal: The sum of the parts' output, + // u(t) = Kp*p(t) + Ki*i(t) + Kd*d(t). + // + // Control action: Converting the control signal to a target sample rate. + // Simplified, a positive control signal means the buffer is + // lower than desired (because the error is positive), so the + // target sample rate must be increased in order to consume + // input data slower. We calculate the corrected target rate + // by simply adding the control signal, u(t), to the nominal + // target rate. + // + // Hysteresis: As long as the error is within a threshold of 20% of the set + // point (desired buffering level) (up to 10ms for >50ms desired + // buffering), we call this the hysteresis threshold, the control + // signal does not influence the corrected target rate at all. + // This is to reduce the frequency at which we need to reconfigure + // the resampler, as it causes some allocations. 
+ void CalculateCorrection(uint32_t aBufferedFrames, uint32_t aBufferSize); + + public: + const uint8_t mPlotId; + const uint32_t mSourceRate; + const uint32_t mTargetRate; + const media::TimeUnit mAdjustmentInterval = media::TimeUnit::FromSeconds(1); + const media::TimeUnit mIntegralCapTimeLimit = + media::TimeUnit(10, 1).ToBase(mTargetRate); + + private: + media::TimeUnit mDesiredBuffering; + int32_t mPreviousError = 0; + float mIntegral = 0.0; + Maybe<float> mIntegralCenterForCap; + float mCorrectedTargetRate; + Maybe<int32_t> mLastHysteresisBoundaryCorrection; + media::TimeUnit mDurationWithinHysteresis; + uint32_t mNumCorrectionChanges = 0; + + // An estimate of the source's latency, i.e. callback buffer size, in frames. + RollingMean<media::TimeUnit, media::TimeUnit> mMeasuredSourceLatency; + // An estimate of the target's latency, i.e. callback buffer size, in frames. + RollingMean<media::TimeUnit, media::TimeUnit> mMeasuredTargetLatency; + + media::TimeUnit mTargetClock; + media::TimeUnit mTotalTargetClock; + media::TimeUnit mLastDesiredBufferingChangeTime; +}; + +} // namespace mozilla +#endif // DOM_MEDIA_DRIFTCONTROL_DRIFTCONTROLLER_H_ diff --git a/dom/media/driftcontrol/DynamicResampler.cpp b/dom/media/driftcontrol/DynamicResampler.cpp new file mode 100644 index 0000000000..e6f230278e --- /dev/null +++ b/dom/media/driftcontrol/DynamicResampler.cpp @@ -0,0 +1,284 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "DynamicResampler.h" + +namespace mozilla { + +DynamicResampler::DynamicResampler(uint32_t aInRate, uint32_t aOutRate, + media::TimeUnit aPreBufferDuration) + : mInRate(aInRate), + mPreBufferDuration(aPreBufferDuration), + mOutRate(aOutRate) { + MOZ_ASSERT(aInRate); + MOZ_ASSERT(aOutRate); + MOZ_ASSERT(aPreBufferDuration.IsPositiveOrZero()); + UpdateResampler(mOutRate, STEREO); + mInputStreamFile.Open("DynamicResamplerInFirstChannel", 1, mInRate); + mOutputStreamFile.Open("DynamicResamplerOutFirstChannel", 1, mOutRate); +} + +DynamicResampler::~DynamicResampler() { + if (mResampler) { + speex_resampler_destroy(mResampler); + } +} + +void DynamicResampler::SetSampleFormat(AudioSampleFormat aFormat) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE); + MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32); + + mSampleFormat = aFormat; + for (AudioRingBuffer& b : mInternalInBuffer) { + b.SetSampleFormat(mSampleFormat); + } + + EnsureInputBufferDuration(CalculateInputBufferDuration()); +} + +void DynamicResampler::EnsurePreBuffer(media::TimeUnit aDuration) { + if (mIsPreBufferSet) { + return; + } + + media::TimeUnit buffered(mInternalInBuffer[0].AvailableRead(), mInRate); + if (buffered.IsZero()) { + // Wait for the first input segment before deciding how much to pre-buffer. + // If it is large it indicates high-latency, and the buffer would have to + // handle that. 
+ return; + } + + mIsPreBufferSet = true; + + media::TimeUnit needed = aDuration + mPreBufferDuration; + EnsureInputBufferDuration(needed); + + if (needed > buffered) { + for (auto& b : mInternalInBuffer) { + b.PrependSilence((needed - buffered).ToTicksAtRate(mInRate)); + } + } else if (needed < buffered) { + for (auto& b : mInternalInBuffer) { + b.Discard((buffered - needed).ToTicksAtRate(mInRate)); + } + } +} + +void DynamicResampler::SetPreBufferDuration(media::TimeUnit aDuration) { + MOZ_ASSERT(aDuration.IsPositive()); + mPreBufferDuration = aDuration; +} + +bool DynamicResampler::Resample(float* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_FLOAT32); + return ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex); +} + +bool DynamicResampler::Resample(int16_t* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16); + return ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex); +} + +void DynamicResampler::ResampleInternal(const float* aInBuffer, + uint32_t* aInFrames, float* aOutBuffer, + uint32_t* aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mResampler); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(mInRate); + MOZ_ASSERT(mOutRate); + + MOZ_ASSERT(aInBuffer); + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(*aInFrames > 0); + MOZ_ASSERT(aOutBuffer); + MOZ_ASSERT(aOutFrames); + MOZ_ASSERT(*aOutFrames > 0); + + MOZ_ASSERT(aChannelIndex <= mChannels); + +#ifdef DEBUG + int rv = +#endif + speex_resampler_process_float(mResampler, aChannelIndex, aInBuffer, + aInFrames, aOutBuffer, aOutFrames); + MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); + + if (aChannelIndex == 0 && !mIsWarmingUp) { + mInputStreamFile.Write(aInBuffer, *aInFrames); + mOutputStreamFile.Write(aOutBuffer, *aOutFrames); + } +} + +void DynamicResampler::ResampleInternal(const int16_t* aInBuffer, + uint32_t* aInFrames, + int16_t* aOutBuffer, + uint32_t* aOutFrames, + uint32_t aChannelIndex) { + 
MOZ_ASSERT(mResampler); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(mInRate); + MOZ_ASSERT(mOutRate); + + MOZ_ASSERT(aInBuffer); + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(*aInFrames > 0); + MOZ_ASSERT(aOutBuffer); + MOZ_ASSERT(aOutFrames); + MOZ_ASSERT(*aOutFrames > 0); + + MOZ_ASSERT(aChannelIndex <= mChannels); + +#ifdef DEBUG + int rv = +#endif + speex_resampler_process_int(mResampler, aChannelIndex, aInBuffer, + aInFrames, aOutBuffer, aOutFrames); + MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); + + if (aChannelIndex == 0 && !mIsWarmingUp) { + mInputStreamFile.Write(aInBuffer, *aInFrames); + mOutputStreamFile.Write(aOutBuffer, *aOutFrames); + } +} + +void DynamicResampler::UpdateResampler(uint32_t aOutRate, uint32_t aChannels) { + MOZ_ASSERT(aOutRate); + MOZ_ASSERT(aChannels); + + if (mChannels != aChannels) { + if (mResampler) { + speex_resampler_destroy(mResampler); + } + mResampler = speex_resampler_init(aChannels, mInRate, aOutRate, + SPEEX_RESAMPLER_QUALITY_MIN, nullptr); + MOZ_ASSERT(mResampler); + mChannels = aChannels; + mOutRate = aOutRate; + // Between mono and stereo changes, keep always allocated 2 channels to + // avoid reallocations in the most common case. + if ((mChannels == STEREO || mChannels == 1) && + mInternalInBuffer.Length() == STEREO) { + // Don't worry if format is not set it will write silence then. + if ((mSampleFormat == AUDIO_FORMAT_S16 || + mSampleFormat == AUDIO_FORMAT_FLOAT32) && + mChannels == STEREO) { + // The mono channel is always up to date. When we are going from mono + // to stereo upmix the mono to stereo channel + uint32_t bufferedDuration = mInternalInBuffer[0].AvailableRead(); + mInternalInBuffer[1].Clear(); + if (bufferedDuration) { + mInternalInBuffer[1].Write(mInternalInBuffer[0], bufferedDuration); + } + } + // Maintain stereo size + mInputTail.SetLength(STEREO); + WarmUpResampler(false); + return; + } + // upmix or downmix, for now just clear but it has to be updated + // because allocates and this is executed in audio thread. 
+ mInternalInBuffer.Clear(); + for (uint32_t i = 0; i < mChannels; ++i) { + AudioRingBuffer* b = mInternalInBuffer.AppendElement(0); + + if (mSampleFormat != AUDIO_FORMAT_SILENCE) { + // In ctor this update is not needed + b->SetSampleFormat(mSampleFormat); + } + } + media::TimeUnit d = mSetBufferDuration; + mSetBufferDuration = media::TimeUnit::Zero(); + EnsureInputBufferDuration(d); + mInputTail.SetLength(mChannels); + return; + } + + if (mOutRate != aOutRate) { + // If the rates was the same the resampler was not being used so warm up. + if (mOutRate == mInRate) { + WarmUpResampler(true); + } + +#ifdef DEBUG + int rv = +#endif + speex_resampler_set_rate(mResampler, mInRate, aOutRate); + MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); + mOutRate = aOutRate; + } +} + +void DynamicResampler::WarmUpResampler(bool aSkipLatency) { + MOZ_ASSERT(mInputTail.Length()); + mIsWarmingUp = true; + for (uint32_t i = 0; i < mChannels; ++i) { + if (!mInputTail[i].Length()) { + continue; + } + uint32_t inFrames = mInputTail[i].Length(); + uint32_t outFrames = 5 * TailBuffer::MAXSIZE; // something big + if (mSampleFormat == AUDIO_FORMAT_S16) { + short outBuffer[5 * TailBuffer::MAXSIZE] = {}; + ResampleInternal(mInputTail[i].Buffer<short>(), &inFrames, outBuffer, + &outFrames, i); + MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length()); + } else { + float outBuffer[100] = {}; + ResampleInternal(mInputTail[i].Buffer<float>(), &inFrames, outBuffer, + &outFrames, i); + MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length()); + } + } + if (aSkipLatency) { + int inputLatency = speex_resampler_get_input_latency(mResampler); + MOZ_ASSERT(inputLatency > 0); + uint32_t ratioNum, ratioDen; + speex_resampler_get_ratio(mResampler, &ratioNum, &ratioDen); + // Ratio at this point is one so only skip the input latency. No special + // calculations are needed. 
+ speex_resampler_set_skip_frac_num(mResampler, inputLatency * ratioDen); + } + mIsWarmingUp = false; +} + +void DynamicResampler::AppendInput(Span<const float* const> aInBuffer, + uint32_t aInFrames) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_FLOAT32); + AppendInputInternal(aInBuffer, aInFrames); +} +void DynamicResampler::AppendInput(Span<const int16_t* const> aInBuffer, + uint32_t aInFrames) { + MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16); + AppendInputInternal(aInBuffer, aInFrames); +} + +void DynamicResampler::AppendInputSilence(const uint32_t aInFrames) { + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(mInternalInBuffer.Length() >= (uint32_t)mChannels); + for (uint32_t i = 0; i < mChannels; ++i) { + mInternalInBuffer[i].WriteSilence(aInFrames); + } +} + +uint32_t DynamicResampler::InFramesBufferSize() const { + return mSetBufferDuration.ToTicksAtRate(mInRate); +} + +uint32_t DynamicResampler::InFramesBuffered(uint32_t aChannelIndex) const { + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex <= mChannels); + MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length()); + if (!mIsPreBufferSet) { + return mPreBufferDuration.ToTicksAtRate(mInRate); + } + return mInternalInBuffer[aChannelIndex].AvailableRead(); +} + +} // namespace mozilla diff --git a/dom/media/driftcontrol/DynamicResampler.h b/dom/media/driftcontrol/DynamicResampler.h new file mode 100644 index 0000000000..c1b9000aa0 --- /dev/null +++ b/dom/media/driftcontrol/DynamicResampler.h @@ -0,0 +1,350 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ +#define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ + +#include "AudioRingBuffer.h" +#include "AudioSegment.h" +#include "TimeUnits.h" +#include "WavDumper.h" + +#include <speex/speex_resampler.h> + +namespace mozilla { + +const uint32_t STEREO = 2; + +/** + * DynamicResampler allows updating on the fly the output sample rate and the + * number of channels. In addition to that, it maintains an internal buffer for + * the input data and allows pre-buffering as well. The Resample() method + * strives to provide the requested number of output frames by using the input + * data including any pre-buffering. If there are fewer frames in the internal + * buffer than is requested, the internal buffer is padded with enough silence + * to allow the requested to be resampled and returned. + * + * Input data buffering makes use of the AudioRingBuffer. The capacity of the + * buffer is initially 100ms of float audio and it is pre-allocated at the + * constructor. Should the input data grow beyond that, the input buffer is + * re-allocated on the fly. In addition to that, due to special feature of + * AudioRingBuffer, no extra copies take place when the input data is fed to the + * resampler. + * + * The sample format must be set before using any method. If the provided sample + * format is of type short the pre-allocated capacity of the input buffer + * becomes 200ms of short audio. + * + * The DynamicResampler is not thread-safe, so all the methods appart from the + * constructor must be called on the same thread. + */ +class DynamicResampler final { + public: + /** + * Provide the initial input and output rate and the amount of pre-buffering. + * The channel count will be set to stereo. Memory allocation will take + * place. The input buffer is non-interleaved. 
+ */ + DynamicResampler( + uint32_t aInRate, uint32_t aOutRate, + media::TimeUnit aPreBufferDuration = media::TimeUnit::Zero()); + ~DynamicResampler(); + + /** + * Set the sample format type to float or short. + */ + void SetSampleFormat(AudioSampleFormat aFormat); + uint32_t GetOutRate() const { return mOutRate; } + uint32_t GetChannels() const { return mChannels; } + + /** + * Append `aInFrames` number of frames from `aInBuffer` to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames); + void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames); + /** + * Append `aInFrames` number of frames of silence to the internal input + * buffer. Memory copy/move takes place. + */ + void AppendInputSilence(const uint32_t aInFrames); + /** + * Return the number of frames the internal input buffer can store. + */ + uint32_t InFramesBufferSize() const; + /** + * Return the number of frames stored in the internal input buffer. + */ + uint32_t InFramesBuffered(uint32_t aChannelIndex) const; + + /** + * Prepends existing input data with a silent pre-buffer if not already done. + * Data will be prepended so that after resampling aOutFrames worth of output + * data, the buffering level will be as close as possible to + * mPreBufferDuration, which is the desired buffering level. + */ + void EnsurePreBuffer(media::TimeUnit aDuration); + + /** + * Set the duration that should be used for pre-buffering. + */ + void SetPreBufferDuration(media::TimeUnit aDuration); + + /* + * Resample as much frames as needed from the internal input buffer to the + * `aOutBuffer` in order to provide all `aOutFrames`. + * + * On first call, prepends the input buffer with silence so that after + * resampling aOutFrames frames of data, the input buffer holds data as close + * as possible to the configured pre-buffer size. 
+ * + * If there are not enough input frames to provide the requested output + * frames, the input buffer is padded with enough silence to allow the + * requested frames to be resampled, and the pre-buffer is reset so that the + * next call will be treated as the first. + * + * Returns true if the internal input buffer underran and had to be padded + * with silence, otherwise false. + */ + bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex); + bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex); + + /** + * Update the output rate or/and the channel count. If a value is not updated + * compared to the current one nothing happens. Changing the `aOutRate` + * results in recalculation in the resampler. Changing `aChannels` results in + * the reallocation of the internal input buffer with the exception of + * changes between mono to stereo and vice versa where no reallocation takes + * place. A stereo internal input buffer is always maintained even if the + * sound is mono. 
+ */ + void UpdateResampler(uint32_t aOutRate, uint32_t aChannels); + + private: + template <typename T> + void AppendInputInternal(Span<const T* const>& aInBuffer, + uint32_t aInFrames) { + MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels); + for (uint32_t i = 0; i < mChannels; ++i) { + PushInFrames(aInBuffer[i], aInFrames, i); + } + } + + void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames, + float* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames, + int16_t* aOutBuffer, uint32_t* aOutFrames, + uint32_t aChannelIndex); + + template <typename T> + bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(mInRate); + MOZ_ASSERT(mOutRate); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex < mChannels); + MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); + MOZ_ASSERT(aOutFrames); + + if (mInRate == mOutRate) { + bool underrun = false; + if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead(); + buffered < aOutFrames) { + underrun = true; + mIsPreBufferSet = false; + mInternalInBuffer[aChannelIndex].WriteSilence(aOutFrames - buffered); + } + DebugOnly<uint32_t> numFramesRead = + mInternalInBuffer[aChannelIndex].Read(Span(aOutBuffer, aOutFrames)); + MOZ_ASSERT(numFramesRead == aOutFrames); + // Workaround to avoid discontinuity when the speex resampler operates + // again. Feed it with the last 20 frames to warm up the internal memory + // of the resampler and then skip memory equals to resampler's input + // latency. 
+ mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames); + if (aChannelIndex == 0 && !mIsWarmingUp) { + mInputStreamFile.Write(aOutBuffer, aOutFrames); + mOutputStreamFile.Write(aOutBuffer, aOutFrames); + } + return underrun; + } + + uint32_t totalOutFramesNeeded = aOutFrames; + auto resample = [&] { + mInternalInBuffer[aChannelIndex].ReadNoCopy( + [&](const Span<const T>& aInBuffer) -> uint32_t { + if (!totalOutFramesNeeded) { + return 0; + } + uint32_t outFramesResampled = totalOutFramesNeeded; + uint32_t inFrames = aInBuffer.Length(); + ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer, + &outFramesResampled, aChannelIndex); + aOutBuffer += outFramesResampled; + totalOutFramesNeeded -= outFramesResampled; + mInputTail[aChannelIndex].StoreTail<T>(aInBuffer.To(inFrames)); + return inFrames; + }); + }; + + resample(); + + if (totalOutFramesNeeded == 0) { + return false; + } + + while (totalOutFramesNeeded > 0) { + MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0); + // Round up. + uint32_t totalInFramesNeeded = + ((CheckedUint32(totalOutFramesNeeded) * mInRate + mOutRate - 1) / + mOutRate) + .value(); + mInternalInBuffer[aChannelIndex].WriteSilence(totalInFramesNeeded); + resample(); + } + mIsPreBufferSet = false; + return true; + } + + template <typename T> + void PushInFrames(const T* aInBuffer, const uint32_t aInFrames, + uint32_t aChannelIndex) { + MOZ_ASSERT(aInBuffer); + MOZ_ASSERT(aInFrames); + MOZ_ASSERT(mChannels); + MOZ_ASSERT(aChannelIndex < mChannels); + MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); + EnsureInputBufferDuration(media::TimeUnit( + CheckedInt64(mInternalInBuffer[aChannelIndex].AvailableRead()) + + aInFrames, + mInRate)); + mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames)); + } + + void WarmUpResampler(bool aSkipLatency); + + media::TimeUnit CalculateInputBufferDuration() const { + // Pre-allocate something big, twice the pre-buffer, or at least 100ms. 
+ return std::max(mPreBufferDuration * 2, media::TimeUnit::FromSeconds(0.1)); + } + + bool EnsureInputBufferDuration(media::TimeUnit aDuration) { + if (aDuration <= mSetBufferDuration) { + // Buffer size is sufficient. + return true; + } + + // 5 second cap. + const media::TimeUnit cap = media::TimeUnit::FromSeconds(5); + if (mSetBufferDuration == cap) { + // Already at the cap. + return false; + } + + uint32_t sampleSize = 0; + if (mSampleFormat == AUDIO_FORMAT_FLOAT32) { + sampleSize = sizeof(float); + } else if (mSampleFormat == AUDIO_FORMAT_S16) { + sampleSize = sizeof(short); + } + + if (sampleSize == 0) { + // No sample format set, we wouldn't know how many bytes to allocate. + return true; + } + + // As a backoff strategy, at least double the previous size. + media::TimeUnit duration = mSetBufferDuration * 2; + + if (aDuration > duration) { + // A larger buffer than the normal backoff strategy provides is needed, or + // this is the first time setting the buffer size. Round up to the nearest + // 100ms, some jitter is expected. + duration = aDuration.ToBase<media::TimeUnit::CeilingPolicy>(10); + } + + duration = std::min(cap, duration); + + bool success = true; + for (auto& b : mInternalInBuffer) { + success = success && + b.SetLengthBytes(sampleSize * duration.ToTicksAtRate(mInRate)); + } + + if (success) { + // All buffers have the new size. + mSetBufferDuration = duration; + return true; + } + + const uint32_t sizeInFrames = + static_cast<uint32_t>(mSetBufferDuration.ToTicksAtRate(mInRate)); + // Allocating an input buffer failed. We stick with the old buffer size. + NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u " + "frames). 
Expect glitches.", + sampleSize * sizeInFrames, sizeInFrames) + .get()); + for (auto& b : mInternalInBuffer) { + MOZ_ALWAYS_TRUE(b.SetLengthBytes(sampleSize * sizeInFrames)); + } + return false; + } + + public: + const uint32_t mInRate; + + private: + bool mIsPreBufferSet = false; + bool mIsWarmingUp = false; + media::TimeUnit mPreBufferDuration; + media::TimeUnit mSetBufferDuration = media::TimeUnit::Zero(); + uint32_t mChannels = 0; + uint32_t mOutRate; + + AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer; + + SpeexResamplerState* mResampler = nullptr; + AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; + + class TailBuffer { + public: + template <typename T> + T* Buffer() { + return reinterpret_cast<T*>(mBuffer); + } + /* Store the MAXSIZE last elements of the buffer. */ + template <typename T> + void StoreTail(const Span<const T>& aInBuffer) { + StoreTail(aInBuffer.data(), aInBuffer.size()); + } + template <typename T> + void StoreTail(const T* aInBuffer, uint32_t aInFrames) { + if (aInFrames >= MAXSIZE) { + PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE); + mSize = MAXSIZE; + } else { + PodCopy(Buffer<T>(), aInBuffer, aInFrames); + mSize = aInFrames; + } + } + uint32_t Length() { return mSize; } + static const uint32_t MAXSIZE = 20; + + private: + float mBuffer[MAXSIZE] = {}; + uint32_t mSize = 0; + }; + AutoTArray<TailBuffer, STEREO> mInputTail; + + WavDumper mInputStreamFile; + WavDumper mOutputStreamFile; +}; + +} // namespace mozilla + +#endif // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ diff --git a/dom/media/driftcontrol/gtest/TestAudioChunkList.cpp b/dom/media/driftcontrol/gtest/TestAudioChunkList.cpp new file mode 100644 index 0000000000..34848821f5 --- /dev/null +++ b/dom/media/driftcontrol/gtest/TestAudioChunkList.cpp @@ -0,0 +1,226 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "AudioChunkList.h" +#include "nsContentUtils.h" + +using namespace mozilla; + +TEST(TestAudioChunkList, Basic1) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioChunkList list(256, 2, testPrincipal); + list.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + EXPECT_EQ(list.ChunkCapacity(), 128u); + EXPECT_EQ(list.TotalCapacity(), 256u); + + AudioChunk& c1 = list.GetNext(); + float* c1_ch1 = c1.ChannelDataForWrite<float>(0); + float* c1_ch2 = c1.ChannelDataForWrite<float>(1); + EXPECT_EQ(c1.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c1.mBufferFormat, AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + c1_ch1[i] = c1_ch2[i] = 0.01f * static_cast<float>(i); + } + AudioChunk& c2 = list.GetNext(); + EXPECT_EQ(c2.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c2.mBufferFormat, AUDIO_FORMAT_FLOAT32); + EXPECT_NE(c1.mBuffer.get(), c2.mBuffer.get()); + AudioChunk& c3 = list.GetNext(); + EXPECT_EQ(c3.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c3.mBufferFormat, AUDIO_FORMAT_FLOAT32); + // Cycle + EXPECT_EQ(c1.mBuffer.get(), c3.mBuffer.get()); + float* c3_ch1 = c3.ChannelDataForWrite<float>(0); + float* c3_ch2 = c3.ChannelDataForWrite<float>(1); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + EXPECT_FLOAT_EQ(c1_ch1[i], c3_ch1[i]); + EXPECT_FLOAT_EQ(c1_ch2[i], c3_ch2[i]); + } +} + +TEST(TestAudioChunkList, Basic2) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioChunkList list(256, 2, testPrincipal); + list.SetSampleFormat(AUDIO_FORMAT_S16); + EXPECT_EQ(list.ChunkCapacity(), 256u); + EXPECT_EQ(list.TotalCapacity(), 512u); + + AudioChunk& c1 = list.GetNext(); + EXPECT_EQ(c1.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c1.mBufferFormat, AUDIO_FORMAT_S16); + short* c1_ch1 = 
c1.ChannelDataForWrite<short>(0); + short* c1_ch2 = c1.ChannelDataForWrite<short>(1); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + c1_ch1[i] = c1_ch2[i] = static_cast<short>(i); + } + AudioChunk& c2 = list.GetNext(); + EXPECT_EQ(c2.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c2.mBufferFormat, AUDIO_FORMAT_S16); + EXPECT_NE(c1.mBuffer.get(), c2.mBuffer.get()); + AudioChunk& c3 = list.GetNext(); + EXPECT_EQ(c3.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c3.mBufferFormat, AUDIO_FORMAT_S16); + AudioChunk& c4 = list.GetNext(); + EXPECT_EQ(c4.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c4.mBufferFormat, AUDIO_FORMAT_S16); + // Cycle + AudioChunk& c5 = list.GetNext(); + EXPECT_EQ(c5.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c5.mBufferFormat, AUDIO_FORMAT_S16); + EXPECT_EQ(c1.mBuffer.get(), c5.mBuffer.get()); + short* c5_ch1 = c5.ChannelDataForWrite<short>(0); + short* c5_ch2 = c5.ChannelDataForWrite<short>(1); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + EXPECT_EQ(c1_ch1[i], c5_ch1[i]); + EXPECT_EQ(c1_ch2[i], c5_ch2[i]); + } +} + +TEST(TestAudioChunkList, Basic3) +{ + AudioChunkList list(260, 2, PRINCIPAL_HANDLE_NONE); + list.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + EXPECT_EQ(list.ChunkCapacity(), 128u); + EXPECT_EQ(list.TotalCapacity(), 256u + 128u); + + AudioChunk& c1 = list.GetNext(); + AudioChunk& c2 = list.GetNext(); + EXPECT_NE(c1.mBuffer.get(), c2.mBuffer.get()); + AudioChunk& c3 = list.GetNext(); + EXPECT_NE(c1.mBuffer.get(), c3.mBuffer.get()); + AudioChunk& c4 = list.GetNext(); + EXPECT_EQ(c1.mBuffer.get(), c4.mBuffer.get()); +} + +TEST(TestAudioChunkList, Basic4) +{ + AudioChunkList list(260, 2, PRINCIPAL_HANDLE_NONE); + list.SetSampleFormat(AUDIO_FORMAT_S16); + EXPECT_EQ(list.ChunkCapacity(), 256u); + EXPECT_EQ(list.TotalCapacity(), 512u + 256u); + + AudioChunk& c1 = list.GetNext(); + AudioChunk& c2 = list.GetNext(); + EXPECT_NE(c1.mBuffer.get(), c2.mBuffer.get()); + AudioChunk& c3 = list.GetNext(); + 
EXPECT_NE(c1.mBuffer.get(), c3.mBuffer.get()); + AudioChunk& c4 = list.GetNext(); + EXPECT_EQ(c1.mBuffer.get(), c4.mBuffer.get()); +} + +TEST(TestAudioChunkList, UpdateChannels) +{ + AudioChunkList list(256, 2, PRINCIPAL_HANDLE_NONE); + list.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + AudioChunk& c1 = list.GetNext(); + AudioChunk& c2 = list.GetNext(); + EXPECT_EQ(c1.ChannelCount(), 2u); + EXPECT_EQ(c2.ChannelCount(), 2u); + + // Update to Quad + list.Update(4); + + AudioChunk& c3 = list.GetNext(); + AudioChunk& c4 = list.GetNext(); + EXPECT_EQ(c3.ChannelCount(), 4u); + EXPECT_EQ(c4.ChannelCount(), 4u); +} + +TEST(TestAudioChunkList, UpdateBetweenMonoAndStereo) +{ + AudioChunkList list(256, 2, PRINCIPAL_HANDLE_NONE); + list.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + AudioChunk& c1 = list.GetNext(); + float* c1_ch1 = c1.ChannelDataForWrite<float>(0); + float* c1_ch2 = c1.ChannelDataForWrite<float>(1); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + c1_ch1[i] = c1_ch2[i] = 0.01f * static_cast<float>(i); + } + + AudioChunk& c2 = list.GetNext(); + EXPECT_EQ(c1.ChannelCount(), 2u); + EXPECT_EQ(c2.ChannelCount(), 2u); + + // Downmix to mono + list.Update(1); + + AudioChunk& c3 = list.GetNext(); + float* c3_ch1 = c3.ChannelDataForWrite<float>(0); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + EXPECT_FLOAT_EQ(c3_ch1[i], c1_ch1[i]); + } + + AudioChunk& c4 = list.GetNext(); + EXPECT_EQ(c3.ChannelCount(), 1u); + EXPECT_EQ(c4.ChannelCount(), 1u); + EXPECT_EQ(static_cast<SharedChannelArrayBuffer<float>*>(c3.mBuffer.get()) + ->mBuffers[0] + .Length(), + list.ChunkCapacity()); + + // Upmix to stereo + list.Update(2); + + AudioChunk& c5 = list.GetNext(); + AudioChunk& c6 = list.GetNext(); + EXPECT_EQ(c5.ChannelCount(), 2u); + EXPECT_EQ(c6.ChannelCount(), 2u); + EXPECT_EQ(static_cast<SharedChannelArrayBuffer<float>*>(c5.mBuffer.get()) + ->mBuffers[0] + .Length(), + list.ChunkCapacity()); + EXPECT_EQ(static_cast<SharedChannelArrayBuffer<float>*>(c5.mBuffer.get()) 
+ ->mBuffers[1] + .Length(), + list.ChunkCapacity()); + + // Downmix to mono + list.Update(1); + + AudioChunk& c7 = list.GetNext(); + float* c7_ch1 = c7.ChannelDataForWrite<float>(0); + for (uint32_t i = 0; i < list.ChunkCapacity(); ++i) { + EXPECT_FLOAT_EQ(c7_ch1[i], c1_ch1[i]); + } + + AudioChunk& c8 = list.GetNext(); + EXPECT_EQ(c7.ChannelCount(), 1u); + EXPECT_EQ(c8.ChannelCount(), 1u); + EXPECT_EQ(static_cast<SharedChannelArrayBuffer<float>*>(c7.mBuffer.get()) + ->mBuffers[0] + .Length(), + list.ChunkCapacity()); +} + +TEST(TestAudioChunkList, ConsumeAndForget) +{ + AudioSegment s; + AudioChunkList list(256, 2, PRINCIPAL_HANDLE_NONE); + list.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + AudioChunk& c1 = list.GetNext(); + AudioChunk tmp1 = c1; + s.AppendAndConsumeChunk(std::move(tmp1)); + EXPECT_FALSE(c1.mBuffer.get() == nullptr); + EXPECT_EQ(c1.ChannelData<float>().Length(), 2u); + + AudioChunk& c2 = list.GetNext(); + AudioChunk tmp2 = c2; + s.AppendAndConsumeChunk(std::move(tmp2)); + EXPECT_FALSE(c2.mBuffer.get() == nullptr); + EXPECT_EQ(c2.ChannelData<float>().Length(), 2u); + + s.ForgetUpTo(256); + list.GetNext(); + list.GetNext(); +} diff --git a/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp b/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp new file mode 100644 index 0000000000..c13f443d37 --- /dev/null +++ b/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp @@ -0,0 +1,529 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "gtest/gtest.h" + +#include "AudioDriftCorrection.h" +#include "AudioGenerator.h" +#include "AudioVerifier.h" +#include "nsContentUtils.h" + +using namespace mozilla; + +template <class T> +AudioChunk CreateAudioChunk(uint32_t aFrames, uint32_t aChannels, + AudioSampleFormat aSampleFormat); + +void testAudioCorrection(int32_t aSourceRate, int32_t aTargetRate, + bool aTestMonoToStereoInput = false) { + const uint32_t frequency = 100; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(aSourceRate, aTargetRate, testPrincipal); + + uint8_t numChannels = 1; + AudioGenerator<AudioDataValue> tone(numChannels, aSourceRate, frequency); + AudioVerifier<AudioDataValue> inToneVerifier(aSourceRate, frequency); + AudioVerifier<AudioDataValue> outToneVerifier(aTargetRate, frequency); + + // Run for some time: 3 * 5000 = 15000 iterations + for (uint32_t j = 0; j < 3; ++j) { + TrackTime sourceFramesIteration = 0; + TrackTime targetFramesIteration = 0; + + // apply some drift (+/- .2%) + const int8_t additionalDriftFrames = + ((j % 2 == 0) ? aSourceRate : -aSourceRate) * 2 / 1000; + + // If the number of frames before changing channel count (and thereby + // resetting the resampler) is very low, the measured buffering level curve + // may look odd, as each resampler reset will reset the (possibly + // fractional) output frame counter. + const uint32_t numFramesBeforeChangingChannelCount = aSourceRate; + uint32_t numFramesAtCurrentChannelCount = 0; + + // 50 seconds, allows for at least 50 correction changes, to stabilize + // on the current drift. + for (uint32_t n = 0; n < 5000; ++n) { + const TrackTime sourceFrames = + (n + 1) * (aSourceRate + additionalDriftFrames) / 100 - + sourceFramesIteration; + const TrackTime targetFrames = + (n + 1) * aTargetRate / 100 - targetFramesIteration; + AudioSegment inSegment; + if (aTestMonoToStereoInput) { + // Create the input (sine tone) of two chunks. 
+ const TrackTime sourceFramesPart1 = std::min<TrackTime>( + sourceFrames, numFramesBeforeChangingChannelCount - + numFramesAtCurrentChannelCount); + tone.Generate(inSegment, sourceFramesPart1); + numFramesAtCurrentChannelCount += sourceFramesPart1; + if (numFramesBeforeChangingChannelCount == + numFramesAtCurrentChannelCount) { + tone.SetChannelsCount(numChannels = (numChannels % 2) + 1); + numFramesAtCurrentChannelCount = sourceFrames - sourceFramesPart1; + tone.Generate(inSegment, numFramesAtCurrentChannelCount); + } + } else { + // Create the input (sine tone) + tone.Generate(inSegment, sourceFrames); + } + inToneVerifier.AppendData(inSegment); + + // Get the output of the correction + AudioSegment outSegment = ad.RequestFrames(inSegment, targetFrames); + EXPECT_EQ(outSegment.GetDuration(), targetFrames); + for (AudioSegment::ConstChunkIterator ci(outSegment); !ci.IsEnded(); + ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } + outToneVerifier.AppendData(outSegment); + sourceFramesIteration += sourceFrames; + targetFramesIteration += targetFrames; + } + } + + // Initial buffering is 50ms, which is then expected to be reduced as the + // drift adaptation stabilizes. + EXPECT_LT(ad.CurrentBuffering(), aSourceRate * 50U / 1000); + // Desired buffering should not go lower than some 130% of the source buffer + // size per-iteration. + EXPECT_GT(ad.CurrentBuffering(), aSourceRate * 10U / 1000); + + EXPECT_EQ(ad.NumUnderruns(), 0U); + + EXPECT_FLOAT_EQ(inToneVerifier.EstimatedFreq(), tone.mFrequency); + EXPECT_EQ(inToneVerifier.PreSilenceSamples(), 0U); + EXPECT_EQ(inToneVerifier.CountDiscontinuities(), 0U); + + EXPECT_NEAR(outToneVerifier.EstimatedFreq(), tone.mFrequency, 1.0f); + // The expected pre-silence is equal to the initial desired buffering (50ms) + // minus what is left after resampling the first input segment. 
+ const auto buffering = media::TimeUnit::FromSeconds(0.05); + const auto sourceStep = + media::TimeUnit(aSourceRate * 1002 / 1000 / 100, aSourceRate); + const auto targetStep = media::TimeUnit(aTargetRate / 100, aTargetRate); + EXPECT_NEAR(static_cast<int64_t>(outToneVerifier.PreSilenceSamples()), + (targetStep + buffering - sourceStep) + .ToBase(aSourceRate) + .ToBase<media::TimeUnit::CeilingPolicy>(aTargetRate) + .ToTicksAtRate(aTargetRate), + 1U); + EXPECT_EQ(outToneVerifier.CountDiscontinuities(), 0U); +} + +TEST(TestAudioDriftCorrection, Basic) +{ + printf("Testing AudioCorrection 48 -> 48\n"); + testAudioCorrection(48000, 48000); + printf("Testing AudioCorrection 48 -> 44.1\n"); + testAudioCorrection(48000, 44100); + printf("Testing AudioCorrection 44.1 -> 48\n"); + testAudioCorrection(44100, 48000); + printf("Testing AudioCorrection 23458 -> 25113\n"); + testAudioCorrection(23458, 25113); +} + +TEST(TestAudioDriftCorrection, MonoToStereoInput) +{ + constexpr bool testMonoToStereoInput = true; + printf("Testing MonoToStereoInput 48 -> 48\n"); + testAudioCorrection(48000, 48000, testMonoToStereoInput); + printf("Testing MonoToStereoInput 48 -> 44.1\n"); + testAudioCorrection(48000, 44100, testMonoToStereoInput); + printf("Testing MonoToStereoInput 44.1 -> 48\n"); + testAudioCorrection(44100, 48000, testMonoToStereoInput); +} + +TEST(TestAudioDriftCorrection, NotEnoughFrames) +{ + const uint32_t frequency = 100; + const uint32_t sampleRateTransmitter = 48000; + const uint32_t sampleRateReceiver = 48000; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRateTransmitter, sampleRateReceiver, + testPrincipal); + const uint32_t targetFrames = sampleRateReceiver / 100; + + AudioGenerator<AudioDataValue> tone(1, sampleRateTransmitter, frequency); + AudioVerifier<AudioDataValue> outToneVerifier(sampleRateReceiver, frequency); + + for (uint32_t i = 0; i < 7; ++i) { + // Input is 
something small, 10 frames here, in order to dry out fast, + // after 4 iterations (pre-buffer = 2400) + AudioSegment inSegment; + tone.Generate(inSegment, 10); + + AudioSegment outSegment = ad.RequestFrames(inSegment, targetFrames); + EXPECT_EQ(outSegment.GetDuration(), targetFrames); + EXPECT_FALSE(outSegment.IsNull()); + for (AudioSegment::ConstChunkIterator ci(outSegment); !ci.IsEnded(); + ci.Next()) { + if (i < 5) { + if (!ci->IsNull()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } + } + } + + outToneVerifier.AppendData(outSegment); + } + EXPECT_EQ(ad.BufferSize(), 4800U); + EXPECT_EQ(ad.NumUnderruns(), 1u); + EXPECT_EQ(outToneVerifier.CountDiscontinuities(), 1u); +} + +TEST(TestAudioDriftCorrection, CrashInAudioResampler) +{ + const uint32_t sampleRateTransmitter = 48000; + const uint32_t sampleRateReceiver = 48000; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRateTransmitter, sampleRateReceiver, + testPrincipal); + const uint32_t targetFrames = sampleRateReceiver / 100; + + for (uint32_t i = 0; i < 100; ++i) { + AudioChunk chunk = CreateAudioChunk<float>(sampleRateTransmitter / 1000, 1, + AUDIO_FORMAT_FLOAT32); + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(chunk)); + + AudioSegment outSegment = ad.RequestFrames(inSegment, targetFrames); + EXPECT_EQ(outSegment.GetDuration(), targetFrames); + for (AudioSegment::ConstChunkIterator ci(outSegment); !ci.IsEnded(); + ci.Next()) { + if (!ci->IsNull()) { // Don't check the data if ad is dried out. 
+ EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } + } + } +} + +TEST(TestAudioDriftCorrection, HighLatencyProducerLowLatencyConsumer) +{ + constexpr uint32_t transmitterBlockSize = 2048; + constexpr uint32_t receiverBlockSize = 128; + constexpr uint32_t sampleRate = 48000; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRate, sampleRate, testPrincipal); + + uint32_t numBlocksProduced = 0; + for (uint32_t i = 0; i < (sampleRate / 1000) * 500; i += receiverBlockSize) { + AudioSegment inSegment; + if ((i / transmitterBlockSize) > numBlocksProduced) { + AudioChunk chunk = CreateAudioChunk<float>(transmitterBlockSize, 1, + AUDIO_FORMAT_FLOAT32); + inSegment.AppendAndConsumeChunk(std::move(chunk)); + ++numBlocksProduced; + } + + AudioSegment outSegment = ad.RequestFrames(inSegment, receiverBlockSize); + EXPECT_EQ(outSegment.GetDuration(), receiverBlockSize); + } + + // Input is stable so no corrections should occur. 
+ EXPECT_EQ(ad.NumCorrectionChanges(), 0U); +} + +TEST(TestAudioDriftCorrection, LargerTransmitterBlockSizeThanDesiredBuffering) +{ + constexpr uint32_t transmitterBlockSize = 4096; + constexpr uint32_t receiverBlockSize = 128; + constexpr uint32_t sampleRate = 48000; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRate, sampleRate, testPrincipal); + + uint32_t numBlocksTransmitted = 0; + for (uint32_t i = 0; i < (sampleRate / 1000) * 500; i += receiverBlockSize) { + AudioSegment inSegment; + if (uint32_t currentBlock = i / transmitterBlockSize; + currentBlock > numBlocksTransmitted) { + AudioChunk chunk = CreateAudioChunk<float>(transmitterBlockSize, 1, + AUDIO_FORMAT_FLOAT32); + inSegment.AppendAndConsumeChunk(std::move(chunk)); + numBlocksTransmitted = currentBlock; + } + + AudioSegment outSegment = ad.RequestFrames(inSegment, receiverBlockSize); + EXPECT_EQ(outSegment.GetDuration(), receiverBlockSize); + + if (numBlocksTransmitted > 0) { + EXPECT_GT(ad.CurrentBuffering(), 0U); + } + } + + // Input is stable so no corrections should occur. + EXPECT_EQ(ad.NumCorrectionChanges(), 0U); + // The drift correction buffer size had to be larger than the desired (the + // buffer size is twice the initial buffering level), to accomodate the large + // input block size. 
+ EXPECT_EQ(ad.BufferSize(), 9600U); +} + +TEST(TestAudioDriftCorrection, LargerReceiverBlockSizeThanDesiredBuffering) +{ + constexpr uint32_t transmitterBlockSize = 128; + constexpr uint32_t receiverBlockSize = 4096; + constexpr uint32_t sampleRate = 48000; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRate, sampleRate, testPrincipal); + + for (uint32_t i = 0; i < (sampleRate / 1000) * 500; + i += transmitterBlockSize) { + AudioSegment inSegment; + AudioChunk chunk = + CreateAudioChunk<float>(transmitterBlockSize, 1, AUDIO_FORMAT_FLOAT32); + inSegment.AppendAndConsumeChunk(std::move(chunk)); + + if (i % receiverBlockSize == 0) { + AudioSegment outSegment = ad.RequestFrames(inSegment, receiverBlockSize); + EXPECT_EQ(outSegment.GetDuration(), receiverBlockSize); + } + + if (i >= receiverBlockSize) { + EXPECT_GT(ad.CurrentBuffering(), 0U); + } + } + + // Input is stable so no corrections should occur. + EXPECT_EQ(ad.NumCorrectionChanges(), 0U); + // The drift correction buffer size had to be larger than the desired (the + // buffer size is twice the initial buffering level), to accomodate the large + // input block size that gets buffered in the resampler only when processing + // output. 
+ EXPECT_EQ(ad.BufferSize(), 19200U); +} + +TEST(TestAudioDriftCorrection, DynamicInputBufferSizeChanges) +{ + constexpr uint32_t transmitterBlockSize1 = 2048; + constexpr uint32_t transmitterBlockSize2 = 4096; + constexpr uint32_t receiverBlockSize = 128; + constexpr uint32_t sampleRate = 48000; + constexpr uint32_t frequencyHz = 100; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioDriftCorrection ad(sampleRate, sampleRate, testPrincipal); + + AudioGenerator<AudioDataValue> tone(1, sampleRate, frequencyHz); + AudioVerifier<AudioDataValue> inToneVerifier(sampleRate, frequencyHz); + AudioVerifier<AudioDataValue> outToneVerifier(sampleRate, frequencyHz); + + TrackTime totalFramesTransmitted = 0; + TrackTime totalFramesReceived = 0; + + const auto produceSomeData = [&](uint32_t aTransmitterBlockSize, + uint32_t aDuration) { + TrackTime transmittedFramesStart = totalFramesTransmitted; + TrackTime receivedFramesStart = totalFramesReceived; + uint32_t numBlocksTransmitted = 0; + for (uint32_t i = 0; i < aDuration; i += receiverBlockSize) { + AudioSegment inSegment; + if (((receivedFramesStart - transmittedFramesStart + i) / + aTransmitterBlockSize) > numBlocksTransmitted) { + tone.Generate(inSegment, aTransmitterBlockSize); + MOZ_ASSERT(!inSegment.IsNull()); + inToneVerifier.AppendData(inSegment); + MOZ_ASSERT(!inSegment.IsNull()); + ++numBlocksTransmitted; + totalFramesTransmitted += aTransmitterBlockSize; + } + + AudioSegment outSegment = ad.RequestFrames(inSegment, receiverBlockSize); + EXPECT_EQ(outSegment.GetDuration(), receiverBlockSize); + outToneVerifier.AppendData(outSegment); + totalFramesReceived += receiverBlockSize; + } + }; + + produceSomeData(transmitterBlockSize1, 5 * sampleRate); + EXPECT_EQ(ad.BufferSize(), 4800U); + // Input is stable so no corrections should occur. + EXPECT_EQ(ad.NumCorrectionChanges(), 0U); + EXPECT_EQ(ad.NumUnderruns(), 0U); + + // Increase input latency. 
We expect this to underrun, but only once as the + // drift correction adapts its buffer size and desired buffering level. + produceSomeData(transmitterBlockSize2, 10 * sampleRate); + auto numCorrectionChanges = ad.NumCorrectionChanges(); + EXPECT_EQ(ad.NumUnderruns(), 1U); + + // Adapting to the new input block size should have stabilized. + EXPECT_GT(ad.BufferSize(), transmitterBlockSize2); + produceSomeData(transmitterBlockSize2, 10 * sampleRate); + EXPECT_EQ(ad.NumCorrectionChanges(), numCorrectionChanges); + EXPECT_EQ(ad.NumUnderruns(), 1U); + + // Decrease input latency. We expect the drift correction to gradually + // decrease its desired buffering level. + produceSomeData(transmitterBlockSize1, 100 * sampleRate); + numCorrectionChanges = ad.NumCorrectionChanges(); + EXPECT_EQ(ad.NumUnderruns(), 1U); + + // Adapting to the new input block size should have stabilized. + EXPECT_EQ(ad.BufferSize(), 9600U); + produceSomeData(transmitterBlockSize1, 20 * sampleRate); + EXPECT_NEAR(ad.NumCorrectionChanges(), numCorrectionChanges, 1U); + EXPECT_EQ(ad.NumUnderruns(), 1U); + + EXPECT_NEAR(inToneVerifier.EstimatedFreq(), tone.mFrequency, 1.0f); + EXPECT_EQ(inToneVerifier.PreSilenceSamples(), 0U); + EXPECT_EQ(inToneVerifier.CountDiscontinuities(), 0U); + + EXPECT_NEAR(outToneVerifier.EstimatedFreq(), tone.mFrequency, 1.0f); + // The expected pre-silence is equal to the desired buffering plus what's + // needed to resample the first input segment. + EXPECT_EQ(outToneVerifier.PreSilenceSamples(), 2528U); + // One mid-stream period of silence from increasing the input buffer size, + // causing an underrun. Counts as two discontinuities. + EXPECT_EQ(outToneVerifier.CountDiscontinuities(), 2U); +} + +/** + * This is helpful to run together with + * MOZ_LOG=raw,DriftControllerGraphs:5 MOZ_LOG_FILE=./plot_values.csv + * to be able to plot the step response of a change in source clock rate (i.e. + * drift). Useful for calculating and verifying PID coefficients. 
+ */ +TEST(TestAudioDriftCorrection, DriftStepResponse) +{ + constexpr uint32_t nominalRate = 48000; + constexpr uint32_t interval = nominalRate; + constexpr uint32_t inputRate = nominalRate * 1005 / 1000; // 0.5% drift + constexpr uint32_t inputInterval = inputRate; + constexpr uint32_t iterations = 200; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + AudioGenerator<AudioDataValue> tone(1, nominalRate, 440); + AudioDriftCorrection ad(nominalRate, nominalRate, testPrincipal); + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + tone.Generate(inSegment, inputInterval / 100); + ad.RequestFrames(inSegment, interval / 100); + } + + EXPECT_EQ(ad.BufferSize(), 4800U); + EXPECT_EQ(ad.NumUnderruns(), 0u); +} + +/** + * Similar to DriftStepResponse but will underrun to allow testing the underrun + * handling. This is helpful to run together with + * MOZ_LOG=raw,DriftControllerGraphs:5 MOZ_LOG_FILE=./plot_values.csv + */ +TEST(TestAudioDriftCorrection, DriftStepResponseUnderrun) +{ + constexpr uint32_t nominalRate = 48000; + constexpr uint32_t interval = nominalRate; + constexpr uint32_t iterations = 200; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + uint32_t inputRate = nominalRate * 1005 / 1000; // 0.5% drift + uint32_t inputInterval = inputRate; + AudioGenerator<AudioDataValue> tone(1, nominalRate, 440); + AudioDriftCorrection ad(nominalRate, nominalRate, testPrincipal); + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + tone.Generate(inSegment, inputInterval / 100); + ad.RequestFrames(inSegment, interval / 100); + } + + inputRate = nominalRate * 998 / 1000; // -0.2% drift + inputInterval = inputRate; + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + tone.Generate(inSegment, inputInterval / 100); + 
ad.RequestFrames(inSegment, interval / 100); + } + + EXPECT_EQ(ad.BufferSize(), 4800U); + EXPECT_EQ(ad.NumUnderruns(), 1u); +} + +/** + * Similar to DriftStepResponse but with a high-latency input, and will underrun + * to allow testing the underrun handling. This is helpful to run together with + * MOZ_LOG=raw,DriftControllerGraphs:5 MOZ_LOG_FILE=./plot_values.csv + */ +TEST(TestAudioDriftCorrection, DriftStepResponseUnderrunHighLatencyInput) +{ + constexpr uint32_t nominalRate = 48000; + constexpr uint32_t interval = nominalRate; + constexpr uint32_t iterations = 200; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + uint32_t inputRate = nominalRate * 1005 / 1000; // 0.5% drift + uint32_t inputInterval = inputRate; + AudioGenerator<AudioDataValue> tone(1, nominalRate, 440); + AudioDriftCorrection ad(nominalRate, nominalRate, testPrincipal); + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + if (i > 0 && i % interval == 0) { + tone.Generate(inSegment, inputInterval); + } + ad.RequestFrames(inSegment, interval / 100); + } + + inputRate = nominalRate * 995 / 1000; // -0.5% drift + inputInterval = inputRate; + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + if (i > 0 && i % interval == 0) { + tone.Generate(inSegment, inputInterval); + } + ad.RequestFrames(inSegment, interval / 100); + } + + EXPECT_EQ(ad.BufferSize(), 220800U); + EXPECT_EQ(ad.NumUnderruns(), 1u); +} + +/** + * Similar to DriftStepResponse but with a high-latency input, and will overrun + * (input callback buffer is larger than AudioDriftCorrection's ring buffer for + * input data) to allow testing the overrun handling. 
This is helpful to run + * together with + * MOZ_LOG=raw,DriftControllerGraphs:5 MOZ_LOG_FILE=./plot_values.csv + */ +TEST(TestAudioDriftCorrection, DriftStepResponseOverrun) +{ + constexpr uint32_t nominalRate = 48000; + constexpr uint32_t interval = nominalRate; + constexpr uint32_t inputRate = nominalRate * 1005 / 1000; // 0.5% drift + constexpr uint32_t inputInterval = inputRate; + constexpr uint32_t iterations = 200; + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + AudioGenerator<AudioDataValue> tone(1, nominalRate, 440); + AudioDriftCorrection ad(nominalRate, nominalRate, testPrincipal); + + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + tone.Generate(inSegment, inputInterval / 100); + ad.RequestFrames(inSegment, interval / 100); + } + + // Change input callbacks to 2000ms (+0.5% drift) = 48200 frames, which will + // overrun the ring buffer. + for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { + AudioSegment inSegment; + if (i > 0 && i % interval == 0) { + // This simulates the input stream latency increasing externally. It's + // building up a second worth of data before the next callback. This also + // causes an underrun. + tone.Generate(inSegment, inputInterval); + } + ad.RequestFrames(inSegment, interval / 100); + } + + EXPECT_EQ(ad.BufferSize(), 105600U); + EXPECT_EQ(ad.NumUnderruns(), 1u); +} diff --git a/dom/media/driftcontrol/gtest/TestAudioResampler.cpp b/dom/media/driftcontrol/gtest/TestAudioResampler.cpp new file mode 100644 index 0000000000..f04bc87314 --- /dev/null +++ b/dom/media/driftcontrol/gtest/TestAudioResampler.cpp @@ -0,0 +1,677 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "gtest/gtest.h" + +#include "AudioResampler.h" +#include "nsContentUtils.h" + +using namespace mozilla; + +template <class T> +AudioChunk CreateAudioChunk(uint32_t aFrames, uint32_t aChannels, + AudioSampleFormat aSampleFormat) { + AudioChunk chunk; + nsTArray<nsTArray<T>> buffer; + buffer.AppendElements(aChannels); + + nsTArray<const T*> bufferPtrs; + bufferPtrs.AppendElements(aChannels); + + for (uint32_t i = 0; i < aChannels; ++i) { + T* ptr = buffer[i].AppendElements(aFrames); + bufferPtrs[i] = ptr; + for (uint32_t j = 0; j < aFrames; ++j) { + if (aSampleFormat == AUDIO_FORMAT_FLOAT32) { + ptr[j] = 0.01 * j; + } else { + ptr[j] = j; + } + } + } + + chunk.mBuffer = new mozilla::SharedChannelArrayBuffer(std::move(buffer)); + chunk.mBufferFormat = aSampleFormat; + chunk.mChannelData.AppendElements(aChannels); + for (uint32_t i = 0; i < aChannels; ++i) { + chunk.mChannelData[i] = bufferPtrs[i]; + } + chunk.mDuration = aFrames; + return chunk; +} + +template <class T> +AudioSegment CreateAudioSegment(uint32_t aFrames, uint32_t aChannels, + AudioSampleFormat aSampleFormat) { + AudioSegment segment; + AudioChunk chunk = CreateAudioChunk<T>(aFrames, aChannels, aSampleFormat); + segment.AppendAndConsumeChunk(std::move(chunk)); + return segment; +} + +TEST(TestAudioResampler, OutAudioSegment_Float) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 21; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioSegment inSegment = + CreateAudioSegment<float>(in_frames, channels, AUDIO_FORMAT_FLOAT32); + dr.AppendInput(inSegment); + + out_frames = 20u; + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 20); 
+ EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + + for (AudioSegment::ChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + AudioChunk& c = *ci; + EXPECT_EQ(c.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c.ChannelCount(), 2u); + for (uint32_t i = 0; i < out_frames; ++i) { + // The first input segment is part of the pre buffer, so 21-10=11 of the + // input is silence. They make up 22 silent output frames after + // resampling. + EXPECT_FLOAT_EQ(c.ChannelData<float>()[0][i], 0.0); + EXPECT_FLOAT_EQ(c.ChannelData<float>()[1][i], 0.0); + } + } + + // Update out rate + out_rate = 44100; + dr.UpdateOutRate(out_rate); + out_frames = in_frames * out_rate / in_rate; + EXPECT_EQ(out_frames, 18u); + // Even if we provide no input if we have enough buffered input, we can create + // output + hasUnderrun = false; + AudioSegment s1 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s1.GetDuration(), out_frames); + EXPECT_EQ(s1.GetType(), MediaSegment::AUDIO); + for (AudioSegment::ConstChunkIterator ci(s1); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, OutAudioSegment_Short) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 21; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioSegment inSegment = + CreateAudioSegment<short>(in_frames, channels, AUDIO_FORMAT_S16); + dr.AppendInput(inSegment); + + out_frames = 20u; + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 20); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + + for (AudioSegment::ChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + AudioChunk& c = *ci; + 
EXPECT_EQ(c.mPrincipalHandle, testPrincipal); + EXPECT_EQ(c.ChannelCount(), 2u); + for (uint32_t i = 0; i < out_frames; ++i) { + // The first input segment is part of the pre buffer, so 21-10=11 of the + // input is silence. They make up 22 silent output frames after + // resampling. + EXPECT_FLOAT_EQ(c.ChannelData<short>()[0][i], 0.0); + EXPECT_FLOAT_EQ(c.ChannelData<short>()[1][i], 0.0); + } + } + + // Update out rate + out_rate = 44100; + dr.UpdateOutRate(out_rate); + out_frames = in_frames * out_rate / in_rate; + EXPECT_EQ(out_frames, 18u); + // Even if we provide no input if we have enough buffered input, we can create + // output + hasUnderrun = false; + AudioSegment s1 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s1.GetDuration(), out_frames); + EXPECT_EQ(s1.GetType(), MediaSegment::AUDIO); + for (AudioSegment::ConstChunkIterator ci(s1); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, OutAudioSegmentLargerThanResampledInput_Float) +{ + const uint32_t in_frames = 130; + const uint32_t out_frames = 300; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 5; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + PRINCIPAL_HANDLE_NONE); + + AudioSegment inSegment = + CreateAudioSegment<float>(in_frames, channels, AUDIO_FORMAT_FLOAT32); + + // Set the pre-buffer. 
+ dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment s = dr.Resample(300, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 300); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + + dr.AppendInput(inSegment); + + AudioSegment s2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_TRUE(hasUnderrun); + EXPECT_EQ(s2.GetDuration(), 300); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); +} + +TEST(TestAudioResampler, InAudioSegment_Float) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 20; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 10; + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioSegment inSegment; + + AudioChunk chunk1; + chunk1.SetNull(in_frames / 2); + inSegment.AppendAndConsumeChunk(std::move(chunk1)); + + AudioChunk chunk2; + nsTArray<nsTArray<float>> buffer; + buffer.AppendElements(channels); + + nsTArray<const float*> bufferPtrs; + bufferPtrs.AppendElements(channels); + + for (uint32_t i = 0; i < channels; ++i) { + float* ptr = buffer[i].AppendElements(5); + bufferPtrs[i] = ptr; + for (uint32_t j = 0; j < 5; ++j) { + ptr[j] = 0.01f * j; + } + } + + chunk2.mBuffer = new mozilla::SharedChannelArrayBuffer(std::move(buffer)); + chunk2.mBufferFormat = AUDIO_FORMAT_FLOAT32; + chunk2.mChannelData.AppendElements(channels); + for (uint32_t i = 0; i < channels; ++i) { + chunk2.mChannelData[i] = bufferPtrs[i]; + } + chunk2.mDuration = in_frames / 2; + inSegment.AppendAndConsumeChunk(std::move(chunk2)); + + dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment outSegment = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // inSegment contains 10 frames, 5 null, 5 non-null. 
They're part of the pre + // buffer which is 10, meaning there are no extra pre buffered silence frames. + EXPECT_EQ(outSegment.GetDuration(), out_frames); + EXPECT_EQ(outSegment.MaxChannelCount(), 2u); + + // Add another 5 null and 5 non-null frames. + dr.AppendInput(inSegment); + AudioSegment outSegment2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(outSegment2.GetDuration(), out_frames); + EXPECT_EQ(outSegment2.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(outSegment2); !ci.IsEnded(); + ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, InAudioSegment_Short) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 20; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 10; + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioSegment inSegment; + + // The null chunk at the beginning will be ignored. 
+ AudioChunk chunk1; + chunk1.SetNull(in_frames / 2); + inSegment.AppendAndConsumeChunk(std::move(chunk1)); + + AudioChunk chunk2; + nsTArray<nsTArray<short>> buffer; + buffer.AppendElements(channels); + + nsTArray<const short*> bufferPtrs; + bufferPtrs.AppendElements(channels); + + for (uint32_t i = 0; i < channels; ++i) { + short* ptr = buffer[i].AppendElements(5); + bufferPtrs[i] = ptr; + for (uint32_t j = 0; j < 5; ++j) { + ptr[j] = j; + } + } + + chunk2.mBuffer = new mozilla::SharedChannelArrayBuffer(std::move(buffer)); + chunk2.mBufferFormat = AUDIO_FORMAT_S16; + chunk2.mChannelData.AppendElements(channels); + for (uint32_t i = 0; i < channels; ++i) { + chunk2.mChannelData[i] = bufferPtrs[i]; + } + chunk2.mDuration = in_frames / 2; + inSegment.AppendAndConsumeChunk(std::move(chunk2)); + + dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment outSegment = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // inSegment contains 10 frames, 5 null, 5 non-null. They're part of the pre + // buffer which is 10, meaning there are no extra pre buffered silence frames. + EXPECT_EQ(outSegment.GetDuration(), out_frames); + EXPECT_EQ(outSegment.MaxChannelCount(), 2u); + + // Add another 5 null and 5 non-null frames. 
+ dr.AppendInput(inSegment); + AudioSegment outSegment2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(outSegment2.GetDuration(), out_frames); + EXPECT_EQ(outSegment2.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(outSegment2); !ci.IsEnded(); + ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_MonoToStereo) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 0; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioChunk monoChunk = + CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(monoChunk)); + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + dr.AppendInput(inSegment); + + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 40); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_StereoToMono) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 0; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioChunk monoChunk = + CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); + AudioChunk 
stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + inSegment.AppendAndConsumeChunk(std::move(monoChunk)); + dr.AppendInput(inSegment); + + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 40); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s.MaxChannelCount(), 1u); + for (AudioSegment::ConstChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_StereoToQuad) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 0; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), + testPrincipal); + + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + AudioChunk quadChunk = + CreateAudioChunk<float>(in_frames, 4, AUDIO_FORMAT_FLOAT32); + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + inSegment.AppendAndConsumeChunk(std::move(quadChunk)); + dr.AppendInput(inSegment); + + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 40u); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + + AudioSegment s2 = dr.Resample(out_frames / 2, &hasUnderrun); + EXPECT_TRUE(hasUnderrun); + EXPECT_EQ(s2.GetDuration(), 20u); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); + for (AudioSegment::ConstChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_QuadToStereo) +{ + const PrincipalHandle testPrincipal = + 
MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + AudioResampler dr(in_rate, out_rate, media::TimeUnit::Zero(), testPrincipal); + + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + AudioChunk quadChunk = + CreateAudioChunk<float>(in_frames, 4, AUDIO_FORMAT_FLOAT32); + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(quadChunk)); + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + dr.AppendInput(inSegment); + + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + EXPECT_EQ(s.GetDuration(), 40u); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + + AudioSegment s2 = dr.Resample(out_frames / 2, &hasUnderrun); + EXPECT_TRUE(hasUnderrun); + EXPECT_EQ(s2.GetDuration(), 20u); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); + for (AudioSegment::ConstChunkIterator ci(s); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +void printAudioSegment(const AudioSegment& segment); + +TEST(TestAudioResampler, ChannelChange_Discontinuity) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + const float amplitude = 0.5; + const float frequency = 200; + const float phase = 0.0; + float time = 0.0; + const float deltaTime = 1.0f / static_cast<float>(in_rate); + + uint32_t in_frames = in_rate / 100; + uint32_t out_frames = out_rate / 100; + AudioResampler dr(in_rate, out_rate, media::TimeUnit::Zero(), testPrincipal); + + AudioChunk monoChunk = + CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < monoChunk.GetDuration(); ++i) { + double value = amplitude * sin(2 * M_PI * frequency * time + phase); + monoChunk.ChannelDataForWrite<float>(0)[i] = 
static_cast<float>(value); + time += deltaTime; + } + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < stereoChunk.GetDuration(); ++i) { + double value = amplitude * sin(2 * M_PI * frequency * time + phase); + stereoChunk.ChannelDataForWrite<float>(0)[i] = static_cast<float>(value); + if (stereoChunk.ChannelCount() == 2) { + stereoChunk.ChannelDataForWrite<float>(1)[i] = value; + } + time += deltaTime; + } + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + // printAudioSegment(inSegment); + + dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s); + + AudioSegment inSegment2; + inSegment2.AppendAndConsumeChunk(std::move(monoChunk)); + // The resampler here is updated due to the channel change and that creates + // discontinuity. + dr.AppendInput(inSegment2); + AudioSegment s2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s2); + + EXPECT_EQ(s2.GetDuration(), 480); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s2.MaxChannelCount(), 1u); + for (AudioSegment::ConstChunkIterator ci(s2); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_Discontinuity2) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + const float amplitude = 0.5; + const float frequency = 200; + const float phase = 0.0; + float time = 0.0; + const float deltaTime = 1.0f / static_cast<float>(in_rate); + + uint32_t in_frames = in_rate / 100; + uint32_t out_frames = out_rate / 100; + AudioResampler dr(in_rate, out_rate, media::TimeUnit(10, in_rate), + testPrincipal); + + AudioChunk monoChunk = + CreateAudioChunk<float>(in_frames / 2, 1, 
AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < monoChunk.GetDuration(); ++i) { + double value = amplitude * sin(2 * M_PI * frequency * time + phase); + monoChunk.ChannelDataForWrite<float>(0)[i] = static_cast<float>(value); + time += deltaTime; + } + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames / 2, 2, AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < stereoChunk.GetDuration(); ++i) { + double value = amplitude * sin(2 * M_PI * frequency * time + phase); + stereoChunk.ChannelDataForWrite<float>(0)[i] = static_cast<float>(value); + if (stereoChunk.ChannelCount() == 2) { + stereoChunk.ChannelDataForWrite<float>(1)[i] = value; + } + time += deltaTime; + } + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(monoChunk)); + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + // printAudioSegment(inSegment); + + dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment s1 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s1); + + EXPECT_EQ(s1.GetDuration(), 480); + EXPECT_EQ(s1.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s1.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(s1); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } + + // The resampler here is updated due to the channel change and that creates + // discontinuity. 
+ dr.AppendInput(inSegment); + AudioSegment s2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s2); + + EXPECT_EQ(s2.GetDuration(), 480); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s2.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(s2); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} + +TEST(TestAudioResampler, ChannelChange_Discontinuity3) +{ + const PrincipalHandle testPrincipal = + MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); + + uint32_t in_rate = 48000; + uint32_t out_rate = 48000; + + const float amplitude = 0.5; + const float frequency = 200; + const float phase = 0.0; + float time = 0.0; + const float deltaTime = 1.0f / static_cast<float>(in_rate); + + uint32_t in_frames = in_rate / 100; + uint32_t out_frames = out_rate / 100; + AudioResampler dr(in_rate, out_rate, media::TimeUnit(10, in_rate), + testPrincipal); + + AudioChunk stereoChunk = + CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); + for (uint32_t i = 0; i < stereoChunk.GetDuration(); ++i) { + double value = amplitude * sin(2 * M_PI * frequency * time + phase); + stereoChunk.ChannelDataForWrite<float>(0)[i] = static_cast<float>(value); + if (stereoChunk.ChannelCount() == 2) { + stereoChunk.ChannelDataForWrite<float>(1)[i] = value; + } + time += deltaTime; + } + + AudioSegment inSegment; + inSegment.AppendAndConsumeChunk(std::move(stereoChunk)); + // printAudioSegment(inSegment); + + dr.AppendInput(inSegment); + bool hasUnderrun = false; + AudioSegment s = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s); + + EXPECT_EQ(s.GetDuration(), 480); + EXPECT_EQ(s.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s.MaxChannelCount(), 2u); + + // The resampler here is updated due to the rate change. This is because the + // in and out rate was the same so a pass through logic was used. 
By updating + // the out rate to something different than the in rate, the resampler will + // start being used and discontinuity will exist. + dr.UpdateOutRate(out_rate + 400); + dr.AppendInput(inSegment); + AudioSegment s2 = dr.Resample(out_frames, &hasUnderrun); + EXPECT_FALSE(hasUnderrun); + // printAudioSegment(s2); + + EXPECT_EQ(s2.GetDuration(), 480); + EXPECT_EQ(s2.GetType(), MediaSegment::AUDIO); + EXPECT_EQ(s2.MaxChannelCount(), 2u); + for (AudioSegment::ConstChunkIterator ci(s2); !ci.IsEnded(); ci.Next()) { + EXPECT_EQ(ci->mPrincipalHandle, testPrincipal); + } +} diff --git a/dom/media/driftcontrol/gtest/TestDriftController.cpp b/dom/media/driftcontrol/gtest/TestDriftController.cpp new file mode 100644 index 0000000000..33486f945f --- /dev/null +++ b/dom/media/driftcontrol/gtest/TestDriftController.cpp @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "DriftController.h" +#include "mozilla/Maybe.h" + +using namespace mozilla; + +TEST(TestDriftController, Basic) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t buffered = 5 * 480; + constexpr uint32_t bufferedLow = 3 * 480; + constexpr uint32_t bufferedHigh = 7 * 480; + + DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + + // The adjustment interval is 1s. 
+ const auto oneSec = media::TimeUnit(48000, 48000); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); +} + +TEST(TestDriftController, BasicResampler) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t buffered = 5 * 240; + constexpr uint32_t bufferedLow = 3 * 240; + constexpr uint32_t bufferedHigh = 7 * 240; + + DriftController c(24000, 48000, media::TimeUnit::FromSeconds(0.05)); + + // The adjustment interval is 1s. + const auto oneSec = media::TimeUnit(48000, 48000); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // low + c.UpdateClock(oneSec, oneSec, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + + // high + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // high + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 47964u); +} + +TEST(TestDriftController, BufferedInput) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t buffered = 5 * 480; + constexpr uint32_t bufferedLow = 3 * 480; + constexpr uint32_t bufferedHigh = 7 * 480; + + DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // The adjustment interval is 1s. 
+ const auto oneSec = media::TimeUnit(48000, 48000); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // 0 buffered when updating correction + c.UpdateClock(oneSec, oneSec, 0, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + + c.UpdateClock(oneSec, oneSec, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); +} + +TEST(TestDriftController, BufferedInputWithResampling) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t buffered = 5 * 240; + constexpr uint32_t bufferedLow = 3 * 240; + constexpr uint32_t bufferedHigh = 7 * 240; + + DriftController c(24000, 48000, media::TimeUnit::FromSeconds(0.05)); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // The adjustment interval is 1s. + const auto oneSec = media::TimeUnit(24000, 24000); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // 0 buffered when updating correction + c.UpdateClock(oneSec, oneSec, 0, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + + c.UpdateClock(oneSec, oneSec, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); +} + +TEST(TestDriftController, SmallError) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t buffered = 5 * 480; + constexpr uint32_t bufferedLow = buffered - 48; + constexpr uint32_t bufferedHigh = buffered + 48; + + DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + // The adjustment interval is 1s. 
+ const auto oneSec = media::TimeUnit(48000, 48000); + + c.UpdateClock(oneSec, oneSec, buffered, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); +} + +TEST(TestDriftController, SmallBufferedFrames) +{ + // The buffer level is the only input to the controller logic. + constexpr uint32_t bufferedLow = 3 * 480; + + DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); + media::TimeUnit oneSec = media::TimeUnit::FromSeconds(1); + media::TimeUnit hundredMillis = oneSec / 10; + + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + for (uint32_t i = 0; i < 9; ++i) { + c.UpdateClock(hundredMillis, hundredMillis, bufferedLow, 0); + } + EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + c.UpdateClock(hundredMillis, hundredMillis, bufferedLow, 0); + EXPECT_EQ(c.GetCorrectedTargetRate(), 48048U); +} diff --git a/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp b/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp new file mode 100644 index 0000000000..fb8ac52ae4 --- /dev/null +++ b/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp @@ -0,0 +1,722 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "gtest/gtest.h" + +#include "DynamicResampler.h" + +using namespace mozilla; + +TEST(TestDynamicResampler, SameRates_Float1) +{ + const uint32_t in_frames = 100; + const uint32_t out_frames = 100; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + + // float in_ch1[] = {.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0}; + // float in_ch2[] = {.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0}; + float in_ch1[in_frames] = {}; + float in_ch2[in_frames] = {}; + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[out_frames] = {}; + float out_ch2[out_frames] = {}; + + // Warm up with zeros + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_FLOAT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_FLOAT_EQ(in_ch2[i], out_ch2[i]); + } + + // Continue with non zero + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch1[i] = in_ch2[i] = 0.01f * i; + } + dr.AppendInput(in_buffer, in_frames); + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_FLOAT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_FLOAT_EQ(in_ch2[i], out_ch2[i]); + } + + // No more frames in the input buffer + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); +} + +TEST(TestDynamicResampler, SameRates_Short1) +{ + uint32_t in_frames = 2; + uint32_t out_frames = 2; + 
uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + + short in_ch1[] = {1, 2, 3}; + short in_ch2[] = {4, 5, 6}; + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + short out_ch1[3] = {}; + short out_ch2[3] = {}; + + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_EQ(in_ch2[i], out_ch2[i]); + } + + // No more frames in the input buffer + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); +} + +TEST(TestDynamicResampler, SameRates_Float2) +{ + uint32_t in_frames = 3; + uint32_t out_frames = 2; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + float in_ch1[] = {0.1, 0.2, 0.3}; + float in_ch2[] = {0.4, 0.5, 0.6}; + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[3] = {}; + float out_ch2[3] = {}; + + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_FLOAT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_FLOAT_EQ(in_ch2[i], out_ch2[i]); + } + + out_frames = 1; + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + 
EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_FLOAT_EQ(in_ch1[i + 2], out_ch1[i]); + EXPECT_FLOAT_EQ(in_ch2[i + 2], out_ch2[i]); + } + + // No more frames, the input buffer has drained + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); +} + +TEST(TestDynamicResampler, SameRates_Short2) +{ + uint32_t in_frames = 3; + uint32_t out_frames = 2; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + + short in_ch1[] = {1, 2, 3}; + short in_ch2[] = {4, 5, 6}; + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + short out_ch1[3] = {}; + short out_ch2[3] = {}; + + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_EQ(in_ch2[i], out_ch2[i]); + } + + out_frames = 1; + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_EQ(in_ch1[i + 2], out_ch1[i]); + EXPECT_EQ(in_ch2[i + 2], out_ch2[i]); + } + + // No more frames, the input buffer has drained + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); +} + +TEST(TestDynamicResampler, SameRates_Float3) +{ + uint32_t in_frames = 2; + uint32_t out_frames = 3; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t 
out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + float in_ch1[] = {0.1, 0.2, 0.3}; + float in_ch2[] = {0.4, 0.5, 0.6}; + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[3] = {}; + float out_ch2[3] = {}; + + // Not enough frames in the input buffer + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + + // Add one frame + in_buffer[0] = in_ch1 + 2; + in_buffer[1] = in_ch2 + 2; + dr.AppendInput(in_buffer, 1); + out_frames = 1; + hasUnderrun = dr.Resample(out_ch1 + 2, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2 + 2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_FLOAT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_FLOAT_EQ(in_ch2[i], out_ch2[i]); + } +} + +TEST(TestDynamicResampler, SameRates_Short3) +{ + uint32_t in_frames = 2; + uint32_t out_frames = 3; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + + short in_ch1[] = {1, 2, 3}; + short in_ch2[] = {4, 5, 6}; + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + short out_ch1[3] = {}; + short out_ch2[3] = {}; + + // Not enough frames in the input buffer + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + + // Add one frame + in_buffer[0] = in_ch1 + 2; + in_buffer[1] = in_ch2 + 2; + dr.AppendInput(in_buffer, 1); + out_frames = 1; + hasUnderrun = dr.Resample(out_ch1 + 2, out_frames, 0); + 
EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2 + 2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_EQ(in_ch1[i], out_ch1[i]); + EXPECT_EQ(in_ch2[i], out_ch2[i]); + } +} + +TEST(TestDynamicResampler, UpdateOutRate_Float) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 20; + + DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + + float in_ch1[10] = {}; + float in_ch2[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch1[i] = in_ch2[i] = 0.01f * i; + } + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[40] = {}; + float out_ch2[40] = {}; + + dr.AppendInputSilence(pre_buffer - in_frames); + dr.AppendInput(in_buffer, in_frames); + out_frames = 20u; + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + // Half the input pre-buffer (10) is silence, and half the output (20). 
+ EXPECT_FLOAT_EQ(out_ch1[i], 0.0); + EXPECT_FLOAT_EQ(out_ch2[i], 0.0); + } + + // Update out rate + out_rate = 44100; + dr.UpdateResampler(out_rate, channels); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + out_frames = in_frames * out_rate / in_rate; + EXPECT_EQ(out_frames, 18u); + // Even if we provide no input if we have enough buffered input, we can create + // output + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); +} + +TEST(TestDynamicResampler, UpdateOutRate_Short) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 40; + uint32_t channels = 2; + uint32_t in_rate = 24000; + uint32_t out_rate = 48000; + + uint32_t pre_buffer = 20; + + DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + + short in_ch1[10] = {}; + short in_ch2[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch1[i] = in_ch2[i] = i; + } + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + short out_ch1[40] = {}; + short out_ch2[40] = {}; + + dr.AppendInputSilence(pre_buffer - in_frames); + dr.AppendInput(in_buffer, in_frames); + out_frames = 20u; + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + // Half the input pre-buffer (10) is silence, and half the output (20). 
+ EXPECT_EQ(out_ch1[i], 0.0); + EXPECT_EQ(out_ch2[i], 0.0); + } + + // Update out rate + out_rate = 44100; + dr.UpdateResampler(out_rate, channels); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + out_frames = in_frames * out_rate / in_rate; + EXPECT_EQ(out_frames, 18u); + // Even if we provide no input if we have enough buffered input, we can create + // output + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); +} + +TEST(TestDynamicResampler, BigRangeOutRates_Float) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 10; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + uint32_t pre_buffer = 20; + + DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + const uint32_t in_capacity = 40; + float in_ch1[in_capacity] = {}; + float in_ch2[in_capacity] = {}; + for (uint32_t i = 0; i < in_capacity; ++i) { + in_ch1[i] = in_ch2[i] = 0.01f * i; + } + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + const uint32_t out_capacity = 1000; + float out_ch1[out_capacity] = {}; + float out_ch2[out_capacity] = {}; + + for (uint32_t rate = 10000; rate < 90000; ++rate) { + out_rate = rate; + dr.UpdateResampler(out_rate, channels); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + in_frames = 20; // more than we need + out_frames = in_frames * out_rate / in_rate; + for (uint32_t y = 0; y < 2; ++y) { + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + } + } +} + +TEST(TestDynamicResampler, BigRangeOutRates_Short) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 
10; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 44100; + uint32_t pre_buffer = 20; + + DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + + const uint32_t in_capacity = 40; + short in_ch1[in_capacity] = {}; + short in_ch2[in_capacity] = {}; + for (uint32_t i = 0; i < in_capacity; ++i) { + in_ch1[i] = in_ch2[i] = i; + } + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + const uint32_t out_capacity = 1000; + short out_ch1[out_capacity] = {}; + short out_ch2[out_capacity] = {}; + + for (uint32_t rate = 10000; rate < 90000; ++rate) { + out_rate = rate; + dr.UpdateResampler(out_rate, channels); + in_frames = 20; // more than we need + out_frames = in_frames * out_rate / in_rate; + for (uint32_t y = 0; y < 2; ++y) { + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + } + } +} + +TEST(TestDynamicResampler, UpdateChannels_Float) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 10; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 48000; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + + float in_ch1[10] = {}; + float in_ch2[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch1[i] = in_ch2[i] = 0.01f * i; + } + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[10] = {}; + float out_ch2[10] = {}; + + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + + // Add 3rd channel + dr.UpdateResampler(out_rate, 3); + 
EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), 3u); + + float in_ch3[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch3[i] = 0.01f * i; + } + in_buffer.AppendElement(); + in_buffer[2] = in_ch3; + float out_ch3[10] = {}; + + dr.AppendInput(in_buffer, in_frames); + + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch3, out_frames, 2); + EXPECT_FALSE(hasUnderrun); + + // Add 4th channel + float in_ch4[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch4[i] = 0.01f * i; + } + in_buffer.AppendElement(); + in_buffer[3] = in_ch4; + float out_ch4[10] = {}; + + dr.UpdateResampler(out_rate, 4); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), 4u); + dr.AppendInput(in_buffer, in_frames); + + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch3, out_frames, 2); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch4, out_frames, 3); + EXPECT_FALSE(hasUnderrun); +} + +TEST(TestDynamicResampler, UpdateChannels_Short) +{ + uint32_t in_frames = 10; + uint32_t out_frames = 10; + uint32_t channels = 2; + uint32_t in_rate = 44100; + uint32_t out_rate = 48000; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_S16); + + short in_ch1[10] = {}; + short in_ch2[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch1[i] = in_ch2[i] = i; + } + AutoTArray<const short*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + short out_ch1[10] = {}; + short out_ch2[10] = {}; + + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + 
EXPECT_FALSE(hasUnderrun); + + // Add 3rd channel + dr.UpdateResampler(out_rate, 3); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), 3u); + + short in_ch3[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch3[i] = i; + } + in_buffer.AppendElement(); + in_buffer[2] = in_ch3; + short out_ch3[10] = {}; + + dr.AppendInput(in_buffer, in_frames); + + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch3, out_frames, 2); + EXPECT_FALSE(hasUnderrun); + + // Add 4th channel + short in_ch4[10] = {}; + for (uint32_t i = 0; i < in_frames; ++i) { + in_ch4[i] = i; + } + in_buffer.AppendElement(); + in_buffer[3] = in_ch4; + short out_ch4[10] = {}; + + dr.UpdateResampler(out_rate, 4); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), 4u); + dr.AppendInput(in_buffer, in_frames); + + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch3, out_frames, 2); + EXPECT_FALSE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch4, out_frames, 3); + EXPECT_FALSE(hasUnderrun); +} + +TEST(TestDynamicResampler, Underrun) +{ + const uint32_t in_frames = 100; + const uint32_t out_frames = 200; + uint32_t channels = 2; + uint32_t in_rate = 48000; + uint32_t out_rate = 48000; + + DynamicResampler dr(in_rate, out_rate); + dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); + EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetChannels(), channels); + + float in_ch1[in_frames] = {}; + float in_ch2[in_frames] = {}; + AutoTArray<const float*, 2> in_buffer; + in_buffer.AppendElements(channels); + in_buffer[0] = in_ch1; + in_buffer[1] = in_ch2; + + float out_ch1[out_frames] = {}; + float out_ch2[out_frames] = {}; + + for (uint32_t i = 0; i < in_frames; ++i) { + 
in_ch1[i] = 0.01f * i; + in_ch2[i] = -0.01f * i; + } + dr.AppendInput(in_buffer, in_frames); + bool hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + for (uint32_t i = 0; i < in_frames; ++i) { + EXPECT_EQ(out_ch1[i], in_ch1[i]); + EXPECT_EQ(out_ch2[i], in_ch2[i]); + } + for (uint32_t i = in_frames; i < out_frames; ++i) { + EXPECT_EQ(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_EQ(out_ch2[i], 0.0f) << "for i=" << i; + } + + // No more frames in the input buffer + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_EQ(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_EQ(out_ch2[i], 0.0f) << "for i=" << i; + } + + // Now try with resampling. + dr.UpdateResampler(out_rate / 2, channels); + dr.AppendInput(in_buffer, in_frames); + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + // There is some buffering in the resampler, which is why the below is not + // exact. 
+ for (uint32_t i = 0; i < 50; ++i) { + EXPECT_GT(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_LT(out_ch2[i], 0.0f) << "for i=" << i; + } + for (uint32_t i = 50; i < 54; ++i) { + EXPECT_NE(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_NE(out_ch2[i], 0.0f) << "for i=" << i; + } + for (uint32_t i = 54; i < out_frames; ++i) { + EXPECT_EQ(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_EQ(out_ch2[i], 0.0f) << "for i=" << i; + } + + // No more frames in the input buffer + hasUnderrun = dr.Resample(out_ch1, out_frames, 0); + EXPECT_TRUE(hasUnderrun); + hasUnderrun = dr.Resample(out_ch2, out_frames, 1); + EXPECT_TRUE(hasUnderrun); + for (uint32_t i = 0; i < out_frames; ++i) { + EXPECT_EQ(out_ch1[i], 0.0f) << "for i=" << i; + EXPECT_EQ(out_ch2[i], 0.0f) << "for i=" << i; + } +} diff --git a/dom/media/driftcontrol/gtest/moz.build b/dom/media/driftcontrol/gtest/moz.build new file mode 100644 index 0000000000..d645760841 --- /dev/null +++ b/dom/media/driftcontrol/gtest/moz.build @@ -0,0 +1,21 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "TestAudioChunkList.cpp", + "TestAudioDriftCorrection.cpp", + "TestAudioResampler.cpp", + "TestDriftController.cpp", + "TestDynamicResampler.cpp", +] + +LOCAL_INCLUDES += [ + "/dom/media", + "/dom/media/driftcontrol", + "/dom/media/gtest", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/dom/media/driftcontrol/moz.build b/dom/media/driftcontrol/moz.build new file mode 100644 index 0000000000..f7e8e19f2b --- /dev/null +++ b/dom/media/driftcontrol/moz.build @@ -0,0 +1,30 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +with Files("**"): + BUG_COMPONENT = ("Core", "Audio/Video: MediaStreamGraph") + +TEST_DIRS += [ + "gtest", +] + +EXPORTS += [ + "AudioDriftCorrection.h", +] + +UNIFIED_SOURCES += [ + "AudioChunkList.cpp", + "AudioDriftCorrection.cpp", + "AudioResampler.cpp", + "DriftController.cpp", + "DynamicResampler.cpp", +] + +LOCAL_INCLUDES += [ + "/dom/media", +] + +FINAL_LIBRARY = "xul" diff --git a/dom/media/driftcontrol/plot.py b/dom/media/driftcontrol/plot.py new file mode 100755 index 0000000000..d55c0f7de0 --- /dev/null +++ b/dom/media/driftcontrol/plot.py @@ -0,0 +1,135 @@ +#! /usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This scripts plots graphs produced by our drift correction code. +# +# Install dependencies with: +# > pip install bokeh pandas +# +# Generate the csv data file with the DriftControllerGraphs log module: +# > MOZ_LOG=raw,sync,DriftControllerGraphs:5 \ +# > MOZ_LOG_FILE=/tmp/driftcontrol.csv \ +# > ./mach gtest '*AudioDrift*StepResponse' +# +# Generate the graphs with this script: +# > ./dom/media/driftcontrol/plot.py /tmp/driftcontrol.csv.moz_log +# +# The script should produce a file plot.html in the working directory and +# open it in the default browser. + +import argparse +from collections import OrderedDict + +import pandas +from bokeh.io import output_file, show +from bokeh.layouts import gridplot +from bokeh.models import TabPanel, Tabs +from bokeh.plotting import figure + + +def main(): + parser = argparse.ArgumentParser( + prog="plot.py for DriftControllerGraphs", + description="""Takes a csv file of DriftControllerGraphs data +(from a single DriftController instance) and plots +them into plot.html in the current working directory. 
+ +The easiest way to produce the data is with MOZ_LOG: +MOZ_LOG=raw,sync,DriftControllerGraphs:5 \ +MOZ_LOG_FILE=/tmp/driftcontrol.csv \ +./mach gtest '*AudioDrift*StepResponse'""", + ) + parser.add_argument("csv_file", type=str) + args = parser.parse_args() + + all_df = pandas.read_csv(args.csv_file) + + # Filter on distinct ids to support multiple plotting sources + tabs = [] + for id in list(OrderedDict.fromkeys(all_df["id"])): + df = all_df[all_df["id"] == id] + + t = df["t"] + buffering = df["buffering"] + desired = df["desired"] + buffersize = df["buffersize"] + inlatency = df["inlatency"] + outlatency = df["outlatency"] + inrate = df["inrate"] + outrate = df["outrate"] + hysteresisthreshold = df["hysteresisthreshold"] + corrected = df["corrected"] + hysteresiscorrected = df["hysteresiscorrected"] + configured = df["configured"] + p = df["p"] + i = df["i"] + d = df["d"] + kpp = df["kpp"] + kii = df["kii"] + kdd = df["kdd"] + control = df["control"] + + output_file("plot.html") + + fig1 = figure() + fig1.line(t, inlatency, color="hotpink", legend_label="In latency") + fig1.line(t, outlatency, color="firebrick", legend_label="Out latency") + fig1.line(t, buffering, color="dodgerblue", legend_label="Actual buffering") + fig1.line(t, desired, color="goldenrod", legend_label="Desired buffering") + fig1.line(t, buffersize, color="seagreen", legend_label="Buffer size") + fig1.varea( + t, + [d - h for (d, h) in zip(desired, hysteresisthreshold)], + [d + h for (d, h) in zip(desired, hysteresisthreshold)], + alpha=0.2, + color="goldenrod", + legend_label="Hysteresis Threshold (won't correct out rate within area)", + ) + + fig2 = figure(x_range=fig1.x_range) + fig2.line(t, inrate, color="hotpink", legend_label="Nominal in sample rate") + fig2.line(t, outrate, color="firebrick", legend_label="Nominal out sample rate") + fig2.line( + t, corrected, color="dodgerblue", legend_label="Corrected out sample rate" + ) + fig2.line( + t, + hysteresiscorrected, + color="seagreen", 
+ legend_label="Hysteresis-corrected out sample rate", + ) + fig2.line( + t, configured, color="goldenrod", legend_label="Configured out sample rate" + ) + + fig3 = figure(x_range=fig1.x_range) + fig3.line(t, p, color="goldenrod", legend_label="P") + fig3.line(t, i, color="dodgerblue", legend_label="I") + fig3.line(t, d, color="seagreen", legend_label="D") + + fig4 = figure(x_range=fig1.x_range) + fig4.line(t, kpp, color="goldenrod", legend_label="KpP") + fig4.line(t, kii, color="dodgerblue", legend_label="KiI") + fig4.line(t, kdd, color="seagreen", legend_label="KdD") + fig4.line(t, control, color="hotpink", legend_label="Control Signal") + + fig1.legend.location = "top_left" + fig2.legend.location = "top_right" + fig3.legend.location = "bottom_left" + fig4.legend.location = "bottom_right" + for fig in (fig1, fig2, fig3, fig4): + fig.legend.background_fill_alpha = 0.6 + fig.legend.click_policy = "hide" + + tabs.append( + TabPanel(child=gridplot([[fig1, fig2], [fig3, fig4]]), title=str(id)) + ) + + show(Tabs(tabs=tabs)) + + +if __name__ == "__main__": + main() |