diff options
Diffstat (limited to 'dom/media/driftcontrol')
-rw-r--r-- | dom/media/driftcontrol/AudioDriftCorrection.cpp | 9 | ||||
-rw-r--r-- | dom/media/driftcontrol/AudioResampler.cpp | 17 | ||||
-rw-r--r-- | dom/media/driftcontrol/AudioResampler.h | 21 | ||||
-rw-r--r-- | dom/media/driftcontrol/DriftController.cpp | 32 | ||||
-rw-r--r-- | dom/media/driftcontrol/DriftController.h | 33 | ||||
-rw-r--r-- | dom/media/driftcontrol/DynamicResampler.cpp | 83 | ||||
-rw-r--r-- | dom/media/driftcontrol/DynamicResampler.h | 148 | ||||
-rw-r--r-- | dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp | 60 | ||||
-rw-r--r-- | dom/media/driftcontrol/gtest/TestAudioResampler.cpp | 50 | ||||
-rw-r--r-- | dom/media/driftcontrol/gtest/TestDriftController.cpp | 97 | ||||
-rw-r--r-- | dom/media/driftcontrol/gtest/TestDynamicResampler.cpp | 76 | ||||
-rwxr-xr-x | dom/media/driftcontrol/plot.py | 8 |
12 files changed, 337 insertions, 297 deletions
diff --git a/dom/media/driftcontrol/AudioDriftCorrection.cpp b/dom/media/driftcontrol/AudioDriftCorrection.cpp index e66c435c36..1b86a99a44 100644 --- a/dom/media/driftcontrol/AudioDriftCorrection.cpp +++ b/dom/media/driftcontrol/AudioDriftCorrection.cpp @@ -35,8 +35,8 @@ AudioDriftCorrection::AudioDriftCorrection( : mTargetRate(aTargetRate), mDriftController(MakeUnique<DriftController>(aSourceRate, aTargetRate, mDesiredBuffering)), - mResampler(MakeUnique<AudioResampler>( - aSourceRate, aTargetRate, mDesiredBuffering, aPrincipalHandle)) {} + mResampler(MakeUnique<AudioResampler>(aSourceRate, aTargetRate, 0, + aPrincipalHandle)) {} AudioDriftCorrection::~AudioDriftCorrection() = default; @@ -94,7 +94,7 @@ AudioSegment AudioDriftCorrection::RequestFrames(const AudioSegment& aInput, mDriftController->UpdateClock(inputDuration, outputDuration, CurrentBuffering(), BufferSize()); // Update resampler's rate if there is a new correction. - mResampler->UpdateOutRate(mDriftController->GetCorrectedTargetRate()); + mResampler->UpdateInRate(mDriftController->GetCorrectedSourceRate()); if (hasUnderrun) { if (!mIsHandlingUnderrun) { NS_WARNING("Drift-correction: Underrun"); @@ -171,7 +171,8 @@ void AudioDriftCorrection::SetDesiredBuffering( media::TimeUnit aDesiredBuffering) { mDesiredBuffering = aDesiredBuffering; mDriftController->SetDesiredBuffering(mDesiredBuffering); - mResampler->SetPreBufferDuration(mDesiredBuffering); + mResampler->SetInputPreBufferFrameCount( + mDesiredBuffering.ToTicksAtRate(mDriftController->mSourceRate)); } } // namespace mozilla diff --git a/dom/media/driftcontrol/AudioResampler.cpp b/dom/media/driftcontrol/AudioResampler.cpp index ecef033a5c..1402fae39e 100644 --- a/dom/media/driftcontrol/AudioResampler.cpp +++ b/dom/media/driftcontrol/AudioResampler.cpp @@ -5,12 +5,14 @@ #include "AudioResampler.h" +#include "TimeUnits.h" + namespace mozilla { AudioResampler::AudioResampler(uint32_t aInRate, uint32_t aOutRate, - media::TimeUnit aPreBufferDuration, + uint32_t aInputPreBufferFrameCount, const PrincipalHandle& aPrincipalHandle) - : mResampler(aInRate, aOutRate, aPreBufferDuration), + : mResampler(aInRate, aOutRate, aInputPreBufferFrameCount), mOutputChunks(aOutRate / 10, STEREO, aPrincipalHandle) {} void AudioResampler::AppendInput(const AudioSegment& aInSegment) { @@ -59,11 +61,11 @@ AudioSegment AudioResampler::Resample(uint32_t aOutFrames, bool* aHasUnderrun) { return segment; } - media::TimeUnit outDuration(aOutFrames, mResampler.GetOutRate()); + media::TimeUnit outDuration(aOutFrames, mResampler.mOutRate); mResampler.EnsurePreBuffer(outDuration); const media::TimeUnit chunkCapacity(mOutputChunks.ChunkCapacity(), - mResampler.GetOutRate()); + mResampler.mOutRate); while (!outDuration.IsZero()) { MOZ_ASSERT(outDuration.IsPositive()); @@ -71,8 +73,7 @@ AudioSegment AudioResampler::Resample(uint32_t aOutFrames, bool* aHasUnderrun) { const media::TimeUnit chunkDuration = std::min(outDuration, chunkCapacity); outDuration -= chunkDuration; - const uint32_t outFrames = - chunkDuration.ToTicksAtRate(mResampler.GetOutRate()); + const uint32_t outFrames = chunkDuration.ToTicksAtRate(mResampler.mOutRate); for (uint32_t i = 0; i < chunk.ChannelCount(); ++i) { if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) { *aHasUnderrun |= mResampler.Resample( @@ -92,8 +93,8 @@ AudioSegment AudioResampler::Resample(uint32_t aOutFrames, bool* aHasUnderrun) { return segment; } -void AudioResampler::Update(uint32_t aOutRate, uint32_t aChannels) { - mResampler.UpdateResampler(aOutRate, aChannels); +void AudioResampler::Update(uint32_t aInRate, uint32_t aChannels) { + mResampler.UpdateResampler(aInRate, aChannels); mOutputChunks.Update(aChannels); } diff --git a/dom/media/driftcontrol/AudioResampler.h b/dom/media/driftcontrol/AudioResampler.h index 20e4f1051b..c5982c3a39 100644 --- a/dom/media/driftcontrol/AudioResampler.h +++ b/dom/media/driftcontrol/AudioResampler.h @@ -9,12 +9,11 @@ #include "AudioChunkList.h" #include "AudioSegment.h" #include "DynamicResampler.h" -#include "TimeUnits.h" namespace mozilla { /** - * Audio Resampler is a resampler able to change the output rate and channels + * Audio Resampler is a resampler able to change the input rate and channels * count on the fly. The API is simple and it is based in AudioSegment in order * to be used MTG. Memory allocations, for input and output buffers, will happen * in the constructor, when channel count changes and if the amount of input @@ -36,7 +35,7 @@ namespace mozilla { class AudioResampler final { public: AudioResampler(uint32_t aInRate, uint32_t aOutRate, - media::TimeUnit aPreBufferDuration, + uint32_t aInputPreBufferFrameCount, const PrincipalHandle& aPrincipalHandle); /** @@ -69,24 +68,24 @@ class AudioResampler final { AudioSegment Resample(uint32_t aOutFrames, bool* aHasUnderrun); /* - * Updates the output rate that will be used by the resampler. + * Updates the input rate that will be used by the resampler. */ - void UpdateOutRate(uint32_t aOutRate) { - Update(aOutRate, mResampler.GetChannels()); + void UpdateInRate(uint32_t aInRate) { + Update(aInRate, mResampler.GetChannels()); } /** - * Set the duration that should be used for pre-buffering. + * Set the number of frames that should be used for input pre-buffering. */ - void SetPreBufferDuration(media::TimeUnit aPreBufferDuration) { - mResampler.SetPreBufferDuration(aPreBufferDuration); + void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount) { + mResampler.SetInputPreBufferFrameCount(aInputPreBufferFrameCount); } private: void UpdateChannels(uint32_t aChannels) { - Update(mResampler.GetOutRate(), aChannels); + Update(mResampler.GetInRate(), aChannels); } - void Update(uint32_t aOutRate, uint32_t aChannels); + void Update(uint32_t aInRate, uint32_t aChannels); private: DynamicResampler mResampler; diff --git a/dom/media/driftcontrol/DriftController.cpp b/dom/media/driftcontrol/DriftController.cpp index b5603f72bb..791bfe9614 100644 --- a/dom/media/driftcontrol/DriftController.cpp +++ b/dom/media/driftcontrol/DriftController.cpp @@ -50,7 +50,7 @@ DriftController::DriftController(uint32_t aSourceRate, uint32_t aTargetRate, mSourceRate(aSourceRate), mTargetRate(aTargetRate), mDesiredBuffering(aDesiredBuffering), - mCorrectedTargetRate(static_cast<float>(aTargetRate)), + mCorrectedSourceRate(static_cast<float>(aSourceRate)), mMeasuredSourceLatency(5), mMeasuredTargetLatency(5) { LOG_CONTROLLER( @@ -76,8 +76,8 @@ void DriftController::ResetAfterUnderrun() { mTargetClock = mAdjustmentInterval; } -uint32_t DriftController::GetCorrectedTargetRate() const { - return std::lround(mCorrectedTargetRate); +uint32_t DriftController::GetCorrectedSourceRate() const { + return std::lround(mCorrectedSourceRate); } void DriftController::UpdateClock(media::TimeUnit aSourceDuration, @@ -112,14 +112,16 @@ void DriftController::CalculateCorrection(uint32_t aBufferedFrames, static constexpr float kDerivativeGain = 0.12; // Maximum 0.1% change per update. - const float cap = static_cast<float>(mTargetRate) / 1000.0f; + const float cap = static_cast<float>(mSourceRate) / 1000.0f; // The integral term can make us grow far outside the cap. Impose a cap on // it individually that is roughly equivalent to the final cap. const float integralCap = cap / kIntegralGain; - int32_t error = CheckedInt32(mDesiredBuffering.ToTicksAtRate(mSourceRate) - - aBufferedFrames) + // Use nominal (not corrected) source rate when interpreting desired + // buffering so that the set point is independent of the control value. + int32_t error = CheckedInt32(aBufferedFrames - + mDesiredBuffering.ToTicksAtRate(mSourceRate)) .value(); int32_t proportional = error; // targetClockSec is the number of target clock seconds since last @@ -135,12 +137,12 @@ void DriftController::CalculateCorrection(uint32_t aBufferedFrames, kIntegralGain * mIntegral + kDerivativeGain * derivative; float correctedRate = - std::clamp(static_cast<float>(mTargetRate) + controlSignal, - mCorrectedTargetRate - cap, mCorrectedTargetRate + cap); + std::clamp(static_cast<float>(mSourceRate) + controlSignal, + mCorrectedSourceRate - cap, mCorrectedSourceRate + cap); // mDesiredBuffering is divided by this to calculate the amount of // hysteresis to apply. With a denominator of 5, an error within +/- 20% of - // the desired buffering will not make corrections to the target sample + // the desired buffering will not make corrections to the source sample // rate. static constexpr uint32_t kHysteresisDenominator = 5; // +/- 20% @@ -183,7 +185,7 @@ void DriftController::CalculateCorrection(uint32_t aBufferedFrames, return correctedRate; } - return mCorrectedTargetRate; + return mCorrectedSourceRate; }(); if (mDurationWithinHysteresis > mIntegralCapTimeLimit) { @@ -201,10 +203,10 @@ void DriftController::CalculateCorrection(uint32_t aBufferedFrames, LOG_CONTROLLER( LogLevel::Verbose, this, "Recalculating Correction: Nominal: %uHz->%uHz, Corrected: " - "%uHz->%.2fHz (diff %.2fHz), error: %.2fms (hysteresisThreshold: " + "%.2fHz->%uHz (diff %.2fHz), error: %.2fms (hysteresisThreshold: " "%.2fms), buffering: %.2fms, desired buffering: %.2fms", - mSourceRate, mTargetRate, mSourceRate, hysteresisCorrectedRate, - hysteresisCorrectedRate - mCorrectedTargetRate, + mSourceRate, mTargetRate, hysteresisCorrectedRate, mTargetRate, + hysteresisCorrectedRate - mCorrectedSourceRate, media::TimeUnit(error, mSourceRate).ToSeconds() * 1000.0, media::TimeUnit(hysteresisThreshold, mSourceRate).ToSeconds() * 1000.0, media::TimeUnit(aBufferedFrames, mSourceRate).ToSeconds() * 1000.0, @@ -219,13 +221,13 @@ void DriftController::CalculateCorrection(uint32_t aBufferedFrames, kProportionalGain * proportional, kIntegralGain * mIntegral, kDerivativeGain * derivative, controlSignal); - if (std::lround(mCorrectedTargetRate) != + if (std::lround(mCorrectedSourceRate) != std::lround(hysteresisCorrectedRate)) { ++mNumCorrectionChanges; } mPreviousError = error; - mCorrectedTargetRate = hysteresisCorrectedRate; + mCorrectedSourceRate = std::max(1.f, hysteresisCorrectedRate); // Reset the counters to prepare for the next period. mTargetClock = media::TimeUnit::Zero(); diff --git a/dom/media/driftcontrol/DriftController.h b/dom/media/driftcontrol/DriftController.h index 0bd745c737..e8dbc57e0e 100644 --- a/dom/media/driftcontrol/DriftController.h +++ b/dom/media/driftcontrol/DriftController.h @@ -22,8 +22,8 @@ namespace mozilla { * the calculations. * * The DriftController looks at how the current buffering level differs from the - * desired buffering level and sets a corrected target rate. A resampler should - * be configured to resample from the nominal source rate to the corrected + * desired buffering level and sets a corrected source rate. A resampler should + * be configured to resample from the corrected source rate to the nominal * target rate. It assumes that the resampler is initially configured to * resample from the nominal source rate to the nominal target rate. * @@ -53,12 +53,12 @@ class DriftController final { void ResetAfterUnderrun(); /** - * Returns the drift-corrected target rate. + * Returns the drift-corrected source rate. */ - uint32_t GetCorrectedTargetRate() const; + uint32_t GetCorrectedSourceRate() const; /** - * The number of times mCorrectedTargetRate has been changed to adjust to + * The number of times mCorrectedSourceRate has been changed to adjust to * drift. */ uint32_t NumCorrectionChanges() const { return mNumCorrectionChanges; } @@ -102,9 +102,12 @@ class DriftController final { // This implements a simple PID controller with feedback. // Set point: SP = mDesiredBuffering. // Process value: PV(t) = aBufferedFrames. This is the feedback. - // Error: e(t) = mDesiredBuffering - aBufferedFrames. - // Control value: CV(t) = the number to add to the nominal target rate, i.e. - // the corrected target rate = CV(t) + nominal target rate. + // Error: e(t) = aBufferedFrames - mDesiredBuffering. + // Error is positive when the process value is high, which is + // the opposite of conventional PID controllers because this + // is a reverse-acting system. + // Control value: CV(t) = the value to add to the nominal source rate, i.e. + // the corrected source rate = nominal source rate + CV(t). // // Controller: // Proportional part: The error, p(t) = e(t), multiplied by a gain factor, Kp. @@ -115,13 +118,13 @@ class DriftController final { // Control signal: The sum of the parts' output, // u(t) = Kp*p(t) + Ki*i(t) + Kd*d(t). // - // Control action: Converting the control signal to a target sample rate. + // Control action: Converting the control signal to a source sample rate. // Simplified, a positive control signal means the buffer is - // lower than desired (because the error is positive), so the - // target sample rate must be increased in order to consume - // input data slower. We calculate the corrected target rate - // by simply adding the control signal, u(t), to the nominal - // target rate. + // higher than desired (because the error is positive), + // so the source sample rate must be increased in order to + // consume input data faster. + // We calculate the corrected source rate by simply adding + // the control signal, u(t), to the nominal source rate. // // Hysteresis: As long as the error is within a threshold of 20% of the set // point (desired buffering level) (up to 10ms for >50ms desired @@ -144,7 +147,7 @@ class DriftController final { int32_t mPreviousError = 0; float mIntegral = 0.0; Maybe<float> mIntegralCenterForCap; - float mCorrectedTargetRate; + float mCorrectedSourceRate; Maybe<int32_t> mLastHysteresisBoundaryCorrection; media::TimeUnit mDurationWithinHysteresis; uint32_t mNumCorrectionChanges = 0; diff --git a/dom/media/driftcontrol/DynamicResampler.cpp b/dom/media/driftcontrol/DynamicResampler.cpp index e6f230278e..65a2ae3b57 100644 --- a/dom/media/driftcontrol/DynamicResampler.cpp +++ b/dom/media/driftcontrol/DynamicResampler.cpp @@ -8,14 +8,13 @@ namespace mozilla { DynamicResampler::DynamicResampler(uint32_t aInRate, uint32_t aOutRate, - media::TimeUnit aPreBufferDuration) - : mInRate(aInRate), - mPreBufferDuration(aPreBufferDuration), - mOutRate(aOutRate) { + uint32_t aInputPreBufferFrameCount) + : mOutRate(aOutRate), + mInputPreBufferFrameCount(aInputPreBufferFrameCount), + mInRate(aInRate) { MOZ_ASSERT(aInRate); MOZ_ASSERT(aOutRate); - MOZ_ASSERT(aPreBufferDuration.IsPositiveOrZero()); - UpdateResampler(mOutRate, STEREO); + UpdateResampler(mInRate, STEREO); mInputStreamFile.Open("DynamicResamplerInFirstChannel", 1, mInRate); mOutputStreamFile.Open("DynamicResamplerOutFirstChannel", 1, mOutRate); } @@ -35,7 +34,10 @@ void DynamicResampler::SetSampleFormat(AudioSampleFormat aFormat) { b.SetSampleFormat(mSampleFormat); } - EnsureInputBufferDuration(CalculateInputBufferDuration()); + // Pre-allocate something big. + // EnsureInputBufferDuration() adds 50ms for jitter to this first allocation + // so the 50ms argument means at least 100ms. + EnsureInputBufferSizeInFrames(mInRate / 20); } void DynamicResampler::EnsurePreBuffer(media::TimeUnit aDuration) { @@ -43,33 +45,36 @@ void DynamicResampler::EnsurePreBuffer(media::TimeUnit aDuration) { return; } - media::TimeUnit buffered(mInternalInBuffer[0].AvailableRead(), mInRate); - if (buffered.IsZero()) { + uint32_t buffered = mInternalInBuffer[0].AvailableRead(); + if (buffered == 0) { // Wait for the first input segment before deciding how much to pre-buffer. // If it is large it indicates high-latency, and the buffer would have to - // handle that. + // handle that. This also means that the pre-buffer is not set up just + // before a large input segment would extend the buffering beyond the + // desired level. return; } mIsPreBufferSet = true; - media::TimeUnit needed = aDuration + mPreBufferDuration; - EnsureInputBufferDuration(needed); + uint32_t needed = + aDuration.ToTicksAtRate(mInRate) + mInputPreBufferFrameCount; + EnsureInputBufferSizeInFrames(needed); if (needed > buffered) { for (auto& b : mInternalInBuffer) { - b.PrependSilence((needed - buffered).ToTicksAtRate(mInRate)); + b.PrependSilence(needed - buffered); } } else if (needed < buffered) { for (auto& b : mInternalInBuffer) { - b.Discard((buffered - needed).ToTicksAtRate(mInRate)); + b.Discard(buffered - needed); } } } -void DynamicResampler::SetPreBufferDuration(media::TimeUnit aDuration) { - MOZ_ASSERT(aDuration.IsPositive()); - mPreBufferDuration = aDuration; +void DynamicResampler::SetInputPreBufferFrameCount( + uint32_t aInputPreBufferFrameCount) { + mInputPreBufferFrameCount = aInputPreBufferFrameCount; } bool DynamicResampler::Resample(float* aOutBuffer, uint32_t aOutFrames, @@ -93,7 +98,6 @@ void DynamicResampler::ResampleInternal(const float* aInBuffer, MOZ_ASSERT(mInRate); MOZ_ASSERT(mOutRate); - MOZ_ASSERT(aInBuffer); MOZ_ASSERT(aInFrames); MOZ_ASSERT(*aInFrames > 0); MOZ_ASSERT(aOutBuffer); @@ -125,7 +129,6 @@ void DynamicResampler::ResampleInternal(const int16_t* aInBuffer, MOZ_ASSERT(mInRate); MOZ_ASSERT(mOutRate); - MOZ_ASSERT(aInBuffer); MOZ_ASSERT(aInFrames); MOZ_ASSERT(*aInFrames > 0); MOZ_ASSERT(aOutBuffer); @@ -147,19 +150,20 @@ void DynamicResampler::ResampleInternal(const int16_t* aInBuffer, } } -void DynamicResampler::UpdateResampler(uint32_t aOutRate, uint32_t aChannels) { - MOZ_ASSERT(aOutRate); +void DynamicResampler::UpdateResampler(uint32_t aInRate, uint32_t aChannels) { + MOZ_ASSERT(aInRate); MOZ_ASSERT(aChannels); if (mChannels != aChannels) { + uint32_t bufferSizeInFrames = InFramesBufferSize(); if (mResampler) { speex_resampler_destroy(mResampler); } - mResampler = speex_resampler_init(aChannels, mInRate, aOutRate, + mResampler = speex_resampler_init(aChannels, aInRate, mOutRate, SPEEX_RESAMPLER_QUALITY_MIN, nullptr); MOZ_ASSERT(mResampler); mChannels = aChannels; - mOutRate = aOutRate; + mInRate = aInRate; // Between mono and stereo changes, keep always allocated 2 channels to // avoid reallocations in the most common case. if ((mChannels == STEREO || mChannels == 1) && @@ -192,14 +196,12 @@ void DynamicResampler::UpdateResampler(uint32_t aOutRate, uint32_t aChannels) { b->SetSampleFormat(mSampleFormat); } } - media::TimeUnit d = mSetBufferDuration; - mSetBufferDuration = media::TimeUnit::Zero(); - EnsureInputBufferDuration(d); + EnsureInputBufferSizeInFrames(bufferSizeInFrames); mInputTail.SetLength(mChannels); return; } - if (mOutRate != aOutRate) { + if (mInRate != aInRate) { // If the rates was the same the resampler was not being used so warm up. if (mOutRate == mInRate) { WarmUpResampler(true); @@ -208,9 +210,9 @@ void DynamicResampler::UpdateResampler(uint32_t aOutRate, uint32_t aChannels) { #ifdef DEBUG int rv = #endif - speex_resampler_set_rate(mResampler, mInRate, aOutRate); + speex_resampler_set_rate(mResampler, aInRate, mOutRate); MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); - mOutRate = aOutRate; + mInRate = aInRate; } } @@ -236,13 +238,9 @@ void DynamicResampler::WarmUpResampler(bool aSkipLatency) { } } if (aSkipLatency) { - int inputLatency = speex_resampler_get_input_latency(mResampler); - MOZ_ASSERT(inputLatency > 0); - uint32_t ratioNum, ratioDen; - speex_resampler_get_ratio(mResampler, &ratioNum, &ratioDen); - // Ratio at this point is one so only skip the input latency. No special - // calculations are needed. - speex_resampler_set_skip_frac_num(mResampler, inputLatency * ratioDen); + // Don't generate output frames corresponding to times before the next + // input sample. + speex_resampler_skip_zeros(mResampler); } mIsWarmingUp = false; } @@ -268,7 +266,16 @@ void DynamicResampler::AppendInputSilence(const uint32_t aInFrames) { } uint32_t DynamicResampler::InFramesBufferSize() const { - return mSetBufferDuration.ToTicksAtRate(mInRate); + if (mSampleFormat == AUDIO_FORMAT_SILENCE) { + return 0; + } + // Buffers may have different capacities if a memory allocation has failed. + MOZ_ASSERT(!mInternalInBuffer.IsEmpty()); + uint32_t min = std::numeric_limits<uint32_t>::max(); + for (const auto& b : mInternalInBuffer) { + min = std::min(min, b.Capacity()); + } + return min; } uint32_t DynamicResampler::InFramesBuffered(uint32_t aChannelIndex) const { @@ -276,7 +283,7 @@ uint32_t DynamicResampler::InFramesBuffered(uint32_t aChannelIndex) const { MOZ_ASSERT(aChannelIndex <= mChannels); MOZ_ASSERT(aChannelIndex <= mInternalInBuffer.Length()); if (!mIsPreBufferSet) { - return mPreBufferDuration.ToTicksAtRate(mInRate); + return mInputPreBufferFrameCount; } return mInternalInBuffer[aChannelIndex].AvailableRead(); } diff --git a/dom/media/driftcontrol/DynamicResampler.h b/dom/media/driftcontrol/DynamicResampler.h index c1b9000aa0..1f601c898b 100644 --- a/dom/media/driftcontrol/DynamicResampler.h +++ b/dom/media/driftcontrol/DynamicResampler.h @@ -27,15 +27,13 @@ const uint32_t STEREO = 2; * to allow the requested to be resampled and returned. * * Input data buffering makes use of the AudioRingBuffer. The capacity of the - * buffer is initially 100ms of float audio and it is pre-allocated at the - * constructor. Should the input data grow beyond that, the input buffer is - * re-allocated on the fly. In addition to that, due to special feature of + * buffer is initially 100ms of audio and it is pre-allocated during + * SetSampleFormat(). Should the input data grow beyond that, the input buffer + * is re-allocated on the fly. In addition to that, due to special feature of * AudioRingBuffer, no extra copies take place when the input data is fed to the * resampler. * - * The sample format must be set before using any method. If the provided sample - * format is of type short the pre-allocated capacity of the input buffer - * becomes 200ms of short audio. + * The sample format must be set before using any method. * * The DynamicResampler is not thread-safe, so all the methods appart from the * constructor must be called on the same thread. @@ -47,16 +45,15 @@ class DynamicResampler final { * The channel count will be set to stereo. Memory allocation will take * place. The input buffer is non-interleaved. */ - DynamicResampler( - uint32_t aInRate, uint32_t aOutRate, - media::TimeUnit aPreBufferDuration = media::TimeUnit::Zero()); + DynamicResampler(uint32_t aInRate, uint32_t aOutRate, + uint32_t aInputPreBufferFrameCount = 0); ~DynamicResampler(); /** * Set the sample format type to float or short. */ void SetSampleFormat(AudioSampleFormat aFormat); - uint32_t GetOutRate() const { return mOutRate; } + uint32_t GetInRate() const { return mInRate; } uint32_t GetChannels() const { return mChannels; } /** @@ -81,16 +78,16 @@ class DynamicResampler final { /** * Prepends existing input data with a silent pre-buffer if not already done. - * Data will be prepended so that after resampling aOutFrames worth of output - * data, the buffering level will be as close as possible to - * mPreBufferDuration, which is the desired buffering level. + * Data will be prepended so that after resampling aDuration of data, + * the buffering level will be as close as possible to + * mInputPreBufferFrameCount, which is the desired buffering level. */ void EnsurePreBuffer(media::TimeUnit aDuration); /** - * Set the duration that should be used for pre-buffering. + * Set the number of frames that should be used for input pre-buffering. */ - void SetPreBufferDuration(media::TimeUnit aDuration); + void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount); /* * Resample as much frames as needed from the internal input buffer to the @@ -114,14 +111,14 @@ class DynamicResampler final { /** * Update the output rate or/and the channel count. If a value is not updated - * compared to the current one nothing happens. Changing the `aOutRate` + * compared to the current one nothing happens. Changing the `aInRate` * results in recalculation in the resampler. Changing `aChannels` results in * the reallocation of the internal input buffer with the exception of * changes between mono to stereo and vice versa where no reallocation takes * place. A stereo internal input buffer is always maintained even if the * sound is mono. */ - void UpdateResampler(uint32_t aOutRate, uint32_t aChannels); + void UpdateResampler(uint32_t aInRate, uint32_t aChannels); private: template <typename T> @@ -174,24 +171,24 @@ class DynamicResampler final { } uint32_t totalOutFramesNeeded = aOutFrames; - auto resample = [&] { - mInternalInBuffer[aChannelIndex].ReadNoCopy( - [&](const Span<const T>& aInBuffer) -> uint32_t { - if (!totalOutFramesNeeded) { - return 0; - } - uint32_t outFramesResampled = totalOutFramesNeeded; - uint32_t inFrames = aInBuffer.Length(); - ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer, - &outFramesResampled, aChannelIndex); - aOutBuffer += outFramesResampled; - totalOutFramesNeeded -= outFramesResampled; - mInputTail[aChannelIndex].StoreTail<T>(aInBuffer.To(inFrames)); - return inFrames; - }); + auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t { + uint32_t outFramesResampled = totalOutFramesNeeded; + uint32_t inFrames = aInLength; + ResampleInternal(aInBuffer, &inFrames, aOutBuffer, &outFramesResampled, + aChannelIndex); + aOutBuffer += outFramesResampled; + totalOutFramesNeeded -= outFramesResampled; + mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames); + return inFrames; }; - resample(); + mInternalInBuffer[aChannelIndex].ReadNoCopy( + [&](const Span<const T>& aInBuffer) -> uint32_t { + if (!totalOutFramesNeeded) { + return 0; + } + return resample(aInBuffer.Elements(), aInBuffer.Length()); + }); if (totalOutFramesNeeded == 0) { return false; @@ -204,8 +201,7 @@ class DynamicResampler final { ((CheckedUint32(totalOutFramesNeeded) * mInRate + mOutRate - 1) / mOutRate) .value(); - mInternalInBuffer[aChannelIndex].WriteSilence(totalInFramesNeeded); - resample(); + resample(nullptr, totalInFramesNeeded); } mIsPreBufferSet = false; return true; @@ -219,33 +215,14 @@ class DynamicResampler final { MOZ_ASSERT(mChannels); MOZ_ASSERT(aChannelIndex < mChannels); MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); - EnsureInputBufferDuration(media::TimeUnit( - CheckedInt64(mInternalInBuffer[aChannelIndex].AvailableRead()) + - aInFrames, - mInRate)); + EnsureInputBufferSizeInFrames( + mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames); mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames)); } void WarmUpResampler(bool aSkipLatency); - media::TimeUnit CalculateInputBufferDuration() const { - // Pre-allocate something big, twice the pre-buffer, or at least 100ms. - return std::max(mPreBufferDuration * 2, media::TimeUnit::FromSeconds(0.1)); - } - - bool EnsureInputBufferDuration(media::TimeUnit aDuration) { - if (aDuration <= mSetBufferDuration) { - // Buffer size is sufficient. - return true; - } - - // 5 second cap. - const media::TimeUnit cap = media::TimeUnit::FromSeconds(5); - if (mSetBufferDuration == cap) { - // Already at the cap. - return false; - } - + bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) { uint32_t sampleSize = 0; if (mSampleFormat == AUDIO_FORMAT_FLOAT32) { sampleSize = sizeof(float); @@ -258,53 +235,62 @@ class DynamicResampler final { return true; } + uint32_t sizeInFrames = InFramesBufferSize(); + if (aSizeInFrames <= sizeInFrames) { + // Buffer size is sufficient. + return true; // no reallocation necessary + } + + // 5 second cap. + const uint32_t cap = 5 * mInRate; + if (sizeInFrames >= cap) { + // Already at the cap. + return false; + } + // As a backoff strategy, at least double the previous size. - media::TimeUnit duration = mSetBufferDuration * 2; + sizeInFrames *= 2; - if (aDuration > duration) { + if (aSizeInFrames > sizeInFrames) { // A larger buffer than the normal backoff strategy provides is needed, or - // this is the first time setting the buffer size. Round up to the nearest - // 100ms, some jitter is expected. - duration = aDuration.ToBase<media::TimeUnit::CeilingPolicy>(10); + // this is the first time setting the buffer size. Add another 50ms, as + // some jitter is expected. + sizeInFrames = aSizeInFrames + mInRate / 20; } - duration = std::min(cap, duration); + // mInputPreBufferFrameCount is an indication of the desired average + // buffering. Provide for at least twice this. + sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2); + + sizeInFrames = std::min(cap, sizeInFrames); bool success = true; for (auto& b : mInternalInBuffer) { - success = success && - b.SetLengthBytes(sampleSize * duration.ToTicksAtRate(mInRate)); + success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames); } if (success) { // All buffers have the new size. - mSetBufferDuration = duration; return true; } - const uint32_t sizeInFrames = - static_cast<uint32_t>(mSetBufferDuration.ToTicksAtRate(mInRate)); // Allocating an input buffer failed. We stick with the old buffer size. NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u " "frames). Expect glitches.", sampleSize * sizeInFrames, sizeInFrames) .get()); - for (auto& b : mInternalInBuffer) { - MOZ_ALWAYS_TRUE(b.SetLengthBytes(sampleSize * sizeInFrames)); - } return false; } public: - const uint32_t mInRate; + const uint32_t mOutRate; private: bool mIsPreBufferSet = false; bool mIsWarmingUp = false; - media::TimeUnit mPreBufferDuration; - media::TimeUnit mSetBufferDuration = media::TimeUnit::Zero(); + uint32_t mInputPreBufferFrameCount; uint32_t mChannels = 0; - uint32_t mOutRate; + uint32_t mInRate; AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer; @@ -324,16 +310,16 @@ class DynamicResampler final { } template <typename T> void StoreTail(const T* aInBuffer, uint32_t aInFrames) { - if (aInFrames >= MAXSIZE) { - PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE); - mSize = MAXSIZE; + const T* inBuffer = aInBuffer; + mSize = std::min(aInFrames, MAXSIZE); + if (inBuffer) { + PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize); } else { - PodCopy(Buffer<T>(), aInBuffer, aInFrames); - mSize = aInFrames; + std::fill_n(Buffer<T>(), mSize, static_cast<T>(0)); } } uint32_t Length() { return mSize; } - static const uint32_t MAXSIZE = 20; + static constexpr uint32_t MAXSIZE = 20; private: float mBuffer[MAXSIZE] = {}; diff --git a/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp b/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp index c13f443d37..9d3f0f091a 100644 --- a/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp +++ b/dom/media/driftcontrol/gtest/TestAudioDriftCorrection.cpp @@ -260,10 +260,10 @@ TEST(TestAudioDriftCorrection, LargerTransmitterBlockSizeThanDesiredBuffering) // Input is stable so no corrections should occur. EXPECT_EQ(ad.NumCorrectionChanges(), 0U); - // The drift correction buffer size had to be larger than the desired (the - // buffer size is twice the initial buffering level), to accomodate the large - // input block size. - EXPECT_EQ(ad.BufferSize(), 9600U); + // The desired buffering and pre-buffering level was + // transmitterBlockSize * 11 / 10 to accomodate the large input block size. + // The buffer size was twice the pre-buffering level. + EXPECT_EQ(ad.BufferSize(), transmitterBlockSize * 11 / 10 * 2); } TEST(TestAudioDriftCorrection, LargerReceiverBlockSizeThanDesiredBuffering) @@ -275,9 +275,9 @@ TEST(TestAudioDriftCorrection, LargerReceiverBlockSizeThanDesiredBuffering) MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); AudioDriftCorrection ad(sampleRate, sampleRate, testPrincipal); + AudioSegment inSegment; for (uint32_t i = 0; i < (sampleRate / 1000) * 500; i += transmitterBlockSize) { - AudioSegment inSegment; AudioChunk chunk = CreateAudioChunk<float>(transmitterBlockSize, 1, AUDIO_FORMAT_FLOAT32); inSegment.AppendAndConsumeChunk(std::move(chunk)); @@ -285,6 +285,7 @@ TEST(TestAudioDriftCorrection, LargerReceiverBlockSizeThanDesiredBuffering) if (i % receiverBlockSize == 0) { AudioSegment outSegment = ad.RequestFrames(inSegment, receiverBlockSize); EXPECT_EQ(outSegment.GetDuration(), receiverBlockSize); + inSegment.Clear(); } if (i >= receiverBlockSize) { @@ -294,11 +295,12 @@ TEST(TestAudioDriftCorrection, LargerReceiverBlockSizeThanDesiredBuffering) // Input is stable so no corrections should occur. EXPECT_EQ(ad.NumCorrectionChanges(), 0U); + EXPECT_EQ(ad.NumUnderruns(), 0U); // The drift correction buffer size had to be larger than the desired (the // buffer size is twice the initial buffering level), to accomodate the large // input block size that gets buffered in the resampler only when processing // output. - EXPECT_EQ(ad.BufferSize(), 19200U); + EXPECT_EQ(ad.BufferSize(), 9600U); } TEST(TestAudioDriftCorrection, DynamicInputBufferSizeChanges) @@ -329,9 +331,9 @@ TEST(TestAudioDriftCorrection, DynamicInputBufferSizeChanges) if (((receivedFramesStart - transmittedFramesStart + i) / aTransmitterBlockSize) > numBlocksTransmitted) { tone.Generate(inSegment, aTransmitterBlockSize); - MOZ_ASSERT(!inSegment.IsNull()); + MOZ_RELEASE_ASSERT(!inSegment.IsNull()); inToneVerifier.AppendData(inSegment); - MOZ_ASSERT(!inSegment.IsNull()); + MOZ_RELEASE_ASSERT(!inSegment.IsNull()); ++numBlocksTransmitted; totalFramesTransmitted += aTransmitterBlockSize; } @@ -459,29 +461,50 @@ TEST(TestAudioDriftCorrection, DriftStepResponseUnderrunHighLatencyInput) constexpr uint32_t iterations = 200; const PrincipalHandle testPrincipal = MakePrincipalHandle(nsContentUtils::GetSystemPrincipal()); - uint32_t inputRate = nominalRate * 1005 / 1000; // 0.5% drift - uint32_t inputInterval = inputRate; + uint32_t inputRate1 = nominalRate * 1005 / 1000; // 0.5% drift + uint32_t inputInterval1 = inputRate1; AudioGenerator<AudioDataValue> tone(1, nominalRate, 440); AudioDriftCorrection ad(nominalRate, nominalRate, testPrincipal); for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { AudioSegment inSegment; if (i > 0 && i % interval == 0) { - tone.Generate(inSegment, inputInterval); + tone.Generate(inSegment, inputInterval1); } ad.RequestFrames(inSegment, interval / 100); } - inputRate = nominalRate * 995 / 1000; // -0.5% drift - inputInterval = inputRate; + uint32_t inputRate2 = nominalRate * 995 / 1000; // -0.5% drift + uint32_t inputInterval2 = inputRate2; for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { AudioSegment inSegment; + // The first segment is skipped to cause an underrun. if (i > 0 && i % interval == 0) { - tone.Generate(inSegment, inputInterval); + tone.Generate(inSegment, inputInterval2); } ad.RequestFrames(inSegment, interval / 100); + if (i >= interval / 10 && i < interval) { + // While the DynamicResampler has not set its pre-buffer after the + // underrun, InFramesBuffered() reports the pre-buffer size. + // The initial desired buffer and pre-buffer size was + // inputInterval1 * 11 / 10 to accomodate the large input block size. + // This was doubled when the underrun occurred. + EXPECT_EQ(ad.CurrentBuffering(), inputInterval1 * 11 / 10 * 2) + << "for i=" << i; + } else if (i == interval) { + // After the pre-buffer was set and used to generate the first output + // block, the actual number of frames buffered almost matches the + // pre-buffer size, with some rounding from output to input frame count + // conversion. + EXPECT_EQ(ad.CurrentBuffering(), inputInterval1 * 11 / 10 * 2 - 1) + << "after first input after underrun"; + } } - EXPECT_EQ(ad.BufferSize(), 220800U); + // The initial desired buffering and pre-buffering level was + // inputInterval1 * 11 / 10 to accomodate the large input block size. + // The buffer size was initially twice the pre-buffering level, and then + // doubled when the underrun occurred. + EXPECT_EQ(ad.BufferSize(), inputInterval1 * 11 / 10 * 2 * 2); EXPECT_EQ(ad.NumUnderruns(), 1u); } @@ -511,7 +534,7 @@ TEST(TestAudioDriftCorrection, DriftStepResponseOverrun) ad.RequestFrames(inSegment, interval / 100); } - // Change input callbacks to 2000ms (+0.5% drift) = 48200 frames, which will + // Change input callbacks to 1000ms (+0.5% drift) = 48200 frames, which will // overrun the ring buffer. for (uint32_t i = 0; i < interval * iterations; i += interval / 100) { AudioSegment inSegment; @@ -524,6 +547,9 @@ TEST(TestAudioDriftCorrection, DriftStepResponseOverrun) ad.RequestFrames(inSegment, interval / 100); } - EXPECT_EQ(ad.BufferSize(), 105600U); + // The desired buffering and pre-buffering levels were increased to + // inputInterval * 11 / 10 to accomodate the large input block size. + // The buffer size was increased to twice the pre-buffering level. + EXPECT_EQ(ad.BufferSize(), inputInterval * 11 / 10 * 2); EXPECT_EQ(ad.NumUnderruns(), 1u); } diff --git a/dom/media/driftcontrol/gtest/TestAudioResampler.cpp b/dom/media/driftcontrol/gtest/TestAudioResampler.cpp index f04bc87314..7122b60a1a 100644 --- a/dom/media/driftcontrol/gtest/TestAudioResampler.cpp +++ b/dom/media/driftcontrol/gtest/TestAudioResampler.cpp @@ -64,8 +64,7 @@ TEST(TestAudioResampler, OutAudioSegment_Float) uint32_t pre_buffer = 21; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioSegment inSegment = CreateAudioSegment<float>(in_frames, channels, AUDIO_FORMAT_FLOAT32); @@ -91,9 +90,9 @@ TEST(TestAudioResampler, OutAudioSegment_Float) } } - // Update out rate - out_rate = 44100; - dr.UpdateOutRate(out_rate); + // Update in rate + in_rate = 26122; + dr.UpdateInRate(in_rate); out_frames = in_frames * out_rate / in_rate; EXPECT_EQ(out_frames, 18u); // Even if we provide no input if we have enough buffered input, we can create @@ -121,8 +120,7 @@ TEST(TestAudioResampler, OutAudioSegment_Short) uint32_t pre_buffer = 21; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioSegment inSegment = CreateAudioSegment<short>(in_frames, channels, AUDIO_FORMAT_S16); @@ -148,9 +146,9 @@ TEST(TestAudioResampler, OutAudioSegment_Short) } } - // Update out rate - out_rate = 44100; - dr.UpdateOutRate(out_rate); + // Update in rate + in_rate = 26122; + dr.UpdateInRate(out_rate); out_frames = in_frames * out_rate / in_rate; EXPECT_EQ(out_frames, 18u); // Even if we provide no input if we have enough buffered input, we can create @@ -175,8 +173,7 @@ TEST(TestAudioResampler, OutAudioSegmentLargerThanResampledInput_Float) uint32_t pre_buffer = 5; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - PRINCIPAL_HANDLE_NONE); + AudioResampler dr(in_rate, out_rate, pre_buffer, PRINCIPAL_HANDLE_NONE); AudioSegment inSegment = CreateAudioSegment<float>(in_frames, channels, AUDIO_FORMAT_FLOAT32); @@ -209,8 +206,7 @@ TEST(TestAudioResampler, InAudioSegment_Float) uint32_t out_rate = 48000; uint32_t pre_buffer = 10; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioSegment inSegment; @@ -275,8 +271,7 @@ TEST(TestAudioResampler, InAudioSegment_Short) uint32_t out_rate = 48000; uint32_t pre_buffer = 10; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioSegment inSegment; @@ -342,8 +337,7 @@ TEST(TestAudioResampler, ChannelChange_MonoToStereo) uint32_t pre_buffer = 0; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioChunk monoChunk = CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); @@ -378,8 +372,7 @@ TEST(TestAudioResampler, ChannelChange_StereoToMono) uint32_t pre_buffer = 0; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioChunk monoChunk = CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); @@ -414,8 +407,7 @@ TEST(TestAudioResampler, ChannelChange_StereoToQuad) uint32_t pre_buffer = 0; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, pre_buffer, testPrincipal); AudioChunk stereoChunk = CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); @@ -452,7 +444,7 @@ TEST(TestAudioResampler, ChannelChange_QuadToStereo) uint32_t in_rate = 24000; uint32_t out_rate = 48000; - AudioResampler dr(in_rate, out_rate, media::TimeUnit::Zero(), testPrincipal); + AudioResampler dr(in_rate, out_rate, 0, testPrincipal); AudioChunk stereoChunk = CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); @@ -497,7 +489,7 @@ TEST(TestAudioResampler, ChannelChange_Discontinuity) uint32_t in_frames = in_rate / 100; uint32_t out_frames = out_rate / 100; - AudioResampler dr(in_rate, out_rate, media::TimeUnit::Zero(), testPrincipal); + AudioResampler dr(in_rate, out_rate, 0, testPrincipal); AudioChunk monoChunk = CreateAudioChunk<float>(in_frames, 1, AUDIO_FORMAT_FLOAT32); @@ -560,8 +552,7 @@ TEST(TestAudioResampler, ChannelChange_Discontinuity2) uint32_t in_frames = in_rate / 100; uint32_t out_frames = out_rate / 100; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(10, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, 10, testPrincipal); AudioChunk monoChunk = CreateAudioChunk<float>(in_frames / 2, 1, AUDIO_FORMAT_FLOAT32); @@ -630,8 +621,7 @@ TEST(TestAudioResampler, ChannelChange_Discontinuity3) uint32_t in_frames = in_rate / 100; uint32_t out_frames = out_rate / 100; - AudioResampler dr(in_rate, out_rate, media::TimeUnit(10, in_rate), - testPrincipal); + AudioResampler dr(in_rate, out_rate, 10, testPrincipal); AudioChunk stereoChunk = CreateAudioChunk<float>(in_frames, 2, AUDIO_FORMAT_FLOAT32); @@ -660,9 +650,9 @@ TEST(TestAudioResampler, ChannelChange_Discontinuity3) // The resampler here is updated due to the rate change. This is because the // in and out rate was the same so a pass through logic was used. By updating - // the out rate to something different than the in rate, the resampler will + // the in rate to something different than the out rate, the resampler will // start being used and discontinuity will exist. - dr.UpdateOutRate(out_rate + 400); + dr.UpdateInRate(in_rate - 400); dr.AppendInput(inSegment); AudioSegment s2 = dr.Resample(out_frames, &hasUnderrun); EXPECT_FALSE(hasUnderrun); diff --git a/dom/media/driftcontrol/gtest/TestDriftController.cpp b/dom/media/driftcontrol/gtest/TestDriftController.cpp index 33486f945f..132577e44a 100644 --- a/dom/media/driftcontrol/gtest/TestDriftController.cpp +++ b/dom/media/driftcontrol/gtest/TestDriftController.cpp @@ -18,50 +18,50 @@ TEST(TestDriftController, Basic) constexpr uint32_t bufferedHigh = 7 * 480; DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000U); // The adjustment interval is 1s. const auto oneSec = media::TimeUnit(48000, 48000); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 47952u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48048u); } TEST(TestDriftController, BasicResampler) { // The buffer level is the only input to the controller logic. - constexpr uint32_t buffered = 5 * 240; - constexpr uint32_t bufferedLow = 3 * 240; - constexpr uint32_t bufferedHigh = 7 * 240; + constexpr uint32_t buffered = 5 * 480; + constexpr uint32_t bufferedLow = 3 * 480; + constexpr uint32_t bufferedHigh = 7 * 480; - DriftController c(24000, 48000, media::TimeUnit::FromSeconds(0.05)); + DriftController c(48000, 24000, media::TimeUnit::FromSeconds(0.05)); // The adjustment interval is 1s. const auto oneSec = media::TimeUnit(48000, 48000); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // low c.UpdateClock(oneSec, oneSec, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 47952u); // high c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // high c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 47964u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48048u); } TEST(TestDriftController, BufferedInput) @@ -72,56 +72,56 @@ TEST(TestDriftController, BufferedInput) constexpr uint32_t bufferedHigh = 7 * 480; DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // The adjustment interval is 1s. const auto oneSec = media::TimeUnit(48000, 48000); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // 0 buffered when updating correction c.UpdateClock(oneSec, oneSec, 0, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 47952u); c.UpdateClock(oneSec, oneSec, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48048u); } TEST(TestDriftController, BufferedInputWithResampling) { // The buffer level is the only input to the controller logic. - constexpr uint32_t buffered = 5 * 240; - constexpr uint32_t bufferedLow = 3 * 240; - constexpr uint32_t bufferedHigh = 7 * 240; + constexpr uint32_t buffered = 5 * 480; + constexpr uint32_t bufferedLow = 3 * 480; + constexpr uint32_t bufferedHigh = 7 * 480; - DriftController c(24000, 48000, media::TimeUnit::FromSeconds(0.05)); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + DriftController c(48000, 24000, media::TimeUnit::FromSeconds(0.05)); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // The adjustment interval is 1s. const auto oneSec = media::TimeUnit(24000, 24000); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // 0 buffered when updating correction c.UpdateClock(oneSec, oneSec, 0, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48048u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 47952u); c.UpdateClock(oneSec, oneSec, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 47952u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48048u); } TEST(TestDriftController, SmallError) @@ -132,21 +132,21 @@ TEST(TestDriftController, SmallError) constexpr uint32_t bufferedHigh = buffered + 48; DriftController c(48000, 48000, media::TimeUnit::FromSeconds(0.05)); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); // The adjustment interval is 1s. const auto oneSec = media::TimeUnit(48000, 48000); c.UpdateClock(oneSec, oneSec, buffered, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); c.UpdateClock(oneSec, oneSec, bufferedHigh, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000u); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000u); } TEST(TestDriftController, SmallBufferedFrames) @@ -158,11 +158,34 @@ TEST(TestDriftController, SmallBufferedFrames) media::TimeUnit oneSec = media::TimeUnit::FromSeconds(1); media::TimeUnit hundredMillis = oneSec / 10; - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000U); for (uint32_t i = 0; i < 9; ++i) { c.UpdateClock(hundredMillis, hundredMillis, bufferedLow, 0); } - EXPECT_EQ(c.GetCorrectedTargetRate(), 48000U); + EXPECT_EQ(c.GetCorrectedSourceRate(), 48000U); c.UpdateClock(hundredMillis, hundredMillis, bufferedLow, 0); - EXPECT_EQ(c.GetCorrectedTargetRate(), 48048U); + EXPECT_EQ(c.GetCorrectedSourceRate(), 47952U); +} + +TEST(TestDriftController, VerySmallBufferedFrames) +{ + // The buffer level is the only input to the controller logic. + uint32_t bufferedLow = 1; + uint32_t nominalRate = 48000; + + DriftController c(nominalRate, nominalRate, media::TimeUnit::FromSeconds(1)); + media::TimeUnit oneSec = media::TimeUnit::FromSeconds(1); + media::TimeUnit sourceDuration(1, nominalRate); + + EXPECT_EQ(c.GetCorrectedSourceRate(), nominalRate); + uint32_t previousCorrected = nominalRate; + // Steps are limited to nominalRate/1000. + // Perform 1001 steps to check the corrected rate does not underflow zero. + for (uint32_t i = 0; i < 1001; ++i) { + c.UpdateClock(sourceDuration, oneSec, bufferedLow, 0); + uint32_t correctedRate = c.GetCorrectedSourceRate(); + EXPECT_LE(correctedRate, previousCorrected) << "for i=" << i; + EXPECT_GT(correctedRate, 0u) << "for i=" << i; + previousCorrected = correctedRate; + } } diff --git a/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp b/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp index fb8ac52ae4..539dfbfbea 100644 --- a/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp +++ b/dom/media/driftcontrol/gtest/TestDynamicResampler.cpp @@ -19,7 +19,7 @@ TEST(TestDynamicResampler, SameRates_Float1) DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); - EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); // float in_ch1[] = {.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0}; @@ -76,7 +76,7 @@ TEST(TestDynamicResampler, SameRates_Short1) DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_S16); - EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); short in_ch1[] = {1, 2, 3}; @@ -298,9 +298,9 @@ TEST(TestDynamicResampler, UpdateOutRate_Float) uint32_t pre_buffer = 20; - DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); - EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); float in_ch1[10] = {}; @@ -329,10 +329,10 @@ TEST(TestDynamicResampler, UpdateOutRate_Float) EXPECT_FLOAT_EQ(out_ch2[i], 0.0); } - // Update out rate - out_rate = 44100; - dr.UpdateResampler(out_rate, channels); - EXPECT_EQ(dr.GetOutRate(), out_rate); + // Update in rate + in_rate = 26122; + dr.UpdateResampler(in_rate, channels); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); out_frames = in_frames * out_rate / in_rate; EXPECT_EQ(out_frames, 18u); @@ -354,9 +354,9 @@ TEST(TestDynamicResampler, UpdateOutRate_Short) uint32_t pre_buffer = 20; - DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_S16); - EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); short in_ch1[10] = {}; @@ -385,10 +385,10 @@ TEST(TestDynamicResampler, UpdateOutRate_Short) EXPECT_EQ(out_ch2[i], 0.0); } - // Update out rate - out_rate = 44100; - dr.UpdateResampler(out_rate, channels); - EXPECT_EQ(dr.GetOutRate(), out_rate); + // Update in rate + in_rate = 26122; + dr.UpdateResampler(in_rate, channels); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); out_frames = in_frames * out_rate / in_rate; EXPECT_EQ(out_frames, 18u); @@ -400,16 +400,15 @@ TEST(TestDynamicResampler, UpdateOutRate_Short) EXPECT_FALSE(hasUnderrun); } -TEST(TestDynamicResampler, BigRangeOutRates_Float) +TEST(TestDynamicResampler, BigRangeInRates_Float) { uint32_t in_frames = 10; uint32_t out_frames = 10; uint32_t channels = 2; uint32_t in_rate = 44100; uint32_t out_rate = 44100; - uint32_t pre_buffer = 20; - DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); const uint32_t in_capacity = 40; @@ -427,10 +426,14 @@ TEST(TestDynamicResampler, BigRangeOutRates_Float) float out_ch1[out_capacity] = {}; float out_ch2[out_capacity] = {}; - for (uint32_t rate = 10000; rate < 90000; ++rate) { - out_rate = rate; - dr.UpdateResampler(out_rate, channels); - EXPECT_EQ(dr.GetOutRate(), out_rate); + // Downsampling at a high enough ratio happens to have enough excess + // in_frames from rounding in the out_frames calculation to cover the + // skipped input latency when switching from zero-latency 44100->44100 to a + // non-1:1 ratio. + for (uint32_t rate = 100000; rate >= 10000; rate -= 2) { + in_rate = rate; + dr.UpdateResampler(in_rate, channels); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); in_frames = 20; // more than we need out_frames = in_frames * out_rate / in_rate; @@ -444,16 +447,15 @@ TEST(TestDynamicResampler, BigRangeOutRates_Float) } } -TEST(TestDynamicResampler, BigRangeOutRates_Short) +TEST(TestDynamicResampler, BigRangeInRates_Short) { uint32_t in_frames = 10; uint32_t out_frames = 10; uint32_t channels = 2; uint32_t in_rate = 44100; uint32_t out_rate = 44100; - uint32_t pre_buffer = 20; - DynamicResampler dr(in_rate, out_rate, media::TimeUnit(pre_buffer, in_rate)); + DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_S16); const uint32_t in_capacity = 40; @@ -471,9 +473,9 @@ TEST(TestDynamicResampler, BigRangeOutRates_Short) short out_ch1[out_capacity] = {}; short out_ch2[out_capacity] = {}; - for (uint32_t rate = 10000; rate < 90000; ++rate) { - out_rate = rate; - dr.UpdateResampler(out_rate, channels); + for (uint32_t rate = 100000; rate >= 10000; rate -= 2) { + in_rate = rate; + dr.UpdateResampler(in_rate, channels); in_frames = 20; // more than we need out_frames = in_frames * out_rate / in_rate; for (uint32_t y = 0; y < 2; ++y) { @@ -517,8 +519,8 @@ TEST(TestDynamicResampler, UpdateChannels_Float) EXPECT_FALSE(hasUnderrun); // Add 3rd channel - dr.UpdateResampler(out_rate, 3); - EXPECT_EQ(dr.GetOutRate(), out_rate); + dr.UpdateResampler(in_rate, 3); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), 3u); float in_ch3[10] = {}; @@ -546,8 +548,8 @@ TEST(TestDynamicResampler, UpdateChannels_Float) in_buffer[3] = in_ch4; float out_ch4[10] = {}; - dr.UpdateResampler(out_rate, 4); - EXPECT_EQ(dr.GetOutRate(), out_rate); + dr.UpdateResampler(in_rate, 4); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), 4u); dr.AppendInput(in_buffer, in_frames); @@ -592,8 +594,8 @@ TEST(TestDynamicResampler, UpdateChannels_Short) EXPECT_FALSE(hasUnderrun); // Add 3rd channel - dr.UpdateResampler(out_rate, 3); - EXPECT_EQ(dr.GetOutRate(), out_rate); + dr.UpdateResampler(in_rate, 3); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), 3u); short in_ch3[10] = {}; @@ -622,8 +624,8 @@ TEST(TestDynamicResampler, UpdateChannels_Short) in_buffer[3] = in_ch4; short out_ch4[10] = {}; - dr.UpdateResampler(out_rate, 4); - EXPECT_EQ(dr.GetOutRate(), out_rate); + dr.UpdateResampler(in_rate, 4); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), 4u); dr.AppendInput(in_buffer, in_frames); @@ -647,7 +649,7 @@ TEST(TestDynamicResampler, Underrun) DynamicResampler dr(in_rate, out_rate); dr.SetSampleFormat(AUDIO_FORMAT_FLOAT32); - EXPECT_EQ(dr.GetOutRate(), out_rate); + EXPECT_EQ(dr.GetInRate(), in_rate); EXPECT_EQ(dr.GetChannels(), channels); float in_ch1[in_frames] = {}; @@ -689,7 +691,7 @@ TEST(TestDynamicResampler, Underrun) } // Now try with resampling. - dr.UpdateResampler(out_rate / 2, channels); + dr.UpdateResampler(in_rate * 2, channels); dr.AppendInput(in_buffer, in_frames); hasUnderrun = dr.Resample(out_ch1, out_frames, 0); EXPECT_TRUE(hasUnderrun); diff --git a/dom/media/driftcontrol/plot.py b/dom/media/driftcontrol/plot.py index d55c0f7de0..c3685ead7c 100755 --- a/dom/media/driftcontrol/plot.py +++ b/dom/media/driftcontrol/plot.py @@ -86,23 +86,23 @@ MOZ_LOG_FILE=/tmp/driftcontrol.csv \ [d + h for (d, h) in zip(desired, hysteresisthreshold)], alpha=0.2, color="goldenrod", - legend_label="Hysteresis Threshold (won't correct out rate within area)", + legend_label="Hysteresis Threshold (won't correct in rate within area)", ) fig2 = figure(x_range=fig1.x_range) fig2.line(t, inrate, color="hotpink", legend_label="Nominal in sample rate") fig2.line(t, outrate, color="firebrick", legend_label="Nominal out sample rate") fig2.line( - t, corrected, color="dodgerblue", legend_label="Corrected out sample rate" + t, corrected, color="dodgerblue", legend_label="Corrected in sample rate" ) fig2.line( t, hysteresiscorrected, color="seagreen", - legend_label="Hysteresis-corrected out sample rate", + legend_label="Hysteresis-corrected in sample rate", ) fig2.line( - t, configured, color="goldenrod", legend_label="Configured out sample rate" + t, configured, color="goldenrod", legend_label="Configured in sample rate" ) fig3 = figure(x_range=fig1.x_range) |