diff options
Diffstat (limited to 'dom/media/mediasink/AudioDecoderInputTrack.cpp')
-rw-r--r-- | dom/media/mediasink/AudioDecoderInputTrack.cpp | 674 |
1 files changed, 674 insertions, 0 deletions
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.cpp b/dom/media/mediasink/AudioDecoderInputTrack.cpp new file mode 100644 index 0000000000..50555256e7 --- /dev/null +++ b/dom/media/mediasink/AudioDecoderInputTrack.cpp @@ -0,0 +1,674 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioDecoderInputTrack.h" + +#include "MediaData.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/StaticPrefs_media.h" +#include "Tracing.h" + +#include "RLBoxSoundTouch.h" + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; + +#define LOG(msg, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ + ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) + +#define LOG_M(msg, this, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ + ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) + +/* static */ +AudioDecoderInputTrack* AudioDecoderInputTrack::Create( + MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread, + const AudioInfo& aInfo, float aPlaybackRate, float aVolume, + bool aPreservesPitch) { + MOZ_ASSERT(aGraph); + MOZ_ASSERT(aDecoderThread); + AudioDecoderInputTrack* track = + new AudioDecoderInputTrack(aDecoderThread, aGraph->GraphRate(), aInfo, + aPlaybackRate, aVolume, aPreservesPitch); + aGraph->AddTrack(track); + return track; +} + +AudioDecoderInputTrack::AudioDecoderInputTrack( + nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate, + const AudioInfo& aInfo, float aPlaybackRate, float aVolume, + bool aPreservesPitch) + : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()), + mDecoderThread(aDecoderThread), + mResamplerChannelCount(0), + mInitialInputChannels(aInfo.mChannels), + mInputSampleRate(aInfo.mRate), + mDelayedScheduler(mDecoderThread), + mPlaybackRate(aPlaybackRate), + mVolume(aVolume), + mPreservesPitch(aPreservesPitch) {} + +bool AudioDecoderInputTrack::ConvertAudioDataToSegment( + AudioData* aAudio, AudioSegment& aSegment, + const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(aAudio); + MOZ_ASSERT(aSegment.IsEmpty()); + if (!aAudio->Frames()) { + LOG("Ignore audio with zero frame"); + return false; + } + + aAudio->EnsureAudioBuffer(); + RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer; + AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); + AutoTArray<const AudioDataValue*, 2> channels; + for (uint32_t i = 0; i < aAudio->mChannels; ++i) { + channels.AppendElement(bufferData + i * aAudio->Frames()); + } + aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(), + aPrincipalHandle); + const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate); + if (newInputRate != mInputSampleRate) { + LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate); + mInputSampleRate = newInputRate; + mResampler.own(nullptr); + mResamplerChannelCount = 0; + } + if (mInputSampleRate != Graph()->GraphRate()) { + aSegment.ResampleChunks(mResampler, &mResamplerChannelCount, + mInputSampleRate, Graph()->GraphRate()); + } + return aSegment.GetDuration() > 0; +} + +void AudioDecoderInputTrack::AppendData( + AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(aAudio); + nsTArray<RefPtr<AudioData>> audio; + audio.AppendElement(aAudio); + AppendData(audio, aPrincipalHandle); +} + +void AudioDecoderInputTrack::AppendData( + nsTArray<RefPtr<AudioData>>& aAudioArray, + const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(!mShutdownSPSCQueue); + + // Batching all new data together in order to push them as a single unit that + // gives the SPSC queue more spaces. + for (const auto& audio : aAudioArray) { + BatchData(audio, aPrincipalHandle); + } + + // If SPSC queue doesn't have much available capacity now, we would push + // batched later. + if (ShouldBatchData()) { + return; + } + PushBatchedDataIfNeeded(); +} + +bool AudioDecoderInputTrack::ShouldBatchData() const { + AssertOnDecoderThread(); + // If the SPSC queue has less available capacity than the threshold, then all + // input audio data should be batched together, in order not to increase the + // pressure of SPSC queue. + static const int kThresholdNumerator = 3; + static const int kThresholdDenominator = 10; + return mSPSCQueue.AvailableWrite() < + mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator; +} + +bool AudioDecoderInputTrack::HasBatchedData() const { + AssertOnDecoderThread(); + return !mBatchedData.mSegment.IsEmpty(); +} + +void AudioDecoderInputTrack::BatchData( + AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + AudioSegment segment; + if (!ConvertAudioDataToSegment(aAudio, segment, aPrincipalHandle)) { + return; + } + mBatchedData.mSegment.AppendFrom(&segment); + if (!mBatchedData.mStartTime.IsValid()) { + mBatchedData.mStartTime = aAudio->mTime; + } + mBatchedData.mEndTime = aAudio->GetEndTime(); + LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, + aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(), + mBatchedData.mSegment.GetDuration()); + DispatchPushBatchedDataIfNeeded(); +} + +void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() { + AssertOnDecoderThread(); + MOZ_ASSERT(!mShutdownSPSCQueue); + // The graph thread runs iteration around per 2~10ms. Doing this to ensure + // that we can keep consuming data. If the producer stops pushing new data + // due to MDSM stops decoding, which is because MDSM thinks the data stored + // in the audio queue are enough. The way to remove those data from the + // audio queue is driven by us, so we have to keep consuming data. + // Otherwise, we would get stuck because those batched data would never be + // consumed. + static const uint8_t kTimeoutMS = 10; + TimeStamp target = + TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS); + mDelayedScheduler.Ensure( + target, + [self = RefPtr<AudioDecoderInputTrack>(this), this]() { + LOG("In the task of DispatchPushBatchedDataIfNeeded"); + mDelayedScheduler.CompleteRequest(); + MOZ_ASSERT(!mShutdownSPSCQueue); + MOZ_ASSERT(HasBatchedData()); + // The capacity in SPSC is still not enough, so we can't push data now. + // Retrigger another task to push batched data. + if (ShouldBatchData()) { + DispatchPushBatchedDataIfNeeded(); + return; + } + PushBatchedDataIfNeeded(); + }, + []() { MOZ_DIAGNOSTIC_ASSERT(false); }); +} + +void AudioDecoderInputTrack::PushBatchedDataIfNeeded() { + AssertOnDecoderThread(); + if (!HasBatchedData()) { + return; + } + LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u", + mBatchedData.mStartTime.ToMicroseconds(), + mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite()); + SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))}); + PushDataToSPSCQueue(data); + MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); + // No batched data remains, we can cancel the pending tasks. + mDelayedScheduler.Reset(); +} + +void AudioDecoderInputTrack::NotifyEndOfStream() { + AssertOnDecoderThread(); + // Force to push all data before EOS. Otherwise, the track would be ended too + // early without sending all data. + PushBatchedDataIfNeeded(); + SPSCData data({SPSCData::EOS()}); + LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite()); + PushDataToSPSCQueue(data); +} + +void AudioDecoderInputTrack::ClearFutureData() { + AssertOnDecoderThread(); + // Clear the data hasn't been pushed to SPSC queue yet. + mBatchedData.Clear(); + mDelayedScheduler.Reset(); + SPSCData data({SPSCData::ClearFutureData()}); + LOG("Set clear future data, available SPSC sz=%u", + mSPSCQueue.AvailableWrite()); + PushDataToSPSCQueue(data); +} + +void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) { + AssertOnDecoderThread(); + const bool rv = mSPSCQueue.Enqueue(data); + MOZ_DIAGNOSTIC_ASSERT(rv, "Failed to push data, SPSC queue is full!"); + Unused << rv; +} + +void AudioDecoderInputTrack::SetVolume(float aVolume) { + AssertOnDecoderThread(); + LOG("Set volume=%f", aVolume); + GetMainThreadSerialEventTarget()->Dispatch( + NS_NewRunnableFunction("AudioDecoderInputTrack::SetVolume", + [self = RefPtr<AudioDecoderInputTrack>(this), + aVolume] { self->SetVolumeImpl(aVolume); })); +} + +void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) { + MOZ_ASSERT(NS_IsMainThread()); + QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aVolume] { + TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f", + aVolume); + LOG_M("Apply volume=%f", this, aVolume); + mVolume = aVolume; + }); +} + +void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) { + AssertOnDecoderThread(); + LOG("Set playback rate=%f", aPlaybackRate); + GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( + "AudioDecoderInputTrack::SetPlaybackRate", + [self = RefPtr<AudioDecoderInputTrack>(this), aPlaybackRate] { + self->SetPlaybackRateImpl(aPlaybackRate); + })); +} + +void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) { + MOZ_ASSERT(NS_IsMainThread()); + QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aPlaybackRate] { + TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage", + "%f", aPlaybackRate); + LOG_M("Apply playback rate=%f", this, aPlaybackRate); + mPlaybackRate = aPlaybackRate; + SetTempoAndRateForTimeStretcher(); + }); +} + +void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) { + AssertOnDecoderThread(); + LOG("Set preserves pitch=%d", aPreservesPitch); + GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( + "AudioDecoderInputTrack::SetPreservesPitch", + [self = RefPtr<AudioDecoderInputTrack>(this), aPreservesPitch] { + self->SetPreservesPitchImpl(aPreservesPitch); + })); +} + +void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) { + MOZ_ASSERT(NS_IsMainThread()); + QueueControlMessageWithNoShutdown( + [self = RefPtr{this}, this, aPreservesPitch] { + TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s", + aPreservesPitch ? "true" : "false"); + LOG_M("Apply preserves pitch=%d", this, aPreservesPitch); + mPreservesPitch = aPreservesPitch; + SetTempoAndRateForTimeStretcher(); + }); +} + +void AudioDecoderInputTrack::Close() { + AssertOnDecoderThread(); + LOG("Close"); + mShutdownSPSCQueue = true; + mBatchedData.Clear(); + mDelayedScheduler.Reset(); +} + +void AudioDecoderInputTrack::DestroyImpl() { + LOG("DestroyImpl"); + AssertOnGraphThreadOrNotRunning(); + mBufferedData.Clear(); + if (mTimeStretcher) { + delete mTimeStretcher; + mTimeStretcher = nullptr; + } + ProcessedMediaTrack::DestroyImpl(); +} + +AudioDecoderInputTrack::~AudioDecoderInputTrack() { + MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); + MOZ_ASSERT(mShutdownSPSCQueue); + mResampler.own(nullptr); +} + +void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, + uint32_t aFlags) { + AssertOnGraphThread(); + if (Ended()) { + return; + } + + TrackTime consumedDuration = 0; + auto notify = MakeScopeExit([this, &consumedDuration] { + NotifyInTheEndOfProcessInput(consumedDuration); + }); + + if (mSentAllData && (aFlags & ALLOW_END)) { + LOG("End track"); + mEnded = true; + return; + } + + const TrackTime expectedDuration = aTo - aFrom; + LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom, + aTo, expectedDuration); + + // Drain all data from SPSC queue first, because we want that the SPSC queue + // always has capacity of accepting data from the producer. In addition, we + // also need to check if there is any control related data that should be + // applied to output segment, eg. `ClearFutureData`. + SPSCData data; + while (mSPSCQueue.Dequeue(&data, 1) > 0) { + HandleSPSCData(data); + } + + consumedDuration += AppendBufferedDataToOutput(expectedDuration); + if (HasSentAllData()) { + LOG("Sent all data, should end track in next iteration"); + mSentAllData = true; + } +} + +void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) { + AssertOnGraphThread(); + if (aData.IsDecodedData()) { + MOZ_ASSERT(!mReceivedEOS); + AudioSegment& segment = aData.AsDecodedData()->mSegment; + LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, + aData.AsDecodedData()->mStartTime.ToMicroseconds(), + aData.AsDecodedData()->mEndTime.ToMicroseconds(), + segment.GetDuration()); + mBufferedData.AppendFrom(&segment); + return; + } + if (aData.IsEOS()) { + MOZ_ASSERT(!Ended()); + LOG("Received EOS"); + mReceivedEOS = true; + return; + } + if (aData.IsClearFutureData()) { + LOG("Clear future data"); + mBufferedData.Clear(); + if (!Ended()) { + LOG("Clear EOS"); + mReceivedEOS = false; + } + return; + } + MOZ_ASSERT_UNREACHABLE("unsupported SPSC data"); +} + +TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput( + TrackTime aExpectedDuration) { + AssertOnGraphThread(); + + // Remove the necessary part from `mBufferedData` to create a new + // segment in order to apply some operation without affecting all data. + AudioSegment outputSegment; + TrackTime consumedDuration = 0; + if (mPlaybackRate != 1.0) { + consumedDuration = + AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment); + } else { + consumedDuration = + AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment); + } + + // Apply any necessary change on the segement which would be outputed to the + // graph. + const TrackTime appendedDuration = outputSegment.GetDuration(); + outputSegment.ApplyVolume(mVolume); + ApplyTrackDisabling(&outputSegment); + mSegment->AppendFrom(&outputSegment); + + unsigned int numSamples = 0; + if (mTimeStretcher) { + numSamples = mTimeStretcher->numSamples().unverified_safe_because( + "Only used for logging."); + } + + LOG("Appended %" PRId64 ", consumed %" PRId64 + ", remaining raw buffered %" PRId64 ", remaining time-stretched %u", + appendedDuration, consumedDuration, mBufferedData.GetDuration(), + numSamples); + if (auto gap = aExpectedDuration - appendedDuration; gap > 0) { + LOG("Audio underrun, fill silence %" PRId64, gap); + MOZ_ASSERT(mBufferedData.IsEmpty()); + mSegment->AppendNullData(gap); + } + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + EnsureTimeStretcher(); + + MOZ_ASSERT(mPlaybackRate != 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(mTimeStretcher); + MOZ_ASSERT(aOutput.IsEmpty()); + + // If we don't have enough data that have been time-stretched, fill raw data + // into the time stretcher until the amount of samples that time stretcher + // finishes processed reaches or exceeds the expected duration. + TrackTime consumedDuration = 0; + mTimeStretcher->numSamples().copy_and_verify([&](auto numSamples) { + // Attacker controlled soundtouch can return a bogus numSamples, which + // can result in filling data into the time stretcher (or not). This is + // safe as long as filling (and getting) data is checked. + if (numSamples < aExpectedDuration) { + consumedDuration = FillDataToTimeStretcher(aExpectedDuration); + } + }); + MOZ_ASSERT(consumedDuration >= 0); + Unused << GetDataFromTimeStretcher(aExpectedDuration, aOutput); + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher( + TrackTime aExpectedDuration) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate != 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(mTimeStretcher); + + TrackTime consumedDuration = 0; + const uint32_t channels = GetChannelCountForTimeStretcher(); + mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) { + MOZ_ASSERT(aChunk); + if (aChunk->IsNull() && aChunk->GetDuration() == 0) { + // Skip this chunk and wait for next one. + return false; + } + const uint32_t bufferLength = channels * aChunk->GetDuration(); + if (bufferLength > mInterleavedBuffer.Capacity()) { + mInterleavedBuffer.SetCapacity(bufferLength); + } + mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); + if (aChunk->IsNull()) { + MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence"); + memset(mInterleavedBuffer.Elements(), 0, mInterleavedBuffer.Length()); + } else { + // Do the up-mix/down-mix first if necessary that forces to change the + // data's channel count to the time stretcher's channel count. Then + // perform a transformation from planar to interleaved. + switch (aChunk->mBufferFormat) { + case AUDIO_FORMAT_S16: + WriteChunk<int16_t>(*aChunk, channels, 1.0f, + mInterleavedBuffer.Elements()); + break; + case AUDIO_FORMAT_FLOAT32: + WriteChunk<float>(*aChunk, channels, 1.0f, + mInterleavedBuffer.Elements()); + break; + default: + MOZ_ASSERT_UNREACHABLE("Not expected format"); + } + } + mTimeStretcher->putSamples(mInterleavedBuffer.Elements(), + aChunk->GetDuration()); + consumedDuration += aChunk->GetDuration(); + return mTimeStretcher->numSamples().copy_and_verify( + [&aExpectedDuration](auto numSamples) { + // Attacker controlled soundtouch can return a bogus numSamples to + // return early or force additional iterations. This is safe + // as long as all the writes in the lambda are checked. + return numSamples >= aExpectedDuration; + }); + }); + mBufferedData.RemoveLeading(consumedDuration); + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate == 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(aOutput.IsEmpty()); + + const TrackTime drained = + DrainStretchedDataIfNeeded(aExpectedDuration, aOutput); + const TrackTime available = + std::min(aExpectedDuration - drained, mBufferedData.GetDuration()); + aOutput.AppendSlice(mBufferedData, 0, available); + MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration); + mBufferedData.RemoveLeading(available); + return available; +} + +TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate == 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + + if (!mTimeStretcher) { + return 0; + } + auto numSamples = mTimeStretcher->numSamples().unverified_safe_because( + "Bogus numSamples can result in draining the stretched data (or not)."); + if (numSamples == 0) { + return 0; + } + return GetDataFromTimeStretcher(aExpectedDuration, aOutput); +} + +TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mTimeStretcher); + MOZ_ASSERT(aExpectedDuration >= 0); + + auto numSamples = + mTimeStretcher->numSamples().unverified_safe_because("Used for logging"); + + mTimeStretcher->numUnprocessedSamples().copy_and_verify([&](auto samples) { + if (HasSentAllData() && samples) { + mTimeStretcher->flush(); + LOG("Flush %u frames from the time stretcher", numSamples); + } + }); + + // Flushing may have change the number of samples + numSamples = mTimeStretcher->numSamples().unverified_safe_because( + "Used to decide to flush (or not), which is checked."); + + const TrackTime available = + std::min((TrackTime)numSamples, aExpectedDuration); + if (available == 0) { + // Either running out of stretched data, or the raw data we filled into + // the time stretcher were not enough for producing stretched data. + return 0; + } + + // Retrieve interleaved data from the time stretcher. + const uint32_t channelCount = GetChannelCountForTimeStretcher(); + const uint32_t bufferLength = channelCount * available; + if (bufferLength > mInterleavedBuffer.Capacity()) { + mInterleavedBuffer.SetCapacity(bufferLength); + } + mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); + mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available); + + // Perform a transformation from interleaved to planar. + CheckedInt<size_t> bufferSize(sizeof(AudioDataValue)); + bufferSize *= bufferLength; + RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); + AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); + AutoTArray<AudioDataValue*, 2> planarBuffer; + planarBuffer.SetLength(channelCount); + for (size_t idx = 0; idx < channelCount; idx++) { + planarBuffer[idx] = bufferData + idx * available; + } + DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available, + channelCount, planarBuffer.Elements()); + AutoTArray<const AudioDataValue*, 2> outputChannels; + outputChannels.AppendElements(planarBuffer); + aOutput.AppendFrames(buffer.forget(), outputChannels, + static_cast<int32_t>(available), + mBufferedData.GetOldestPrinciple()); + return available; +} + +void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput( + TrackTime aFillDuration) { + AssertOnGraphThread(); + mWrittenFrames += aFillDuration; + LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d", + aFillDuration, mWrittenFrames, Ended()); + if (aFillDuration > 0) { + mOnOutput.Notify(mWrittenFrames); + } + if (Ended()) { + mOnEnd.Notify(); + } +} + +bool AudioDecoderInputTrack::HasSentAllData() const { + AssertOnGraphThread(); + return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 && + mBufferedData.IsEmpty(); +} + +uint32_t AudioDecoderInputTrack::NumberOfChannels() const { + AssertOnGraphThread(); + const uint32_t maxChannelCount = GetData<AudioSegment>()->MaxChannelCount(); + return maxChannelCount ? maxChannelCount : mInitialInputChannels; +} + +void AudioDecoderInputTrack::EnsureTimeStretcher() { + AssertOnGraphThread(); + if (!mTimeStretcher) { + mTimeStretcher = new RLBoxSoundTouch(); + mTimeStretcher->setSampleRate(Graph()->GraphRate()); + mTimeStretcher->setChannels(GetChannelCountForTimeStretcher()); + mTimeStretcher->setPitch(1.0); + + // SoundTouch v2.1.2 uses automatic time-stretch settings with the following + // values: + // Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap + // Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap + // We are going to use a smaller 10ms sequence size to improve speech + // clarity, giving more resolution at high tempo and less reverb at low + // tempo. Maintain 15ms seekwindow and 8ms overlap for smoothness. + mTimeStretcher->setSetting( + SETTING_SEQUENCE_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms()); + mTimeStretcher->setSetting( + SETTING_SEEKWINDOW_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms()); + mTimeStretcher->setSetting( + SETTING_OVERLAP_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms()); + SetTempoAndRateForTimeStretcher(); + LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)", + GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch); + } +} + +void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() { + AssertOnGraphThread(); + if (!mTimeStretcher) { + return; + } + if (mPreservesPitch) { + mTimeStretcher->setTempo(mPlaybackRate); + mTimeStretcher->setRate(1.0f); + } else { + mTimeStretcher->setTempo(1.0f); + mTimeStretcher->setRate(mPlaybackRate); + } +} + +uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const { + // The time stretcher MUST be initialized with a fixed channel count, but the + // channel count in audio chunks might vary. Therefore, we always use the + // initial input channel count to initialize the time stretcher and perform a + // real-time down-mix/up-mix for audio chunks which have different channel + // count than the initial input channel count. + return mInitialInputChannels; +} + +#undef LOG +} // namespace mozilla |