| Field | Value | Date |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000 |
| commit | 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch) | |
| tree | a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /dom/media/mediasink | |
| parent | Initial commit. (diff) | |
| download | firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz, firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip | |
Adding upstream version 115.8.0esr. (upstream/115.8.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/mediasink')
| Mode | File | Lines added |
|---|---|---|
| -rw-r--r-- | dom/media/mediasink/AudioDecoderInputTrack.cpp | 681 |
| -rw-r--r-- | dom/media/mediasink/AudioDecoderInputTrack.h | 242 |
| -rw-r--r-- | dom/media/mediasink/AudioSink.cpp | 664 |
| -rw-r--r-- | dom/media/mediasink/AudioSink.h | 188 |
| -rw-r--r-- | dom/media/mediasink/AudioSinkWrapper.cpp | 496 |
| -rw-r--r-- | dom/media/mediasink/AudioSinkWrapper.h | 161 |
| -rw-r--r-- | dom/media/mediasink/DecodedStream.cpp | 1171 |
| -rw-r--r-- | dom/media/mediasink/DecodedStream.h | 154 |
| -rw-r--r-- | dom/media/mediasink/MediaSink.h | 142 |
| -rw-r--r-- | dom/media/mediasink/VideoSink.cpp | 706 |
| -rw-r--r-- | dom/media/mediasink/VideoSink.h | 177 |
| -rw-r--r-- | dom/media/mediasink/moz.build | 25 |
12 files changed, 4807 insertions(+), 0 deletions(-)
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.cpp b/dom/media/mediasink/AudioDecoderInputTrack.cpp new file mode 100644 index 0000000000..7f970f0e4f --- /dev/null +++ b/dom/media/mediasink/AudioDecoderInputTrack.cpp @@ -0,0 +1,681 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioDecoderInputTrack.h" + +#include "MediaData.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/StaticPrefs_media.h" +#include "Tracing.h" + +// Use abort() instead of exception in SoundTouch. +#define ST_NO_EXCEPTION_HANDLING 1 +#include "soundtouch/SoundTouchFactory.h" + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; + +#define LOG(msg, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ + ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) + +#define LOG_M(msg, this, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ + ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) + +/* static */ +AudioDecoderInputTrack* AudioDecoderInputTrack::Create( + MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread, + const AudioInfo& aInfo, float aPlaybackRate, float aVolume, + bool aPreservesPitch) { + MOZ_ASSERT(aGraph); + MOZ_ASSERT(aDecoderThread); + AudioDecoderInputTrack* track = + new AudioDecoderInputTrack(aDecoderThread, aGraph->GraphRate(), aInfo, + aPlaybackRate, aVolume, aPreservesPitch); + aGraph->AddTrack(track); + return track; +} + +AudioDecoderInputTrack::AudioDecoderInputTrack( + nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate, + const AudioInfo& aInfo, float aPlaybackRate, float aVolume, + bool aPreservesPitch) + : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()), + mDecoderThread(aDecoderThread), + mResamplerChannelCount(0), + mInitialInputChannels(aInfo.mChannels), + mInputSampleRate(aInfo.mRate), + mDelayedScheduler(mDecoderThread), + mPlaybackRate(aPlaybackRate), + mVolume(aVolume), + mPreservesPitch(aPreservesPitch) {} + +bool AudioDecoderInputTrack::ConvertAudioDataToSegment( + AudioData* aAudio, AudioSegment& aSegment, + const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(aAudio); + MOZ_ASSERT(aSegment.IsEmpty()); + if (!aAudio->Frames()) { + LOG("Ignore audio with zero frame"); + return false; + } + + aAudio->EnsureAudioBuffer(); + RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer; + AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); + AutoTArray<const AudioDataValue*, 2> channels; + for (uint32_t i = 0; i < aAudio->mChannels; ++i) { + channels.AppendElement(bufferData + i * aAudio->Frames()); + } + aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(), + aPrincipalHandle); + const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate); + if (newInputRate != mInputSampleRate) { + LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate); + mInputSampleRate = newInputRate; + mResampler.own(nullptr); + mResamplerChannelCount = 0; + } + if (mInputSampleRate != GraphImpl()->GraphRate()) { + aSegment.ResampleChunks(mResampler, &mResamplerChannelCount, + mInputSampleRate, GraphImpl()->GraphRate()); + } + return aSegment.GetDuration() > 0; +} + +void AudioDecoderInputTrack::AppendData( + AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(aAudio); + nsTArray<RefPtr<AudioData>> audio; + 
audio.AppendElement(aAudio); + AppendData(audio, aPrincipalHandle); +} + +void AudioDecoderInputTrack::AppendData( + nsTArray<RefPtr<AudioData>>& aAudioArray, + const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + MOZ_ASSERT(!mShutdownSPSCQueue); + + // Batching all new data together in order to push them as a single unit that + // gives the SPSC queue more spaces. + for (const auto& audio : aAudioArray) { + BatchData(audio, aPrincipalHandle); + } + + // If SPSC queue doesn't have much available capacity now, we would push + // batched later. + if (ShouldBatchData()) { + return; + } + PushBatchedDataIfNeeded(); +} + +bool AudioDecoderInputTrack::ShouldBatchData() const { + AssertOnDecoderThread(); + // If the SPSC queue has less available capacity than the threshold, then all + // input audio data should be batched together, in order not to increase the + // pressure of SPSC queue. + static const int kThresholdNumerator = 3; + static const int kThresholdDenominator = 10; + return mSPSCQueue.AvailableWrite() < + mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator; +} + +bool AudioDecoderInputTrack::HasBatchedData() const { + AssertOnDecoderThread(); + return !mBatchedData.mSegment.IsEmpty(); +} + +void AudioDecoderInputTrack::BatchData( + AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { + AssertOnDecoderThread(); + AudioSegment segment; + if (!ConvertAudioDataToSegment(aAudio, segment, aPrincipalHandle)) { + return; + } + mBatchedData.mSegment.AppendFrom(&segment); + if (!mBatchedData.mStartTime.IsValid()) { + mBatchedData.mStartTime = aAudio->mTime; + } + mBatchedData.mEndTime = aAudio->GetEndTime(); + LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, + aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(), + mBatchedData.mSegment.GetDuration()); + DispatchPushBatchedDataIfNeeded(); +} + +void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() { + AssertOnDecoderThread(); + MOZ_ASSERT(!mShutdownSPSCQueue); + // The graph thread runs iteration around per 2~10ms. Doing this to ensure + // that we can keep consuming data. If the producer stops pushing new data + // due to MDSM stops decoding, which is because MDSM thinks the data stored + // in the audio queue are enough. The way to remove those data from the + // audio queue is driven by us, so we have to keep consuming data. + // Otherwise, we would get stuck because those batched data would never be + // consumed. + static const uint8_t kTimeoutMS = 10; + TimeStamp target = + TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS); + mDelayedScheduler.Ensure( + target, + [self = RefPtr<AudioDecoderInputTrack>(this), this]() { + LOG("In the task of DispatchPushBatchedDataIfNeeded"); + mDelayedScheduler.CompleteRequest(); + MOZ_ASSERT(!mShutdownSPSCQueue); + MOZ_ASSERT(HasBatchedData()); + // The capacity in SPSC is still not enough, so we can't push data now. + // Retrigger another task to push batched data. 
+ if (ShouldBatchData()) { + DispatchPushBatchedDataIfNeeded(); + return; + } + PushBatchedDataIfNeeded(); + }, + []() { MOZ_DIAGNOSTIC_ASSERT(false); }); +} + +void AudioDecoderInputTrack::PushBatchedDataIfNeeded() { + AssertOnDecoderThread(); + if (!HasBatchedData()) { + return; + } + LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u", + mBatchedData.mStartTime.ToMicroseconds(), + mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite()); + SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))}); + PushDataToSPSCQueue(data); + MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); + // No batched data remains, we can cancel the pending tasks. + mDelayedScheduler.Reset(); +} + +void AudioDecoderInputTrack::NotifyEndOfStream() { + AssertOnDecoderThread(); + // Force to push all data before EOS. Otherwise, the track would be ended too + // early without sending all data. + PushBatchedDataIfNeeded(); + SPSCData data({SPSCData::EOS()}); + LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite()); + PushDataToSPSCQueue(data); +} + +void AudioDecoderInputTrack::ClearFutureData() { + AssertOnDecoderThread(); + // Clear the data hasn't been pushed to SPSC queue yet. + mBatchedData.Clear(); + mDelayedScheduler.Reset(); + SPSCData data({SPSCData::ClearFutureData()}); + LOG("Set clear future data, available SPSC sz=%u", + mSPSCQueue.AvailableWrite()); + PushDataToSPSCQueue(data); +} + +void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) { + AssertOnDecoderThread(); + const bool rv = mSPSCQueue.Enqueue(data); + MOZ_DIAGNOSTIC_ASSERT(rv, "Failed to push data, SPSC queue is full!"); + Unused << rv; +} + +void AudioDecoderInputTrack::SetVolume(float aVolume) { + AssertOnDecoderThread(); + LOG("Set volume=%f", aVolume); + GetMainThreadSerialEventTarget()->Dispatch( + NS_NewRunnableFunction("AudioDecoderInputTrack::SetVolume", + [self = RefPtr<AudioDecoderInputTrack>(this), + aVolume] { self->SetVolumeImpl(aVolume); })); +} + +void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) { + MOZ_ASSERT(NS_IsMainThread()); + class Message : public ControlMessage { + public: + Message(AudioDecoderInputTrack* aTrack, float aVolume) + : ControlMessage(aTrack), mTrack(aTrack), mVolume(aVolume) {} + void Run() override { + TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f", + mVolume); + LOG_M("Apply volume=%f", mTrack.get(), mVolume); + mTrack->mVolume = mVolume; + } + + protected: + const RefPtr<AudioDecoderInputTrack> mTrack; + const float mVolume; + }; + GraphImpl()->AppendMessage(MakeUnique<Message>(this, aVolume)); +} + +void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) { + AssertOnDecoderThread(); + LOG("Set playback rate=%f", aPlaybackRate); + GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( + "AudioDecoderInputTrack::SetPlaybackRate", + [self = RefPtr<AudioDecoderInputTrack>(this), aPlaybackRate] { + self->SetPlaybackRateImpl(aPlaybackRate); + })); +} + +void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) { + MOZ_ASSERT(NS_IsMainThread()); + class Message : public ControlMessage { + public: + Message(AudioDecoderInputTrack* aTrack, float aPlaybackRate) + : ControlMessage(aTrack), + mTrack(aTrack), + mPlaybackRate(aPlaybackRate) {} + void Run() override { + TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage", + "%f", mPlaybackRate); + LOG_M("Apply playback rate=%f", mTrack.get(), mPlaybackRate); + mTrack->mPlaybackRate = mPlaybackRate; + 
mTrack->SetTempoAndRateForTimeStretcher(); + } + + protected: + const RefPtr<AudioDecoderInputTrack> mTrack; + const float mPlaybackRate; + }; + GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPlaybackRate)); +} + +void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) { + AssertOnDecoderThread(); + LOG("Set preserves pitch=%d", aPreservesPitch); + GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( + "AudioDecoderInputTrack::SetPreservesPitch", + [self = RefPtr<AudioDecoderInputTrack>(this), aPreservesPitch] { + self->SetPreservesPitchImpl(aPreservesPitch); + })); +} + +void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) { + MOZ_ASSERT(NS_IsMainThread()); + class Message : public ControlMessage { + public: + Message(AudioDecoderInputTrack* aTrack, bool aPreservesPitch) + : ControlMessage(aTrack), + mTrack(aTrack), + mPreservesPitch(aPreservesPitch) {} + void Run() override { + TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s", + mPreservesPitch ? "true" : "false") + LOG_M("Apply preserves pitch=%d", mTrack.get(), mPreservesPitch); + mTrack->mPreservesPitch = mPreservesPitch; + mTrack->SetTempoAndRateForTimeStretcher(); + } + + protected: + const RefPtr<AudioDecoderInputTrack> mTrack; + const bool mPreservesPitch; + }; + GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPreservesPitch)); +} + +void AudioDecoderInputTrack::Close() { + AssertOnDecoderThread(); + LOG("Close"); + mShutdownSPSCQueue = true; + mBatchedData.Clear(); + mDelayedScheduler.Reset(); +} + +void AudioDecoderInputTrack::DestroyImpl() { + LOG("DestroyImpl"); + AssertOnGraphThreadOrNotRunning(); + mBufferedData.Clear(); + if (mTimeStretcher) { + soundtouch::destroySoundTouchObj(mTimeStretcher); + } + ProcessedMediaTrack::DestroyImpl(); +} + +AudioDecoderInputTrack::~AudioDecoderInputTrack() { + MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); + MOZ_ASSERT(mShutdownSPSCQueue); + mResampler.own(nullptr); +} + +void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, + uint32_t aFlags) { + AssertOnGraphThread(); + if (Ended()) { + return; + } + + TrackTime consumedDuration = 0; + auto notify = MakeScopeExit([this, &consumedDuration] { + NotifyInTheEndOfProcessInput(consumedDuration); + }); + + if (mSentAllData && (aFlags & ALLOW_END)) { + LOG("End track"); + mEnded = true; + return; + } + + const TrackTime expectedDuration = aTo - aFrom; + LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom, + aTo, expectedDuration); + + // Drain all data from SPSC queue first, because we want that the SPSC queue + // always has capacity of accepting data from the producer. In addition, we + // also need to check if there is any control related data that should be + // applied to output segment, eg. `ClearFutureData`. 
+ SPSCData data; + while (mSPSCQueue.Dequeue(&data, 1) > 0) { + HandleSPSCData(data); + } + + consumedDuration += AppendBufferedDataToOutput(expectedDuration); + if (HasSentAllData()) { + LOG("Sent all data, should end track in next iteration"); + mSentAllData = true; + } +} + +void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) { + AssertOnGraphThread(); + if (aData.IsDecodedData()) { + MOZ_ASSERT(!mReceivedEOS); + AudioSegment& segment = aData.AsDecodedData()->mSegment; + LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, + aData.AsDecodedData()->mStartTime.ToMicroseconds(), + aData.AsDecodedData()->mEndTime.ToMicroseconds(), + segment.GetDuration()); + mBufferedData.AppendFrom(&segment); + return; + } + if (aData.IsEOS()) { + MOZ_ASSERT(!Ended()); + LOG("Received EOS"); + mReceivedEOS = true; + return; + } + if (aData.IsClearFutureData()) { + LOG("Clear future data"); + mBufferedData.Clear(); + if (!Ended()) { + LOG("Clear EOS"); + mReceivedEOS = false; + } + return; + } + MOZ_ASSERT_UNREACHABLE("unsupported SPSC data"); +} + +TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput( + TrackTime aExpectedDuration) { + AssertOnGraphThread(); + + // Remove the necessary part from `mBufferedData` to create a new + // segment in order to apply some operation without affecting all data. + AudioSegment outputSegment; + TrackTime consumedDuration = 0; + if (mPlaybackRate != 1.0) { + consumedDuration = + AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment); + } else { + consumedDuration = + AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment); + } + + // Apply any necessary change on the segement which would be outputed to the + // graph. + const TrackTime appendedDuration = outputSegment.GetDuration(); + outputSegment.ApplyVolume(mVolume); + ApplyTrackDisabling(&outputSegment); + mSegment->AppendFrom(&outputSegment); + + LOG("Appended %" PRId64 ", consumed %" PRId64 + ", remaining raw buffered %" PRId64 ", remaining time-stretched %u", + appendedDuration, consumedDuration, mBufferedData.GetDuration(), + mTimeStretcher ? mTimeStretcher->numSamples() : 0); + if (auto gap = aExpectedDuration - appendedDuration; gap > 0) { + LOG("Audio underrun, fill silence %" PRId64, gap); + MOZ_ASSERT(mBufferedData.IsEmpty()); + mSegment->AppendNullData(gap); + } + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + EnsureTimeStretcher(); + + MOZ_ASSERT(mPlaybackRate != 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(mTimeStretcher); + MOZ_ASSERT(aOutput.IsEmpty()); + + // If we don't have enough data that have been time-stretched, fill raw data + // into the time stretcher until the amount of samples that time stretcher + // finishes processed reaches or exceeds the expected duration. 
+ TrackTime consumedDuration = 0; + if (mTimeStretcher->numSamples() < aExpectedDuration) { + consumedDuration = FillDataToTimeStretcher(aExpectedDuration); + } + MOZ_ASSERT(consumedDuration >= 0); + Unused << GetDataFromTimeStretcher(aExpectedDuration, aOutput); + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher( + TrackTime aExpectedDuration) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate != 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(mTimeStretcher); + + TrackTime consumedDuration = 0; + const uint32_t channels = GetChannelCountForTimeStretcher(); + mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) { + MOZ_ASSERT(aChunk); + if (aChunk->IsNull() && aChunk->GetDuration() == 0) { + // Skip this chunk and wait for next one. + return false; + } + const uint32_t bufferLength = channels * aChunk->GetDuration(); + if (bufferLength > mInterleavedBuffer.Capacity()) { + mInterleavedBuffer.SetCapacity(bufferLength); + } + mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); + if (aChunk->IsNull()) { + MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence"); + memset(mInterleavedBuffer.Elements(), 0, mInterleavedBuffer.Length()); + } else { + // Do the up-mix/down-mix first if necessary that forces to change the + // data's channel count to the time stretcher's channel count. Then + // perform a transformation from planar to interleaved. + switch (aChunk->mBufferFormat) { + case AUDIO_FORMAT_S16: + WriteChunk<int16_t>(*aChunk, channels, 1.0f, + mInterleavedBuffer.Elements()); + break; + case AUDIO_FORMAT_FLOAT32: + WriteChunk<float>(*aChunk, channels, 1.0f, + mInterleavedBuffer.Elements()); + break; + default: + MOZ_ASSERT_UNREACHABLE("Not expected format"); + } + } + mTimeStretcher->putSamples(mInterleavedBuffer.Elements(), + aChunk->GetDuration()); + consumedDuration += aChunk->GetDuration(); + return mTimeStretcher->numSamples() >= aExpectedDuration; + }); + mBufferedData.RemoveLeading(consumedDuration); + return consumedDuration; +} + +TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate == 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + MOZ_ASSERT(aOutput.IsEmpty()); + + const TrackTime drained = + DrainStretchedDataIfNeeded(aExpectedDuration, aOutput); + const TrackTime available = + std::min(aExpectedDuration - drained, mBufferedData.GetDuration()); + aOutput.AppendSlice(mBufferedData, 0, available); + MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration); + mBufferedData.RemoveLeading(available); + return available; +} + +TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mPlaybackRate == 1.0f); + MOZ_ASSERT(aExpectedDuration >= 0); + + if (!mTimeStretcher) { + return 0; + } + if (mTimeStretcher->numSamples() == 0) { + return 0; + } + return GetDataFromTimeStretcher(aExpectedDuration, aOutput); +} + +TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher( + TrackTime aExpectedDuration, AudioSegment& aOutput) { + AssertOnGraphThread(); + MOZ_ASSERT(mTimeStretcher); + MOZ_ASSERT(aExpectedDuration >= 0); + + if (HasSentAllData() && mTimeStretcher->numUnprocessedSamples()) { + mTimeStretcher->flush(); + LOG("Flush %u frames from the time stretcher", + mTimeStretcher->numSamples()); + } + + const TrackTime available = + std::min((TrackTime)mTimeStretcher->numSamples(), aExpectedDuration); + if 
(available == 0) { + // Either running out of stretched data, or the raw data we filled into + // the time stretcher were not enough for producing stretched data. + return 0; + } + + // Retrieve interleaved data from the time stretcher. + const uint32_t channelCount = GetChannelCountForTimeStretcher(); + const uint32_t bufferLength = channelCount * available; + if (bufferLength > mInterleavedBuffer.Capacity()) { + mInterleavedBuffer.SetCapacity(bufferLength); + } + mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); + mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available); + + // Perform a transformation from interleaved to planar. + CheckedInt<size_t> bufferSize(sizeof(AudioDataValue)); + bufferSize *= bufferLength; + RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); + AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); + AutoTArray<AudioDataValue*, 2> planarBuffer; + planarBuffer.SetLength(channelCount); + for (size_t idx = 0; idx < channelCount; idx++) { + planarBuffer[idx] = bufferData + idx * available; + } + DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available, + channelCount, planarBuffer.Elements()); + AutoTArray<const AudioDataValue*, 2> outputChannels; + outputChannels.AppendElements(planarBuffer); + aOutput.AppendFrames(buffer.forget(), outputChannels, + static_cast<int32_t>(available), + mBufferedData.GetOldestPrinciple()); + return available; +} + +void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput( + TrackTime aFillDuration) { + AssertOnGraphThread(); + mWrittenFrames += aFillDuration; + LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d", + aFillDuration, mWrittenFrames, Ended()); + if (aFillDuration > 0) { + mOnOutput.Notify(mWrittenFrames); + } + if (Ended()) { + mOnEnd.Notify(); + } +} + +bool AudioDecoderInputTrack::HasSentAllData() const { + AssertOnGraphThread(); + return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 && + mBufferedData.IsEmpty(); +} + +uint32_t AudioDecoderInputTrack::NumberOfChannels() const { + AssertOnGraphThread(); + const uint32_t maxChannelCount = GetData<AudioSegment>()->MaxChannelCount(); + return maxChannelCount ? maxChannelCount : mInitialInputChannels; +} + +void AudioDecoderInputTrack::EnsureTimeStretcher() { + AssertOnGraphThread(); + if (!mTimeStretcher) { + mTimeStretcher = soundtouch::createSoundTouchObj(); + mTimeStretcher->setSampleRate(GraphImpl()->GraphRate()); + mTimeStretcher->setChannels(GetChannelCountForTimeStretcher()); + mTimeStretcher->setPitch(1.0); + + // SoundTouch v2.1.2 uses automatic time-stretch settings with the following + // values: + // Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap + // Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap + // We are going to use a smaller 10ms sequence size to improve speech + // clarity, giving more resolution at high tempo and less reverb at low + // tempo. Maintain 15ms seekwindow and 8ms overlap for smoothness. 
+ mTimeStretcher->setSetting( + SETTING_SEQUENCE_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms()); + mTimeStretcher->setSetting( + SETTING_SEEKWINDOW_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms()); + mTimeStretcher->setSetting( + SETTING_OVERLAP_MS, + StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms()); + SetTempoAndRateForTimeStretcher(); + LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)", + GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch); + } +} + +void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() { + AssertOnGraphThread(); + if (!mTimeStretcher) { + return; + } + if (mPreservesPitch) { + mTimeStretcher->setTempo(mPlaybackRate); + mTimeStretcher->setRate(1.0f); + } else { + mTimeStretcher->setTempo(1.0f); + mTimeStretcher->setRate(mPlaybackRate); + } +} + +uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const { + // The time stretcher MUST be initialized with a fixed channel count, but the + // channel count in audio chunks might vary. Therefore, we always use the + // initial input channel count to initialize the time stretcher and perform a + // real-time down-mix/up-mix for audio chunks which have different channel + // count than the initial input channel count. + return mInitialInputChannels; +} + +#undef LOG +} // namespace mozilla diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h new file mode 100644 index 0000000000..8c82d7bed6 --- /dev/null +++ b/dom/media/mediasink/AudioDecoderInputTrack.h @@ -0,0 +1,242 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AudioDecoderInputTrack_h +#define AudioDecoderInputTrack_h + +#include "AudioSegment.h" +#include "MediaEventSource.h" +#include "MediaTimer.h" +#include "MediaTrackGraph.h" +#include "MediaTrackGraphImpl.h" +#include "MediaSegment.h" +#include "mozilla/SPSCQueue.h" +#include "mozilla/StateMirroring.h" +#include "nsISerialEventTarget.h" + +namespace soundtouch { +class MOZ_EXPORT SoundTouch; +} + +namespace mozilla { + +class AudioData; + +/** + * AudioDecoderInputTrack is used as a source for the audio decoder data, which + * supports adjusting playback rate and preserve pitch. + * The owner of this track would be responsible to push audio data via + * `AppendData()` into a SPSC queue, which is a thread-safe queue between the + * decoder thread (producer) and the graph thread (consumer). MediaTrackGraph + * requires data via `ProcessInput()`, then AudioDecoderInputTrack would convert + * (based on sample rate and playback rate) and append the amount of needed + * audio frames onto the output segment that would be used by MediaTrackGraph. + */ +class AudioDecoderInputTrack final : public ProcessedMediaTrack { + public: + static AudioDecoderInputTrack* Create(MediaTrackGraph* aGraph, + nsISerialEventTarget* aDecoderThread, + const AudioInfo& aInfo, + float aPlaybackRate, float aVolume, + bool aPreservesPitch); + + // SPSCData suppports filling different supported type variants, and is used + // to achieve a thread-safe information exchange between the decoder thread + // and the graph thread. 
+ struct SPSCData final { + struct Empty {}; + struct ClearFutureData {}; + struct DecodedData { + DecodedData() + : mStartTime(media::TimeUnit::Invalid()), + mEndTime(media::TimeUnit::Invalid()) {} + DecodedData(DecodedData&& aDecodedData) + : mSegment(std::move(aDecodedData.mSegment)) { + mStartTime = aDecodedData.mStartTime; + mEndTime = aDecodedData.mEndTime; + aDecodedData.Clear(); + } + DecodedData(media::TimeUnit aStartTime, media::TimeUnit aEndTime) + : mStartTime(aStartTime), mEndTime(aEndTime) {} + DecodedData(const DecodedData&) = delete; + DecodedData& operator=(const DecodedData&) = delete; + void Clear() { + mSegment.Clear(); + mStartTime = media::TimeUnit::Invalid(); + mEndTime = media::TimeUnit::Invalid(); + } + AudioSegment mSegment; + media::TimeUnit mStartTime; + media::TimeUnit mEndTime; + }; + struct EOS {}; + + SPSCData() : mData(Empty()){}; + explicit SPSCData(ClearFutureData&& aArg) : mData(std::move(aArg)){}; + explicit SPSCData(DecodedData&& aArg) : mData(std::move(aArg)){}; + explicit SPSCData(EOS&& aArg) : mData(std::move(aArg)){}; + + bool HasData() const { return !mData.is<Empty>(); } + bool IsClearFutureData() const { return mData.is<ClearFutureData>(); } + bool IsDecodedData() const { return mData.is<DecodedData>(); } + bool IsEOS() const { return mData.is<EOS>(); } + + DecodedData* AsDecodedData() { + return IsDecodedData() ? &mData.as<DecodedData>() : nullptr; + } + + Variant<Empty, ClearFutureData, DecodedData, EOS> mData; + }; + + // Decoder thread API + void AppendData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle); + void AppendData(nsTArray<RefPtr<AudioData>>& aAudioArray, + const PrincipalHandle& aPrincipalHandle); + void NotifyEndOfStream(); + void ClearFutureData(); + void SetVolume(float aVolume); + void SetPlaybackRate(float aPlaybackRate); + void SetPreservesPitch(bool aPreservesPitch); + // After calling this, the track are not expected to receive any new data. + void Close(); + bool HasBatchedData() const; + + MediaEventSource<int64_t>& OnOutput() { return mOnOutput; } + MediaEventSource<void>& OnEnd() { return mOnEnd; } + + // Graph Thread API + void DestroyImpl() override; + void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override; + uint32_t NumberOfChannels() const override; + + // The functions below are only used for testing. + TrackTime WrittenFrames() const { + AssertOnGraphThread(); + return mWrittenFrames; + } + float Volume() const { + AssertOnGraphThread(); + return mVolume; + } + float PlaybackRate() const { + AssertOnGraphThread(); + return mPlaybackRate; + } + + protected: + ~AudioDecoderInputTrack(); + + private: + AudioDecoderInputTrack(nsISerialEventTarget* aDecoderThread, + TrackRate aGraphRate, const AudioInfo& aInfo, + float aPlaybackRate, float aVolume, + bool aPreservesPitch); + + // Return false if the converted segment contains zero duration. + bool ConvertAudioDataToSegment(AudioData* aAudio, AudioSegment& aSegment, + const PrincipalHandle& aPrincipalHandle); + + void HandleSPSCData(SPSCData& aData); + + // These methods would return the total frames that we consumed from + // `mBufferedData`. 
+ TrackTime AppendBufferedDataToOutput(TrackTime aExpectedDuration); + TrackTime FillDataToTimeStretcher(TrackTime aExpectedDuration); + TrackTime AppendTimeStretchedDataToSegment(TrackTime aExpectedDuration, + AudioSegment& aOutput); + TrackTime AppendUnstretchedDataToSegment(TrackTime aExpectedDuration, + AudioSegment& aOutput); + + // Return the total frames that we retrieve from the time stretcher. + TrackTime DrainStretchedDataIfNeeded(TrackTime aExpectedDuration, + AudioSegment& aOutput); + TrackTime GetDataFromTimeStretcher(TrackTime aExpectedDuration, + AudioSegment& aOutput); + void NotifyInTheEndOfProcessInput(TrackTime aFillDuration); + + bool HasSentAllData() const; + + bool ShouldBatchData() const; + void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle); + void DispatchPushBatchedDataIfNeeded(); + void PushBatchedDataIfNeeded(); + void PushDataToSPSCQueue(SPSCData& data); + + void SetVolumeImpl(float aVolume); + void SetPlaybackRateImpl(float aPlaybackRate); + void SetPreservesPitchImpl(bool aPreservesPitch); + + void EnsureTimeStretcher(); + void SetTempoAndRateForTimeStretcher(); + uint32_t GetChannelCountForTimeStretcher() const; + + inline void AssertOnDecoderThread() const { + MOZ_ASSERT(mDecoderThread->IsOnCurrentThread()); + } + inline void AssertOnGraphThread() const { + MOZ_ASSERT(GraphImpl()->OnGraphThread()); + } + inline void AssertOnGraphThreadOrNotRunning() const { + MOZ_ASSERT(GraphImpl()->OnGraphThreadOrNotRunning()); + } + + const RefPtr<nsISerialEventTarget> mDecoderThread; + + // Notify the amount of audio frames which have been sent to the track. + MediaEventProducer<int64_t> mOnOutput; + // Notify when the track is ended. + MediaEventProducer<void> mOnEnd; + + // These variables are ONLY used in the decoder thread. + nsAutoRef<SpeexResamplerState> mResampler; + uint32_t mResamplerChannelCount; + const uint32_t mInitialInputChannels; + TrackRate mInputSampleRate; + DelayedScheduler mDelayedScheduler; + bool mShutdownSPSCQueue = false; + + // These attributes are ONLY used in the graph thread. + bool mReceivedEOS = false; + TrackTime mWrittenFrames = 0; + float mPlaybackRate; + float mVolume; + bool mPreservesPitch; + + // A thread-safe queue shared by the decoder thread and the graph thread. + // The decoder thread is the producer side, and the graph thread is the + // consumer side. This queue should NEVER get full. In order to achieve that, + // we would batch input samples when SPSC queue doesn't have many available + // capacity. + // In addition, as the media track isn't guaranteed to be destroyed on the + // graph thread (it could be destroyed on the main thread as well) so we might + // not clear all data in SPSC queue when the track's `DestroyImpl()` gets + // called. We leave to destroy the queue later when the track gets destroyed. + SPSCQueue<SPSCData> mSPSCQueue{40}; + + // When the graph requires the less amount of audio frames than the amount of + // frames an audio data has, then the remaining part of frames would be stored + // and used in next iteration. + // This is ONLY used in the graph thread. + AudioSegment mBufferedData; + + // In order to prevent SPSC queue from being full, we want to batch multiple + // data into one to control the density of SPSC queue, the length of batched + // data would be dynamically adjusted by queue's available capacity. + // This is ONLY used in the decoder thread. 
+ SPSCData::DecodedData mBatchedData; + + // True if we've sent all data to the graph, then the track will be marked as + // ended in the next iteration. + bool mSentAllData = false; + + // This is used to adjust the playback rate and pitch. + soundtouch::SoundTouch* mTimeStretcher = nullptr; + + // Buffers that would be used for the time stretching. + AutoTArray<AudioDataValue, 2> mInterleavedBuffer; +}; + +} // namespace mozilla + +#endif // AudioDecoderInputTrack_h diff --git a/dom/media/mediasink/AudioSink.cpp b/dom/media/mediasink/AudioSink.cpp new file mode 100644 index 0000000000..536a2a4f8a --- /dev/null +++ b/dom/media/mediasink/AudioSink.cpp @@ -0,0 +1,664 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "AudioSink.h" +#include "AudioConverter.h" +#include "AudioDeviceInfo.h" +#include "MediaQueue.h" +#include "VideoUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/ProfilerMarkerTypes.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPrefs_dom.h" +#include "nsPrintfCString.h" +#include "Tracing.h" + +namespace mozilla { + +mozilla::LazyLogModule gAudioSinkLog("AudioSink"); +#define SINK_LOG(msg, ...) \ + MOZ_LOG(gAudioSinkLog, LogLevel::Debug, \ + ("AudioSink=%p " msg, this, ##__VA_ARGS__)) +#define SINK_LOG_V(msg, ...) \ + MOZ_LOG(gAudioSinkLog, LogLevel::Verbose, \ + ("AudioSink=%p " msg, this, ##__VA_ARGS__)) + +// The amount of audio frames that is used to fuzz rounding errors. +static const int64_t AUDIO_FUZZ_FRAMES = 1; + +using media::TimeUnit; + +AudioSink::AudioSink(AbstractThread* aThread, + MediaQueue<AudioData>& aAudioQueue, const AudioInfo& aInfo, + bool aShouldResistFingerprinting) + : mPlaying(true), + mWritten(0), + mErrored(false), + mOwnerThread(aThread), + mFramesParsed(0), + mOutputRate( + DecideAudioPlaybackSampleRate(aInfo, aShouldResistFingerprinting)), + mOutputChannels(DecideAudioPlaybackChannels(aInfo)), + mAudibilityMonitor( + mOutputRate, + StaticPrefs::dom_media_silence_duration_for_audibility()), + mIsAudioDataAudible(false), + mProcessedQueueFinished(false), + mAudioQueue(aAudioQueue), + mProcessedQueueThresholdMS( + StaticPrefs::media_audio_audiosink_threshold_ms()) { + // Not much to initialize here if there's no audio. + if (!aInfo.IsValid()) { + mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(0); + return; + } + // Twice the limit that trigger a refill. + double capacitySeconds = mProcessedQueueThresholdMS / 1000.f * 2; + // Clamp to correct boundaries, and align on the channel count + int elementCount = static_cast<int>( + std::clamp(capacitySeconds * mOutputChannels * mOutputRate, 0., + std::numeric_limits<int>::max() - 1.)); + elementCount -= elementCount % mOutputChannels; + mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(elementCount); + SINK_LOG("Ringbuffer has space for %u elements (%lf seconds)", + mProcessedSPSCQueue->Capacity(), + static_cast<float>(elementCount) / mOutputChannels / mOutputRate); + // Determine if the data is likely to be audible when the stream will be + // ready, if possible. 
+ RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront(); + if (frontPacket) { + mAudibilityMonitor.ProcessInterleaved(frontPacket->Data(), + frontPacket->mChannels); + mIsAudioDataAudible = mAudibilityMonitor.RecentlyAudible(); + SINK_LOG("New AudioSink -- audio is likely to be %s", + mIsAudioDataAudible ? "audible" : "inaudible"); + } else { + // If no packets are available, consider the audio audible. + mIsAudioDataAudible = true; + SINK_LOG( + "New AudioSink -- no audio packet avaialble, considering the stream " + "audible"); + } +} + +AudioSink::~AudioSink() { + // Generally instances of AudioSink should be properly Shutdown manually. + // The only way deleting an AudioSink without shutdown an happen is if the + // dispatch back to the MDSM thread after initializing it asynchronously + // fails. When that's the case, the stream has been initialized but not + // started. Manually shutdown the AudioStream in this case. + if (mAudioStream) { + mAudioStream->Shutdown(); + } +} + +nsresult AudioSink::InitializeAudioStream( + const PlaybackParams& aParams, const RefPtr<AudioDeviceInfo>& aAudioDevice, + AudioSink::InitializationType aInitializationType) { + if (aInitializationType == AudioSink::InitializationType::UNMUTING) { + // Consider the stream to be audible immediately, before initialization + // finishes when unmuting, in case initialization takes some time and it + // looked audible when the AudioSink was created. + mAudibleEvent.Notify(mIsAudioDataAudible); + SINK_LOG("InitializeAudioStream (Unmuting) notifying that audio is %s", + mIsAudioDataAudible ? "audible" : "inaudible"); + } else { + // If not unmuting, the audibility event will be dispatched as usual, + // inspecting the audio content as it's being played and signaling the + // audibility event when a different in state is detected. + SINK_LOG("InitializeAudioStream (initial)"); + mIsAudioDataAudible = false; + } + + // When AudioQueue is empty, there is no way to know the channel layout of + // the coming audio data, so we use the predefined channel map instead. + AudioConfig::ChannelLayout::ChannelMap channelMap = + AudioConfig::ChannelLayout(mOutputChannels).Map(); + // The layout map used here is already processed by mConverter with + // mOutputChannels into SMPTE format, so there is no need to worry if + // StaticPrefs::accessibility_monoaudio_enable() or + // StaticPrefs::media_forcestereo_enabled() is applied. + MOZ_ASSERT(!mAudioStream); + mAudioStream = + new AudioStream(*this, mOutputRate, mOutputChannels, channelMap); + nsresult rv = mAudioStream->Init(aAudioDevice); + if (NS_FAILED(rv)) { + mAudioStream->Shutdown(); + mAudioStream = nullptr; + return rv; + } + + // Set playback params before calling Start() so they can take effect + // as soon as the 1st DataCallback of the AudioStream fires. 
+ mAudioStream->SetVolume(aParams.mVolume); + mAudioStream->SetPlaybackRate(aParams.mPlaybackRate); + mAudioStream->SetPreservesPitch(aParams.mPreservesPitch); + + return NS_OK; +} + +nsresult AudioSink::Start( + const media::TimeUnit& aStartTime, + MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise) { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + + mAudioQueueListener = mAudioQueue.PushEvent().Connect( + mOwnerThread, this, &AudioSink::OnAudioPushed); + mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect( + mOwnerThread, this, &AudioSink::NotifyAudioNeeded); + mProcessedQueueListener = + mAudioPopped.Connect(mOwnerThread, this, &AudioSink::OnAudioPopped); + + mStartTime = aStartTime; + + // To ensure at least one audio packet will be popped from AudioQueue and + // ready to be played. + NotifyAudioNeeded(); + + return mAudioStream->Start(aEndedPromise); +} + +TimeUnit AudioSink::GetPosition() { + int64_t tmp; + if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) { + TimeUnit pos = TimeUnit::FromMicroseconds(tmp); + NS_ASSERTION(pos >= mLastGoodPosition, + "AudioStream position shouldn't go backward"); + TimeUnit tmp = mStartTime + pos; + if (!tmp.IsValid()) { + mErrored = true; + return mStartTime + mLastGoodPosition; + } + // Update the last good position when we got a good one. + if (pos >= mLastGoodPosition) { + mLastGoodPosition = pos; + } + } + + return mStartTime + mLastGoodPosition; +} + +bool AudioSink::HasUnplayedFrames() { + // Experimentation suggests that GetPositionInFrames() is zero-indexed, + // so we need to add 1 here before comparing it to mWritten. + return mProcessedSPSCQueue->AvailableRead() || + (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < mWritten); +} + +TimeUnit AudioSink::UnplayedDuration() const { + return TimeUnit::FromMicroseconds(AudioQueuedInRingBufferMS()); +} + +void AudioSink::ReenqueueUnplayedAudioDataIfNeeded() { + // This is OK: the AudioStream has been shut down. Shutdown guarantees that + // the audio callback thread won't call back again. + mProcessedSPSCQueue->ResetThreadIds(); + + // construct an AudioData + int sampleInRingbuffer = mProcessedSPSCQueue->AvailableRead(); + + if (!sampleInRingbuffer) { + return; + } + + uint32_t channelCount; + uint32_t rate; + if (mConverter) { + channelCount = mConverter->OutputConfig().Channels(); + rate = mConverter->OutputConfig().Rate(); + } else { + channelCount = mOutputChannels; + rate = mOutputRate; + } + + uint32_t framesRemaining = sampleInRingbuffer / channelCount; + + nsTArray<AlignedAudioBuffer> packetsToReenqueue; + RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront(); + uint32_t offset; + TimeUnit time; + uint32_t typicalPacketFrameCount; + // Extrapolate mOffset, mTime from the front of the queue + // We can't really find a good value for `mOffset`, so we take what we have + // at the front of the queue. + // For `mTime`, assume there hasn't been a discontinuity recently. + if (!frontPacket) { + // We do our best here, but it's not going to be perfect. + typicalPacketFrameCount = 1024; // typical for e.g. AAC + offset = 0; + time = GetPosition(); + } else { + typicalPacketFrameCount = frontPacket->Frames(); + offset = frontPacket->mOffset; + time = frontPacket->mTime; + } + + // Extract all audio data from the ring buffer, we can only read the data from + // the most recent, so we reenqueue the data, packetized, in a temporary + // array. 
+ while (framesRemaining) { + uint32_t packetFrameCount = + std::min(framesRemaining, typicalPacketFrameCount); + framesRemaining -= packetFrameCount; + + int packetSampleCount = packetFrameCount * channelCount; + AlignedAudioBuffer packetData(packetSampleCount); + DebugOnly<int> samplesRead = + mProcessedSPSCQueue->Dequeue(packetData.Data(), packetSampleCount); + MOZ_ASSERT(samplesRead == packetSampleCount); + + packetsToReenqueue.AppendElement(packetData); + } + // Reenqueue in the audio queue in correct order in the audio queue, starting + // with the end of the temporary array. + while (!packetsToReenqueue.IsEmpty()) { + auto packetData = packetsToReenqueue.PopLastElement(); + uint32_t packetFrameCount = packetData.Length() / channelCount; + auto duration = TimeUnit(packetFrameCount, rate); + if (!duration.IsValid()) { + NS_WARNING("Int overflow in AudioSink"); + mErrored = true; + return; + } + time -= duration; + RefPtr<AudioData> packet = + new AudioData(offset, time, std::move(packetData), channelCount, rate); + MOZ_DIAGNOSTIC_ASSERT(duration == packet->mDuration, "must be equal"); + + SINK_LOG( + "Muting: Pushing back %u frames (%lfms) from the ring buffer back into " + "the audio queue at pts %lf", + packetFrameCount, 1000 * static_cast<float>(packetFrameCount) / rate, + time.ToSeconds()); + // The audio data's timestamp would be adjusted already if we're in looping, + // so we don't want to adjust them again. + mAudioQueue.PushFront(packet, + MediaQueue<AudioData>::TimestampAdjustment::Disable); + } +} + +Maybe<MozPromiseHolder<MediaSink::EndedPromise>> AudioSink::Shutdown( + ShutdownCause aShutdownCause) { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + + mAudioQueueListener.DisconnectIfExists(); + mAudioQueueFinishListener.DisconnectIfExists(); + mProcessedQueueListener.DisconnectIfExists(); + + Maybe<MozPromiseHolder<MediaSink::EndedPromise>> rv; + + if (mAudioStream) { + rv = mAudioStream->Shutdown(aShutdownCause); + mAudioStream = nullptr; + if (aShutdownCause == ShutdownCause::Muting) { + ReenqueueUnplayedAudioDataIfNeeded(); + } + } + mProcessedQueueFinished = true; + + return rv; +} + +void AudioSink::SetVolume(double aVolume) { + if (mAudioStream) { + mAudioStream->SetVolume(aVolume); + } +} + +void AudioSink::SetStreamName(const nsAString& aStreamName) { + if (mAudioStream) { + mAudioStream->SetStreamName(aStreamName); + } +} + +void AudioSink::SetPlaybackRate(double aPlaybackRate) { + MOZ_ASSERT(aPlaybackRate != 0, + "Don't set the playbackRate to 0 on AudioStream"); + if (mAudioStream) { + mAudioStream->SetPlaybackRate(aPlaybackRate); + } +} + +void AudioSink::SetPreservesPitch(bool aPreservesPitch) { + if (mAudioStream) { + mAudioStream->SetPreservesPitch(aPreservesPitch); + } +} + +void AudioSink::SetPlaying(bool aPlaying) { + if (!mAudioStream || mAudioStream->IsPlaybackCompleted() || + mPlaying == aPlaying) { + return; + } + // pause/resume AudioStream as necessary. + if (!aPlaying) { + mAudioStream->Pause(); + } else if (aPlaying) { + mAudioStream->Resume(); + } + mPlaying = aPlaying; +} + +TimeUnit AudioSink::GetEndTime() const { + uint64_t written = mWritten; + TimeUnit played = media::TimeUnit(written, mOutputRate) + mStartTime; + if (!played.IsValid()) { + NS_WARNING("Int overflow calculating audio end time"); + return TimeUnit::Zero(); + } + // As we may be resampling, rounding errors may occur. Ensure we never get + // past the original end time. 
+ return std::min(mLastEndTime, played); +} + +uint32_t AudioSink::PopFrames(AudioDataValue* aBuffer, uint32_t aFrames, + bool aAudioThreadChanged) { + // This is safe, because we have the guarantee, by the OS, that audio + // callbacks are never called concurrently. Audio thread changes can only + // happen when not using cubeb remoting, and often when changing audio device + // at the system level. + if (aAudioThreadChanged) { + mProcessedSPSCQueue->ResetThreadIds(); + } + + TRACE_COMMENT("AudioSink::PopFrames", "%u frames (ringbuffer: %u/%u)", + aFrames, SampleToFrame(mProcessedSPSCQueue->AvailableRead()), + SampleToFrame(mProcessedSPSCQueue->Capacity())); + + const int samplesToPop = static_cast<int>(aFrames * mOutputChannels); + const int samplesRead = mProcessedSPSCQueue->Dequeue(aBuffer, samplesToPop); + auto sampleOut = samplesRead; + MOZ_ASSERT(samplesRead % mOutputChannels == 0); + mWritten += SampleToFrame(samplesRead); + if (samplesRead != samplesToPop) { + if (Ended()) { + SINK_LOG("Last PopFrames -- Source ended."); + } else if (mTreatUnderrunAsSilence) { + SINK_LOG("Treat underrun frames (%u) as silence frames", + SampleToFrame(samplesToPop - samplesRead)); + sampleOut = samplesToPop; + } else { + NS_WARNING("Underrun when popping samples from audiosink ring buffer."); + TRACE_COMMENT("AudioSink::PopFrames", "Underrun %u frames missing", + SampleToFrame(samplesToPop - samplesRead)); + } + // silence the rest + PodZero(aBuffer + samplesRead, samplesToPop - samplesRead); + } + + mAudioPopped.Notify(); + + SINK_LOG_V("Popping %u frames. Remaining in ringbuffer %u / %u\n", aFrames, + SampleToFrame(mProcessedSPSCQueue->AvailableRead()), + SampleToFrame(mProcessedSPSCQueue->Capacity())); + CheckIsAudible(Span(aBuffer, sampleOut), mOutputChannels); + + return SampleToFrame(sampleOut); +} + +bool AudioSink::Ended() const { + // Return true when error encountered so AudioStream can start draining. + // Both atomic so we don't need locking + return mProcessedQueueFinished || mErrored; +} + +void AudioSink::CheckIsAudible(const Span<AudioDataValue>& aInterleaved, + size_t aChannel) { + mAudibilityMonitor.ProcessInterleaved(aInterleaved, aChannel); + bool isAudible = mAudibilityMonitor.RecentlyAudible(); + + if (isAudible != mIsAudioDataAudible) { + mIsAudioDataAudible = isAudible; + SINK_LOG("Notifying that audio is now %s", + mIsAudioDataAudible ? "audible" : "inaudible"); + mAudibleEvent.Notify(mIsAudioDataAudible); + } +} + +void AudioSink::OnAudioPopped() { + SINK_LOG_V("AudioStream has used an audio packet."); + NotifyAudioNeeded(); +} + +void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) { + SINK_LOG_V("One new audio packet available."); + NotifyAudioNeeded(); +} + +uint32_t AudioSink::AudioQueuedInRingBufferMS() const { + return static_cast<uint32_t>( + 1000 * SampleToFrame(mProcessedSPSCQueue->AvailableRead()) / mOutputRate); +} + +uint32_t AudioSink::SampleToFrame(uint32_t aSamples) const { + return aSamples / mOutputChannels; +} + +void AudioSink::NotifyAudioNeeded() { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(), + "Not called from the owner's thread"); + + while (mAudioQueue.GetSize() && + AudioQueuedInRingBufferMS() < + static_cast<uint32_t>(mProcessedQueueThresholdMS)) { + // Check if there's room in our ring buffer. + if (mAudioQueue.PeekFront()->Frames() > + SampleToFrame(mProcessedSPSCQueue->AvailableWrite())) { + SINK_LOG_V("Can't push %u frames. 
In ringbuffer %u / %u\n", + mAudioQueue.PeekFront()->Frames(), + SampleToFrame(mProcessedSPSCQueue->AvailableRead()), + SampleToFrame(mProcessedSPSCQueue->Capacity())); + return; + } + SINK_LOG_V("Pushing %u frames. In ringbuffer %u / %u\n", + mAudioQueue.PeekFront()->Frames(), + SampleToFrame(mProcessedSPSCQueue->AvailableRead()), + SampleToFrame(mProcessedSPSCQueue->Capacity())); + RefPtr<AudioData> data = mAudioQueue.PopFront(); + + // Ignore the element with 0 frames and try next. + if (!data->Frames()) { + continue; + } + + if (!mConverter || + (data->mRate != mConverter->InputConfig().Rate() || + data->mChannels != mConverter->InputConfig().Channels())) { + SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz", + mConverter ? mConverter->InputConfig().Channels() : 0, + mConverter ? mConverter->InputConfig().Rate() : 0, + data->mChannels, data->mRate); + + DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite())); + + // mFramesParsed indicates the current playtime in frames at the current + // input sampling rate. Recalculate it per the new sampling rate. + if (mFramesParsed) { + // We minimize overflow. + uint32_t oldRate = mConverter->InputConfig().Rate(); + uint32_t newRate = data->mRate; + CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate); + if (!result.isValid()) { + NS_WARNING("Int overflow in AudioSink"); + mErrored = true; + return; + } + mFramesParsed = result.value(); + } + + const AudioConfig::ChannelLayout inputLayout = + data->mChannelMap + ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap) + : AudioConfig::ChannelLayout(data->mChannels); + const AudioConfig::ChannelLayout outputLayout = + mOutputChannels == data->mChannels + ? inputLayout + : AudioConfig::ChannelLayout(mOutputChannels); + AudioConfig inConfig = + AudioConfig(inputLayout, data->mChannels, data->mRate); + AudioConfig outConfig = + AudioConfig(outputLayout, mOutputChannels, mOutputRate); + if (!AudioConverter::CanConvert(inConfig, outConfig)) { + mErrored = true; + return; + } + mConverter = MakeUnique<AudioConverter>(inConfig, outConfig); + } + + // See if there's a gap in the audio. If there is, push silence into the + // audio hardware, so we can play across the gap. + // Calculate the timestamp of the next chunk of audio in numbers of + // samples. + CheckedInt64 sampleTime = + TimeUnitToFrames(data->mTime - mStartTime, data->mRate); + // Calculate the number of frames that have been pushed onto the audio + // hardware. + CheckedInt64 missingFrames = sampleTime - mFramesParsed; + + if (!missingFrames.isValid() || !sampleTime.isValid()) { + NS_WARNING("Int overflow in AudioSink"); + mErrored = true; + return; + } + + if (missingFrames.value() > AUDIO_FUZZ_FRAMES) { + // The next audio packet begins some time after the end of the last packet + // we pushed to the audio hardware. We must push silence into the audio + // hardware so that the next audio packet begins playback at the correct + // time. But don't push more than the ring buffer can receive. 
+ missingFrames = std::min<int64_t>( + std::min<int64_t>(INT32_MAX, missingFrames.value()), + SampleToFrame(mProcessedSPSCQueue->AvailableWrite())); + mFramesParsed += missingFrames.value(); + + SINK_LOG("Gap in the audio input, push %" PRId64 " frames of silence", + missingFrames.value()); + + RefPtr<AudioData> silenceData; + AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels); + if (!silenceBuffer) { + NS_WARNING("OOM in AudioSink"); + mErrored = true; + return; + } + if (mConverter->InputConfig() != mConverter->OutputConfig()) { + AlignedAudioBuffer convertedData = + mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer))) + .Forget(); + silenceData = CreateAudioFromBuffer(std::move(convertedData), data); + } else { + silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data); + } + TRACE("Pushing silence"); + PushProcessedAudio(silenceData); + } + + mLastEndTime = data->GetEndTime(); + mFramesParsed += data->Frames(); + + if (mConverter->InputConfig() != mConverter->OutputConfig()) { + AlignedAudioBuffer buffer(data->MoveableData()); + AlignedAudioBuffer convertedData = + mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget(); + data = CreateAudioFromBuffer(std::move(convertedData), data); + } + if (PushProcessedAudio(data)) { + mLastProcessedPacket = Some(data); + } + } + + if (mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0) { + // We have reached the end of the data, drain the resampler. + DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite())); + mProcessedQueueFinished = true; + } +} + +uint32_t AudioSink::PushProcessedAudio(AudioData* aData) { + if (!aData || !aData->Frames()) { + return 0; + } + int framesToEnqueue = static_cast<int>(aData->Frames() * aData->mChannels); + TRACE_COMMENT("AudioSink::PushProcessedAudio", "%u frames (%u/%u)", + framesToEnqueue, + SampleToFrame(mProcessedSPSCQueue->AvailableWrite()), + SampleToFrame(mProcessedSPSCQueue->Capacity())); + DebugOnly<int> rv = + mProcessedSPSCQueue->Enqueue(aData->Data().Elements(), framesToEnqueue); + NS_WARNING_ASSERTION( + rv == static_cast<int>(aData->Frames() * aData->mChannels), + "AudioSink ring buffer over-run, can't push new data"); + return aData->Frames(); +} + +already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer( + AlignedAudioBuffer&& aBuffer, AudioData* aReference) { + uint32_t frames = SampleToFrame(aBuffer.Length()); + if (!frames) { + return nullptr; + } + auto duration = media::TimeUnit(frames, mOutputRate); + if (!duration.IsValid()) { + NS_WARNING("Int overflow in AudioSink"); + mErrored = true; + return nullptr; + } + RefPtr<AudioData> data = + new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer), + mOutputChannels, mOutputRate); + MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal"); + return data.forget(); +} + +uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + + if (!mConverter || !mLastProcessedPacket || !aMaxFrames) { + // nothing to drain. + return 0; + } + + RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref(); + mLastProcessedPacket.reset(); + + // To drain we simply provide an empty packet to the audio converter. 
+ AlignedAudioBuffer convertedData = + mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget(); + + uint32_t frames = SampleToFrame(convertedData.Length()); + if (!convertedData.SetLength(std::min(frames, aMaxFrames) * + mOutputChannels)) { + // This can never happen as we were reducing the length of convertData. + mErrored = true; + return 0; + } + + RefPtr<AudioData> data = + CreateAudioFromBuffer(std::move(convertedData), lastPacket); + return PushProcessedAudio(data); +} + +void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + aInfo.mAudioSinkWrapper.mAudioSink.mStartTime = mStartTime.ToMicroseconds(); + aInfo.mAudioSinkWrapper.mAudioSink.mLastGoodPosition = + mLastGoodPosition.ToMicroseconds(); + aInfo.mAudioSinkWrapper.mAudioSink.mIsPlaying = mPlaying; + aInfo.mAudioSinkWrapper.mAudioSink.mOutputRate = mOutputRate; + aInfo.mAudioSinkWrapper.mAudioSink.mWritten = mWritten; + aInfo.mAudioSinkWrapper.mAudioSink.mHasErrored = bool(mErrored); + aInfo.mAudioSinkWrapper.mAudioSink.mPlaybackComplete = + mAudioStream ? mAudioStream->IsPlaybackCompleted() : false; +} + +void AudioSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) { + SINK_LOG("set mTreatUnderrunAsSilence=%d", aEnabled); + mTreatUnderrunAsSilence = aEnabled; +} + +} // namespace mozilla diff --git a/dom/media/mediasink/AudioSink.h b/dom/media/mediasink/AudioSink.h new file mode 100644 index 0000000000..856227ee4c --- /dev/null +++ b/dom/media/mediasink/AudioSink.h @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef AudioSink_h__ +#define AudioSink_h__ + +#include "AudioStream.h" +#include "AudibilityMonitor.h" +#include "MediaEventSource.h" +#include "MediaInfo.h" +#include "MediaQueue.h" +#include "MediaSink.h" +#include "mozilla/Atomics.h" +#include "mozilla/Maybe.h" +#include "mozilla/Monitor.h" +#include "mozilla/MozPromise.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Result.h" +#include "nsISupportsImpl.h" + +namespace mozilla { + +class AudioConverter; + +class AudioSink : private AudioStream::DataSource { + public: + enum class InitializationType { + // This AudioSink is being initialized for the first time + INITIAL, + UNMUTING + }; + struct PlaybackParams { + PlaybackParams(double aVolume, double aPlaybackRate, bool aPreservesPitch) + : mVolume(aVolume), + mPlaybackRate(aPlaybackRate), + mPreservesPitch(aPreservesPitch) {} + double mVolume; + double mPlaybackRate; + bool mPreservesPitch; + }; + + AudioSink(AbstractThread* aThread, MediaQueue<AudioData>& aAudioQueue, + const AudioInfo& aInfo, bool aShouldResistFingerprinting); + + ~AudioSink(); + + // Allocate and initialize mAudioStream. Returns NS_OK on success. + nsresult InitializeAudioStream(const PlaybackParams& aParams, + const RefPtr<AudioDeviceInfo>& aAudioDevice, + InitializationType aInitializationType); + + // Start audio playback. + nsresult Start(const media::TimeUnit& aStartTime, + MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise); + + /* + * All public functions are not thread-safe. + * Called on the task queue of MDSM only. 
+   */
+  media::TimeUnit GetPosition();
+  media::TimeUnit GetEndTime() const;
+
+  // Check whether we've pushed more frames to the audio stream than it
+  // has played.
+  bool HasUnplayedFrames();
+
+  // The duration of the buffered frames.
+  media::TimeUnit UnplayedDuration() const;
+
+  // Shut down the AudioSink's resources. If an AudioStream existed, return the
+  // ended promise it had, if it's shutting down mid-stream because it's muting.
+  Maybe<MozPromiseHolder<MediaSink::EndedPromise>> Shutdown(
+      ShutdownCause aShutdownCause = ShutdownCause::Regular);
+
+  void SetVolume(double aVolume);
+  void SetStreamName(const nsAString& aStreamName);
+  void SetPlaybackRate(double aPlaybackRate);
+  void SetPreservesPitch(bool aPreservesPitch);
+  void SetPlaying(bool aPlaying);
+
+  MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; }
+
+  void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo);
+
+  // This returns true if the audio callbacks are being called, and so the
+  // audio stream-based clock is moving forward.
+  bool AudioStreamCallbackStarted() {
+    return mAudioStream && mAudioStream->CallbackStarted();
+  }
+
+  void UpdateStartTime(const media::TimeUnit& aStartTime) {
+    mStartTime = aStartTime;
+  }
+
+  void EnableTreatAudioUnderrunAsSilence(bool aEnabled);
+
+ private:
+  // Interface of AudioStream::DataSource.
+  // Called on the callback thread of cubeb. Returns the number of frames that
+  // were available.
+  uint32_t PopFrames(AudioDataValue* aBuffer, uint32_t aFrames,
+                     bool aAudioThreadChanged) override;
+  bool Ended() const override;
+
+  // When shutting down, it's important not to lose any audio data; it might
+  // still be of use, in two scenarios:
+  // - If the audio is now captured to a MediaStream, whatever is enqueued in
+  //   the ring buffer needs to be played out now;
+  // - If the AudioSink is shutting down because the audio is muted, it's
+  //   important to keep the audio around in case it's quickly unmuted,
+  //   and in general to keep A/V sync correct when unmuted.
+  void ReenqueueUnplayedAudioDataIfNeeded();
+
+  void CheckIsAudible(const Span<AudioDataValue>& aInterleaved,
+                      size_t aChannel);
+
+  // The audio stream resource. Used on the task queue of MDSM only.
+  RefPtr<AudioStream> mAudioStream;
+
+  // The presentation time of the first audio frame that was played.
+  // We can add this to the audio stream position to determine
+  // the current audio time.
+  media::TimeUnit mStartTime;
+
+  // Keep the last good position returned from the audio stream. Used to ensure
+  // position returned by GetPosition() is mono-increasing in spite of audio
+  // stream error. Used on the task queue of MDSM only.
+  media::TimeUnit mLastGoodPosition;
+
+  // Used on the task queue of MDSM only.
+  bool mPlaying;
+
+  // PCM frames written to the stream so far. Written on the callback thread,
+  // read on the MDSM thread.
+  Atomic<int64_t> mWritten;
+
+  // True if there is any error in processing audio data like overflow.
+  Atomic<bool> mErrored;
+
+  const RefPtr<AbstractThread> mOwnerThread;
+
+  // Audio Processing objects and methods
+  void OnAudioPopped();
+  void OnAudioPushed(const RefPtr<AudioData>& aSample);
+  void NotifyAudioNeeded();
+  // Drain the converter and add the output to the processed audio queue.
+  // A maximum of aMaxFrames will be added.
+  uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
+  already_AddRefed<AudioData> CreateAudioFromBuffer(
+      AlignedAudioBuffer&& aBuffer, AudioData* aReference);
+  // Add data to the processed queue and return the number of frames added.
+  uint32_t PushProcessedAudio(AudioData* aData);
+  uint32_t AudioQueuedInRingBufferMS() const;
+  uint32_t SampleToFrame(uint32_t aSamples) const;
+  UniquePtr<AudioConverter> mConverter;
+  UniquePtr<SPSCQueue<AudioDataValue>> mProcessedSPSCQueue;
+  MediaEventListener mAudioQueueListener;
+  MediaEventListener mAudioQueueFinishListener;
+  MediaEventListener mProcessedQueueListener;
+  // Number of frames processed from mAudioQueue. Used to determine gaps in
+  // the input stream. It indicates the time in frames since playback started
+  // at the current input framerate.
+  int64_t mFramesParsed;
+  Maybe<RefPtr<AudioData>> mLastProcessedPacket;
+  media::TimeUnit mLastEndTime;
+  // Never modified after construction.
+  uint32_t mOutputRate;
+  uint32_t mOutputChannels;
+  AudibilityMonitor mAudibilityMonitor;
+  bool mIsAudioDataAudible;
+  MediaEventProducer<bool> mAudibleEvent;
+  // Only signaled on the real-time audio thread.
+  MediaEventProducer<void> mAudioPopped;
+
+  Atomic<bool> mProcessedQueueFinished;
+  MediaQueue<AudioData>& mAudioQueue;
+  const float mProcessedQueueThresholdMS;
+
+  // True if we'd like to treat underrun as silent frames. But that can only be
+  // applied in the special situation for seamless looping.
+  bool mTreatUnderrunAsSilence = false;
+};
+
+}  // namespace mozilla
+
+#endif  // AudioSink_h__
diff --git a/dom/media/mediasink/AudioSinkWrapper.cpp b/dom/media/mediasink/AudioSinkWrapper.cpp
new file mode 100644
index 0000000000..5a006479e1
--- /dev/null
+++ b/dom/media/mediasink/AudioSinkWrapper.cpp
@@ -0,0 +1,496 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSinkWrapper.h"
+#include "AudioDeviceInfo.h"
+#include "AudioSink.h"
+#include "VideoUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Result.h"
+#include "nsPrintfCString.h"
+
+mozilla::LazyLogModule gAudioSinkWrapperLog("AudioSinkWrapper");
+#define LOG(...) \
+  MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Debug, (__VA_ARGS__));
+#define LOGV(...)
\ + MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Verbose, (__VA_ARGS__)); + +namespace mozilla { + +using media::TimeUnit; + +AudioSinkWrapper::~AudioSinkWrapper() = default; + +void AudioSinkWrapper::Shutdown() { + AssertOwnerThread(); + MOZ_ASSERT(!mIsStarted, "Must be called after playback stopped."); + mCreator = nullptr; + mEndedPromiseHolder.ResolveIfExists(true, __func__); +} + +RefPtr<MediaSink::EndedPromise> AudioSinkWrapper::OnEnded(TrackType aType) { + AssertOwnerThread(); + MOZ_ASSERT(mIsStarted, "Must be called after playback starts."); + if (aType == TrackInfo::kAudioTrack) { + return mEndedPromise; + } + return nullptr; +} + +TimeUnit AudioSinkWrapper::GetEndTime(TrackType aType) const { + AssertOwnerThread(); + MOZ_ASSERT(mIsStarted, "Must be called after playback starts."); + if (aType == TrackInfo::kAudioTrack && mAudioSink && + mAudioSink->AudioStreamCallbackStarted()) { + return mAudioSink->GetEndTime(); + } + + if (aType == TrackInfo::kAudioTrack && !mAudioSink && IsMuted()) { + if (IsPlaying()) { + return GetSystemClockPosition(TimeStamp::Now()); + } + + return mPlayDuration; + } + return TimeUnit::Zero(); +} + +TimeUnit AudioSinkWrapper::GetSystemClockPosition(TimeStamp aNow) const { + AssertOwnerThread(); + MOZ_ASSERT(!mPlayStartTime.IsNull()); + // Time elapsed since we started playing. + double delta = (aNow - mPlayStartTime).ToSeconds(); + // Take playback rate into account. + return mPlayDuration + TimeUnit::FromSeconds(delta * mParams.mPlaybackRate); +} + +bool AudioSinkWrapper::IsMuted() const { + AssertOwnerThread(); + return mParams.mVolume == 0.0; +} + +TimeUnit AudioSinkWrapper::GetPosition(TimeStamp* aTimeStamp) { + AssertOwnerThread(); + MOZ_ASSERT(mIsStarted, "Must be called after playback starts."); + + TimeUnit pos; + TimeStamp t = TimeStamp::Now(); + + if (!mAudioEnded && !IsMuted() && mAudioSink) { + if (mLastClockSource == ClockSource::SystemClock) { + TimeUnit switchTime = GetSystemClockPosition(t); + // Update the _actual_ start time of the audio stream now that it has + // started, preventing any clock discontinuity. + mAudioSink->UpdateStartTime(switchTime); + LOGV("%p: switching to audio clock at media time %lf", this, + switchTime.ToSeconds()); + } + // Rely on the audio sink to report playback position when it is not ended. + pos = mAudioSink->GetPosition(); + LOGV("%p: Getting position from the Audio Sink %lf", this, pos.ToSeconds()); + mLastClockSource = ClockSource::AudioStream; + } else if (!mPlayStartTime.IsNull()) { + // Calculate playback position using system clock if we are still playing, + // but not rendering the audio, because this audio sink is muted. + pos = GetSystemClockPosition(t); + LOGV("%p: Getting position from the system clock %lf", this, + pos.ToSeconds()); + if (IsMuted()) { + if (mAudioQueue.GetSize() > 0) { + // audio track, but it's muted and won't be dequeued, discard packets + // that are behind the current media time, to keep the queue size under + // control. + DropAudioPacketsIfNeeded(pos); + } + // If muted, it's necessary to manually check if the audio has "ended", + // meaning that all the audio packets have been consumed, to resolve the + // ended promise. + if (CheckIfEnded()) { + MOZ_ASSERT(!mAudioSink); + mEndedPromiseHolder.ResolveIfExists(true, __func__); + } + } + mLastClockSource = ClockSource::SystemClock; + } else { + // Return how long we've played if we are not playing. 
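+    // For example (numbers illustrative): if playback was paused 4s into the
+    // media, SetPlaying(false) stored 4s in mPlayDuration and cleared
+    // mPlayStartTime, so this branch keeps reporting 4s until playback
+    // resumes.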
+ pos = mPlayDuration; + LOGV("%p: Getting static position, not playing %lf", this, pos.ToSeconds()); + mLastClockSource = ClockSource::Paused; + } + + if (aTimeStamp) { + *aTimeStamp = t; + } + + return pos; +} + +bool AudioSinkWrapper::CheckIfEnded() const { + return mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0u; +} + +bool AudioSinkWrapper::HasUnplayedFrames(TrackType aType) const { + AssertOwnerThread(); + return mAudioSink ? mAudioSink->HasUnplayedFrames() : false; +} + +media::TimeUnit AudioSinkWrapper::UnplayedDuration(TrackType aType) const { + AssertOwnerThread(); + return mAudioSink ? mAudioSink->UnplayedDuration() : media::TimeUnit::Zero(); +} + +void AudioSinkWrapper::DropAudioPacketsIfNeeded( + const TimeUnit& aMediaPosition) { + RefPtr<AudioData> audio = mAudioQueue.PeekFront(); + uint32_t dropped = 0; + while (audio && audio->mTime + audio->mDuration < aMediaPosition) { + // drop this packet, try the next one + audio = mAudioQueue.PopFront(); + dropped++; + if (audio) { + LOGV( + "Dropping audio packets: media position: %lf, " + "packet dropped: [%lf, %lf] (%u so far).\n", + aMediaPosition.ToSeconds(), audio->mTime.ToSeconds(), + (audio->GetEndTime()).ToSeconds(), dropped); + } + audio = mAudioQueue.PeekFront(); + } +} + +void AudioSinkWrapper::OnMuted(bool aMuted) { + AssertOwnerThread(); + LOG("%p: AudioSinkWrapper::OnMuted(%s)", this, aMuted ? "true" : "false"); + // Nothing to do + if (mAudioEnded) { + LOG("%p: AudioSinkWrapper::OnMuted, but no audio track", this); + return; + } + if (aMuted) { + if (mAudioSink) { + LOG("AudioSinkWrapper muted, shutting down AudioStream."); + mAudioSinkEndedPromise.DisconnectIfExists(); + if (IsPlaying()) { + mPlayDuration = mAudioSink->GetPosition(); + mPlayStartTime = TimeStamp::Now(); + } + Maybe<MozPromiseHolder<MediaSink::EndedPromise>> rv = + mAudioSink->Shutdown(ShutdownCause::Muting); + // There will generally be a promise here, except if the stream has + // errored out, or if it has just finished. In both cases, the promise has + // been handled appropriately, there is nothing to do. + if (rv.isSome()) { + mEndedPromiseHolder = std::move(rv.ref()); + } + mAudioSink = nullptr; + } + } else { + if (!IsPlaying()) { + LOG("%p: AudioSinkWrapper::OnMuted: not playing, not re-creating an " + "AudioSink", + this); + return; + } + LOG("%p: AudioSinkWrapper unmuted, re-creating an AudioStream.", this); + TimeUnit mediaPosition = GetSystemClockPosition(TimeStamp::Now()); + nsresult rv = StartAudioSink(mediaPosition, AudioSinkStartPolicy::ASYNC); + if (NS_FAILED(rv)) { + NS_WARNING( + "Could not start AudioSink from AudioSinkWrapper when unmuting"); + } + } +} + +void AudioSinkWrapper::SetVolume(double aVolume) { + AssertOwnerThread(); + + bool wasMuted = mParams.mVolume == 0; + bool nowMuted = aVolume == 0.; + mParams.mVolume = aVolume; + + if (!wasMuted && nowMuted) { + OnMuted(true); + } else if (wasMuted && !nowMuted) { + OnMuted(false); + } + + if (mAudioSink) { + mAudioSink->SetVolume(aVolume); + } +} + +void AudioSinkWrapper::SetStreamName(const nsAString& aStreamName) { + AssertOwnerThread(); + if (mAudioSink) { + mAudioSink->SetStreamName(aStreamName); + } +} + +void AudioSinkWrapper::SetPlaybackRate(double aPlaybackRate) { + AssertOwnerThread(); + if (!mAudioEnded && mAudioSink) { + // Pass the playback rate to the audio sink. The underlying AudioStream + // will handle playback rate changes and report correct audio position. 
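+    // (For the sinkless path handled in the else branch below, the clock is
+    // rebased instead; e.g., illustratively, 10s played at 1x followed by a
+    // switch to 2x pins mPlayDuration at 10s, restarts mPlayStartTime, and
+    // GetSystemClockPosition() then advances at 2x from 10s on, since it
+    // computes mPlayDuration + (now - mPlayStartTime) * mParams.mPlaybackRate.)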
+ mAudioSink->SetPlaybackRate(aPlaybackRate); + } else if (!mPlayStartTime.IsNull()) { + // Adjust playback duration and start time when we are still playing. + TimeStamp now = TimeStamp::Now(); + mPlayDuration = GetSystemClockPosition(now); + mPlayStartTime = now; + } + // mParams.mPlaybackRate affects GetSystemClockPosition(). It should be + // updated after the calls to GetSystemClockPosition(); + mParams.mPlaybackRate = aPlaybackRate; + + // Do nothing when not playing. Changes in playback rate will be taken into + // account by GetSystemClockPosition(). +} + +void AudioSinkWrapper::SetPreservesPitch(bool aPreservesPitch) { + AssertOwnerThread(); + mParams.mPreservesPitch = aPreservesPitch; + if (mAudioSink) { + mAudioSink->SetPreservesPitch(aPreservesPitch); + } +} + +void AudioSinkWrapper::SetPlaying(bool aPlaying) { + AssertOwnerThread(); + LOG("%p: AudioSinkWrapper::SetPlaying %s", this, aPlaying ? "true" : "false"); + + // Resume/pause matters only when playback started. + if (!mIsStarted) { + return; + } + + if (mAudioSink) { + mAudioSink->SetPlaying(aPlaying); + } else { + if (aPlaying) { + LOG("%p: AudioSinkWrapper::SetPlaying : starting an AudioSink", this); + TimeUnit switchTime = GetPosition(); + DropAudioPacketsIfNeeded(switchTime); + StartAudioSink(switchTime, AudioSinkStartPolicy::SYNC); + } + } + + if (aPlaying) { + MOZ_ASSERT(mPlayStartTime.IsNull()); + mPlayStartTime = TimeStamp::Now(); + } else { + // Remember how long we've played. + mPlayDuration = GetPosition(); + // mPlayStartTime must be updated later since GetPosition() + // depends on the value of mPlayStartTime. + mPlayStartTime = TimeStamp(); + } +} + +double AudioSinkWrapper::PlaybackRate() const { + AssertOwnerThread(); + return mParams.mPlaybackRate; +} + +nsresult AudioSinkWrapper::Start(const TimeUnit& aStartTime, + const MediaInfo& aInfo) { + LOG("%p AudioSinkWrapper::Start", this); + AssertOwnerThread(); + MOZ_ASSERT(!mIsStarted, "playback already started."); + + mIsStarted = true; + mPlayDuration = aStartTime; + mPlayStartTime = TimeStamp::Now(); + mAudioEnded = IsAudioSourceEnded(aInfo); + + if (mAudioEnded) { + // Resolve promise if we start playback at the end position of the audio. + mEndedPromise = + aInfo.HasAudio() + ? MediaSink::EndedPromise::CreateAndResolve(true, __func__) + : nullptr; + return NS_OK; + } + + return StartAudioSink(aStartTime, AudioSinkStartPolicy::SYNC); +} + +nsresult AudioSinkWrapper::StartAudioSink(const TimeUnit& aStartTime, + AudioSinkStartPolicy aPolicy) { + MOZ_RELEASE_ASSERT(!mAudioSink); + + nsresult rv = NS_OK; + + mAudioSinkEndedPromise.DisconnectIfExists(); + mEndedPromise = mEndedPromiseHolder.Ensure(__func__); + mEndedPromise + ->Then(mOwnerThread.get(), __func__, this, + &AudioSinkWrapper::OnAudioEnded, &AudioSinkWrapper::OnAudioEnded) + ->Track(mAudioSinkEndedPromise); + + LOG("%p: AudioSinkWrapper::StartAudioSink (%s)", this, + aPolicy == AudioSinkStartPolicy::ASYNC ? "Async" : "Sync"); + + if (IsMuted()) { + LOG("%p: Muted: not starting an audio sink", this); + return NS_OK; + } + LOG("%p: Not muted: starting a new audio sink", this); + if (aPolicy == AudioSinkStartPolicy::ASYNC) { + UniquePtr<AudioSink> audioSink; + audioSink.reset(mCreator->Create()); + NS_DispatchBackgroundTask(NS_NewRunnableFunction( + "StartAudioSink (Async part: initialization)", + [self = RefPtr<AudioSinkWrapper>(this), audioSink{std::move(audioSink)}, + this]() mutable { + LOG("AudioSink initialization on background thread"); + // This can take about 200ms, e.g. 
on Windows, we don't want to do + // it on the MDSM thread, because it would make the clock not update + // for that amount of time, and the video would therefore not + // update. The Start() call is very cheap on the other hand, we can + // do it from the MDSM thread. + nsresult rv = audioSink->InitializeAudioStream( + mParams, mAudioDevice, AudioSink::InitializationType::UNMUTING); + mOwnerThread->Dispatch(NS_NewRunnableFunction( + "StartAudioSink (Async part: start from MDSM thread)", + [self = RefPtr<AudioSinkWrapper>(this), + audioSink{std::move(audioSink)}, this, rv]() mutable { + LOG("AudioSink async init done, back on MDSM thread"); + if (NS_FAILED(rv)) { + LOG("Async AudioSink initialization failed"); + mEndedPromiseHolder.RejectIfExists(rv, __func__); + return; + } + + // It's possible that the newly created isn't needed at this + // point, in some cases: + // 1. An AudioSink was created synchronously while this + // AudioSink was initialized asynchronously, bail out here. This + // happens when seeking (which does a synchronous + // initialization) right after unmuting. + // 2. The media element was muted while the async initialization + // was happening. + // 3. The AudioSinkWrapper was stopped during asynchronous + // creation. + // 4. The AudioSinkWrapper was paused during asynchronous + // creation. + if (mAudioSink || IsMuted() || !mIsStarted || + mPlayStartTime.IsNull()) { + LOG("AudioSink initialized async isn't needed, shutting " + "it down."); + DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> rv = + audioSink->Shutdown(); + MOZ_ASSERT(rv.inspect().isNothing()); + return; + } + + MOZ_ASSERT(!mAudioSink); + TimeUnit switchTime = GetPosition(); + DropAudioPacketsIfNeeded(switchTime); + mAudioSink.swap(audioSink); + if (mTreatUnderrunAsSilence) { + mAudioSink->EnableTreatAudioUnderrunAsSilence( + mTreatUnderrunAsSilence); + } + LOG("AudioSink async, start"); + nsresult rv2 = + mAudioSink->Start(switchTime, mEndedPromiseHolder); + if (NS_FAILED(rv2)) { + LOG("Async AudioSinkWrapper start failed"); + mEndedPromiseHolder.RejectIfExists(rv2, __func__); + } + })); + })); + } else { + mAudioSink.reset(mCreator->Create()); + nsresult rv = mAudioSink->InitializeAudioStream( + mParams, mAudioDevice, AudioSink::InitializationType::INITIAL); + if (NS_FAILED(rv)) { + mEndedPromiseHolder.RejectIfExists(rv, __func__); + LOG("Sync AudioSinkWrapper initialization failed"); + return rv; + } + if (mTreatUnderrunAsSilence) { + mAudioSink->EnableTreatAudioUnderrunAsSilence(mTreatUnderrunAsSilence); + } + rv = mAudioSink->Start(aStartTime, mEndedPromiseHolder); + if (NS_FAILED(rv)) { + LOG("Sync AudioSinkWrapper start failed"); + mEndedPromiseHolder.RejectIfExists(rv, __func__); + } + } + + return rv; +} + +bool AudioSinkWrapper::IsAudioSourceEnded(const MediaInfo& aInfo) const { + // no audio or empty audio queue which won't get data anymore is equivalent to + // audio ended + return !aInfo.HasAudio() || + (mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0u); +} + +void AudioSinkWrapper::Stop() { + AssertOwnerThread(); + MOZ_ASSERT(mIsStarted, "playback not started."); + + LOG("%p: AudioSinkWrapper::Stop", this); + + mIsStarted = false; + mAudioEnded = true; + + mAudioSinkEndedPromise.DisconnectIfExists(); + + if (mAudioSink) { + DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> rv = + mAudioSink->Shutdown(); + MOZ_ASSERT(rv.inspect().isNothing()); + mAudioSink = nullptr; + mEndedPromise = nullptr; + } +} + +bool AudioSinkWrapper::IsStarted() const { + AssertOwnerThread(); + return 
mIsStarted; +} + +bool AudioSinkWrapper::IsPlaying() const { + AssertOwnerThread(); + return IsStarted() && !mPlayStartTime.IsNull(); +} + +void AudioSinkWrapper::OnAudioEnded() { + AssertOwnerThread(); + LOG("%p: AudioSinkWrapper::OnAudioEnded", this); + mAudioSinkEndedPromise.Complete(); + mPlayDuration = GetPosition(); + if (!mPlayStartTime.IsNull()) { + mPlayStartTime = TimeStamp::Now(); + } + mAudioEnded = true; +} + +void AudioSinkWrapper::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) { + AssertOwnerThread(); + aInfo.mAudioSinkWrapper.mIsPlaying = IsPlaying(); + aInfo.mAudioSinkWrapper.mIsStarted = IsStarted(); + aInfo.mAudioSinkWrapper.mAudioEnded = mAudioEnded; + if (mAudioSink) { + mAudioSink->GetDebugInfo(aInfo); + } +} + +void AudioSinkWrapper::EnableTreatAudioUnderrunAsSilence(bool aEnabled) { + mTreatUnderrunAsSilence = aEnabled; + if (mAudioSink) { + mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled); + } +} + +} // namespace mozilla + +#undef LOG +#undef LOGV diff --git a/dom/media/mediasink/AudioSinkWrapper.h b/dom/media/mediasink/AudioSinkWrapper.h new file mode 100644 index 0000000000..411983c526 --- /dev/null +++ b/dom/media/mediasink/AudioSinkWrapper.h @@ -0,0 +1,161 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AudioSinkWrapper_h_ +#define AudioSinkWrapper_h_ + +#include "mozilla/AbstractThread.h" +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" + +#include "AudioSink.h" +#include "MediaSink.h" + +namespace mozilla { +class MediaData; +template <class T> +class MediaQueue; + +/** + * A wrapper around AudioSink to provide the interface of MediaSink. + */ +class AudioSinkWrapper : public MediaSink { + using PlaybackParams = AudioSink::PlaybackParams; + + // An AudioSink factory. + class Creator { + public: + virtual ~Creator() = default; + virtual AudioSink* Create() = 0; + }; + + // Wrap around a function object which creates AudioSinks. + template <typename Function> + class CreatorImpl : public Creator { + public: + explicit CreatorImpl(const Function& aFunc) : mFunction(aFunc) {} + AudioSink* Create() override { return mFunction(); } + + private: + Function mFunction; + }; + + public: + template <typename Function> + AudioSinkWrapper(AbstractThread* aOwnerThread, + MediaQueue<AudioData>& aAudioQueue, const Function& aFunc, + double aVolume, double aPlaybackRate, bool aPreservesPitch, + RefPtr<AudioDeviceInfo> aAudioDevice) + : mOwnerThread(aOwnerThread), + mCreator(new CreatorImpl<Function>(aFunc)), + mAudioDevice(std::move(aAudioDevice)), + mIsStarted(false), + mParams(aVolume, aPlaybackRate, aPreservesPitch), + // Give an invalid value to facilitate debug if used before playback + // starts. 
+ mPlayDuration(media::TimeUnit::Invalid()), + mAudioEnded(true), + mAudioQueue(aAudioQueue) {} + + RefPtr<EndedPromise> OnEnded(TrackType aType) override; + media::TimeUnit GetEndTime(TrackType aType) const override; + media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override; + bool HasUnplayedFrames(TrackType aType) const override; + media::TimeUnit UnplayedDuration(TrackType aType) const override; + void DropAudioPacketsIfNeeded(const media::TimeUnit& aMediaPosition); + + void SetVolume(double aVolume) override; + void SetStreamName(const nsAString& aStreamName) override; + void SetPlaybackRate(double aPlaybackRate) override; + void SetPreservesPitch(bool aPreservesPitch) override; + void SetPlaying(bool aPlaying) override; + + double PlaybackRate() const override; + + nsresult Start(const media::TimeUnit& aStartTime, + const MediaInfo& aInfo) override; + void Stop() override; + bool IsStarted() const override; + bool IsPlaying() const override; + + const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; } + + void Shutdown() override; + + void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override; + + void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override; + + private: + // The clock that was in use for the previous position query, allowing to + // detect clock switches. + enum class ClockSource { + // The clock comes from an underlying system-level audio stream. + AudioStream, + // The clock comes from the system clock. + SystemClock, + // The stream is paused, a constant time is reported. + Paused + } mLastClockSource = ClockSource::Paused; + bool IsMuted() const; + void OnMuted(bool aMuted); + virtual ~AudioSinkWrapper(); + + void AssertOwnerThread() const { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + } + + // An AudioSink can be started synchronously from the MDSM thread, or + // asynchronously. + // In synchronous mode, the clock doesn't advance until the sink has been + // created, initialized and started. This is useful for the initial startup, + // and when seeking. + // In asynchronous mode, the clock will keep going forward (using the system + // clock) until the AudioSink is started, at which point the clock will use + // the AudioSink clock. This is used when unmuting a media element. + enum class AudioSinkStartPolicy { SYNC, ASYNC }; + nsresult StartAudioSink(const media::TimeUnit& aStartTime, + AudioSinkStartPolicy aPolicy); + + // Get the current media position using the system clock. This is used when + // the audio is muted, or when the media has no audio track. Otherwise, the + // media's position is based on the clock of the AudioStream. + media::TimeUnit GetSystemClockPosition(TimeStamp aNow) const; + bool CheckIfEnded() const; + + void OnAudioEnded(); + + bool IsAudioSourceEnded(const MediaInfo& aInfo) const; + + const RefPtr<AbstractThread> mOwnerThread; + UniquePtr<Creator> mCreator; + UniquePtr<AudioSink> mAudioSink; + // The output device this AudioSink is playing data to. The system's default + // device is used if this is null. + const RefPtr<AudioDeviceInfo> mAudioDevice; + // Will only exist when media has an audio track. + RefPtr<EndedPromise> mEndedPromise; + MozPromiseHolder<EndedPromise> mEndedPromiseHolder; + + bool mIsStarted; + PlaybackParams mParams; + + TimeStamp mPlayStartTime; + media::TimeUnit mPlayDuration; + + bool mAudioEnded; + MozPromiseRequestHolder<EndedPromise> mAudioSinkEndedPromise; + MediaQueue<AudioData>& mAudioQueue; + + // True if we'd like to treat underrun as silent frames. 
But that can only be + // applied in the special situation for seamless looping. + bool mTreatUnderrunAsSilence = false; +}; + +} // namespace mozilla + +#endif // AudioSinkWrapper_h_ diff --git a/dom/media/mediasink/DecodedStream.cpp b/dom/media/mediasink/DecodedStream.cpp new file mode 100644 index 0000000000..0a488dcfdf --- /dev/null +++ b/dom/media/mediasink/DecodedStream.cpp @@ -0,0 +1,1171 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "DecodedStream.h" + +#include "AudioDecoderInputTrack.h" +#include "AudioSegment.h" +#include "MediaData.h" +#include "MediaDecoderStateMachine.h" +#include "MediaQueue.h" +#include "MediaTrackGraph.h" +#include "MediaTrackListener.h" +#include "SharedBuffer.h" +#include "Tracing.h" +#include "VideoSegment.h" +#include "VideoUtils.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/ProfilerMarkerTypes.h" +#include "mozilla/SyncRunnable.h" +#include "mozilla/gfx/Point.h" +#include "mozilla/StaticPrefs_dom.h" +#include "nsProxyRelease.h" + +namespace mozilla { + +using media::NullableTimeUnit; +using media::TimeUnit; + +extern LazyLogModule gMediaDecoderLog; + +#define LOG_DS(type, fmt, ...) \ + MOZ_LOG(gMediaDecoderLog, type, \ + ("DecodedStream=%p " fmt, this, ##__VA_ARGS__)) + +#define PLAYBACK_PROFILER_MARKER(markerString) \ + PROFILER_MARKER_TEXT(FUNCTION_SIGNATURE, MEDIA_PLAYBACK, {}, markerString) + +/* + * A container class to make it easier to pass the playback info all the + * way to DecodedStreamGraphListener from DecodedStream. + */ +struct PlaybackInfoInit { + TimeUnit mStartTime; + MediaInfo mInfo; +}; + +class DecodedStreamGraphListener; + +class SourceVideoTrackListener : public MediaTrackListener { + public: + SourceVideoTrackListener(DecodedStreamGraphListener* aGraphListener, + SourceMediaTrack* aVideoTrack, + MediaTrack* aAudioTrack, + nsISerialEventTarget* aDecoderThread); + + void NotifyOutput(MediaTrackGraph* aGraph, + TrackTime aCurrentTrackTime) override; + void NotifyEnded(MediaTrackGraph* aGraph) override; + + private: + const RefPtr<DecodedStreamGraphListener> mGraphListener; + const RefPtr<SourceMediaTrack> mVideoTrack; + const RefPtr<const MediaTrack> mAudioTrack; + const RefPtr<nsISerialEventTarget> mDecoderThread; + TrackTime mLastVideoOutputTime = 0; +}; + +class DecodedStreamGraphListener { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(DecodedStreamGraphListener) + public: + DecodedStreamGraphListener( + nsISerialEventTarget* aDecoderThread, AudioDecoderInputTrack* aAudioTrack, + MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedHolder, + SourceMediaTrack* aVideoTrack, + MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedHolder) + : mDecoderThread(aDecoderThread), + mVideoTrackListener( + aVideoTrack ? 
MakeRefPtr<SourceVideoTrackListener>(
+                          this, aVideoTrack, aAudioTrack, aDecoderThread)
+                    : nullptr),
+        mAudioEndedHolder(std::move(aAudioEndedHolder)),
+        mVideoEndedHolder(std::move(aVideoEndedHolder)),
+        mAudioTrack(aAudioTrack),
+        mVideoTrack(aVideoTrack) {
+    MOZ_ASSERT(NS_IsMainThread());
+    MOZ_ASSERT(mDecoderThread);
+
+    if (mAudioTrack) {
+      mOnAudioOutput = mAudioTrack->OnOutput().Connect(
+          mDecoderThread,
+          [self = RefPtr<DecodedStreamGraphListener>(this)](TrackTime aTime) {
+            self->NotifyOutput(MediaSegment::AUDIO, aTime);
+          });
+      mOnAudioEnd = mAudioTrack->OnEnd().Connect(
+          mDecoderThread, [self = RefPtr<DecodedStreamGraphListener>(this)]() {
+            self->NotifyEnded(MediaSegment::AUDIO);
+          });
+    } else {
+      mAudioEnded = true;
+      mAudioEndedHolder.ResolveIfExists(true, __func__);
+    }
+
+    if (mVideoTrackListener) {
+      mVideoTrack->AddListener(mVideoTrackListener);
+    } else {
+      mVideoEnded = true;
+      mVideoEndedHolder.ResolveIfExists(true, __func__);
+    }
+  }
+
+  void Close() {
+    AssertOnDecoderThread();
+    if (mAudioTrack) {
+      mAudioTrack->Close();
+    }
+    if (mVideoTrack) {
+      mVideoTrack->End();
+    }
+    mAudioEndedHolder.ResolveIfExists(false, __func__);
+    mVideoEndedHolder.ResolveIfExists(false, __func__);
+    mOnAudioOutput.DisconnectIfExists();
+    mOnAudioEnd.DisconnectIfExists();
+  }
+
+  void NotifyOutput(MediaSegment::Type aType, TrackTime aCurrentTrackTime) {
+    AssertOnDecoderThread();
+    if (aType == MediaSegment::AUDIO) {
+      mAudioOutputFrames = aCurrentTrackTime;
+    } else if (aType == MediaSegment::VIDEO) {
+      if (aCurrentTrackTime >= mVideoEndTime) {
+        mVideoTrack->End();
+      }
+    } else {
+      MOZ_CRASH("Unexpected track type");
+    }
+
+    MOZ_ASSERT_IF(aType == MediaSegment::AUDIO, !mAudioEnded);
+    MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, !mVideoEnded);
+    // This situation can happen when playing audio at a >1x playback rate,
+    // because the audio output clock isn't aligned with the graph time and
+    // advances faster. E.g. at playback rate 2, when the graph time passes
+    // 10s, the audio clock time has already advanced by 20s. After the audio
+    // track has ended, the video track would trigger the clock, but the video
+    // time still follows the graph time, which is smaller than the previous
+    // audio clock time and should be ignored.
+    if (aCurrentTrackTime <= mLastOutputTime) {
+      MOZ_ASSERT(aType == MediaSegment::VIDEO);
+      return;
+    }
+    MOZ_ASSERT(aCurrentTrackTime > mLastOutputTime);
+    mLastOutputTime = aCurrentTrackTime;
+
+    // The video track should drive the clock only when the audio track
+    // doesn't exist or has reached the end.
+    MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, mAudioEnded);
+    const MediaTrack* track = aType == MediaSegment::VIDEO
+                                  ? static_cast<MediaTrack*>(mVideoTrack)
+                                  : static_cast<MediaTrack*>(mAudioTrack);
+    mOnOutput.Notify(track->TrackTimeToMicroseconds(aCurrentTrackTime));
+  }
+
+  void NotifyEnded(MediaSegment::Type aType) {
+    AssertOnDecoderThread();
+    if (aType == MediaSegment::AUDIO) {
+      MOZ_ASSERT(!mAudioEnded);
+      mAudioEnded = true;
+      mAudioEndedHolder.ResolveIfExists(true, __func__);
+    } else if (aType == MediaSegment::VIDEO) {
+      MOZ_ASSERT(!mVideoEnded);
+      mVideoEnded = true;
+      mVideoEndedHolder.ResolveIfExists(true, __func__);
+    } else {
+      MOZ_CRASH("Unexpected track type");
+    }
+  }
+
+  /**
+   * Tell the graph listener to end the track sourced by the given track after
+   * it has seen at least aEnd worth of output reported as processed by the
+   * graph.
+   *
+   * A TrackTime of TRACK_TIME_MAX indicates that the track has no end and is
+   * the default.
+ * + * This method of ending tracks is needed because the MediaTrackGraph + * processes ended tracks (through SourceMediaTrack::EndTrack) at the + * beginning of an iteration, but waits until the end of the iteration to + * process any ControlMessages. When such a ControlMessage is a listener that + * is to be added to a track that has ended in its very first iteration, the + * track ends before the listener tracking this ending is added. This can lead + * to a MediaStreamTrack ending on main thread (it uses another listener) + * before the listeners to render the track get added, potentially meaning a + * media element doesn't progress before reaching the end although data was + * available. + */ + void EndVideoTrackAt(MediaTrack* aTrack, TrackTime aEnd) { + AssertOnDecoderThread(); + MOZ_DIAGNOSTIC_ASSERT(aTrack == mVideoTrack); + mVideoEndTime = aEnd; + } + + void Forget() { + MOZ_ASSERT(NS_IsMainThread()); + if (mVideoTrackListener && !mVideoTrack->IsDestroyed()) { + mVideoTrack->RemoveListener(mVideoTrackListener); + } + mVideoTrackListener = nullptr; + } + + TrackTime GetAudioFramesPlayed() { + AssertOnDecoderThread(); + return mAudioOutputFrames; + } + + MediaEventSource<int64_t>& OnOutput() { return mOnOutput; } + + private: + ~DecodedStreamGraphListener() { + MOZ_ASSERT(mAudioEndedHolder.IsEmpty()); + MOZ_ASSERT(mVideoEndedHolder.IsEmpty()); + } + + inline void AssertOnDecoderThread() const { + MOZ_ASSERT(mDecoderThread->IsOnCurrentThread()); + } + + const RefPtr<nsISerialEventTarget> mDecoderThread; + + // Accessible on any thread, but only notify on the decoder thread. + MediaEventProducer<int64_t> mOnOutput; + + RefPtr<SourceVideoTrackListener> mVideoTrackListener; + + // These can be resolved on the main thread on creation if there is no + // corresponding track, otherwise they are resolved on the decoder thread. + MozPromiseHolder<DecodedStream::EndedPromise> mAudioEndedHolder; + MozPromiseHolder<DecodedStream::EndedPromise> mVideoEndedHolder; + + // Decoder thread only. + TrackTime mAudioOutputFrames = 0; + TrackTime mLastOutputTime = 0; + bool mAudioEnded = false; + bool mVideoEnded = false; + + // Any thread. + const RefPtr<AudioDecoderInputTrack> mAudioTrack; + const RefPtr<SourceMediaTrack> mVideoTrack; + MediaEventListener mOnAudioOutput; + MediaEventListener mOnAudioEnd; + Atomic<TrackTime> mVideoEndTime{TRACK_TIME_MAX}; +}; + +SourceVideoTrackListener::SourceVideoTrackListener( + DecodedStreamGraphListener* aGraphListener, SourceMediaTrack* aVideoTrack, + MediaTrack* aAudioTrack, nsISerialEventTarget* aDecoderThread) + : mGraphListener(aGraphListener), + mVideoTrack(aVideoTrack), + mAudioTrack(aAudioTrack), + mDecoderThread(aDecoderThread) {} + +void SourceVideoTrackListener::NotifyOutput(MediaTrackGraph* aGraph, + TrackTime aCurrentTrackTime) { + aGraph->AssertOnGraphThreadOrNotRunning(); + if (mAudioTrack && !mAudioTrack->Ended()) { + // Only audio playout drives the clock forward, if present and live. + return; + } + // The graph can iterate without time advancing, but the invariant is that + // time can never go backwards. 
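+  // E.g. two consecutive graph iterations can report the same track time;
+  // the duplicate is simply dropped below instead of being forwarded to the
+  // decoder thread (equality is the only tolerated non-advance).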
+ if (aCurrentTrackTime <= mLastVideoOutputTime) { + MOZ_ASSERT(aCurrentTrackTime == mLastVideoOutputTime); + return; + } + mLastVideoOutputTime = aCurrentTrackTime; + mDecoderThread->Dispatch(NS_NewRunnableFunction( + "SourceVideoTrackListener::NotifyOutput", + [self = RefPtr<SourceVideoTrackListener>(this), aCurrentTrackTime]() { + self->mGraphListener->NotifyOutput(MediaSegment::VIDEO, + aCurrentTrackTime); + })); +} + +void SourceVideoTrackListener::NotifyEnded(MediaTrackGraph* aGraph) { + aGraph->AssertOnGraphThreadOrNotRunning(); + mDecoderThread->Dispatch(NS_NewRunnableFunction( + "SourceVideoTrackListener::NotifyEnded", + [self = RefPtr<SourceVideoTrackListener>(this)]() { + self->mGraphListener->NotifyEnded(MediaSegment::VIDEO); + })); +} + +/** + * All MediaStream-related data is protected by the decoder's monitor. We have + * at most one DecodedStreamData per MediaDecoder. XXX Its tracks are used as + * inputs for all output tracks created by OutputStreamManager after calls to + * captureStream/UntilEnded. Seeking creates new source tracks, as does + * replaying after the input as ended. In the latter case, the new sources are + * not connected to tracks created by captureStreamUntilEnded. + */ +class DecodedStreamData final { + public: + DecodedStreamData( + PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph, + RefPtr<ProcessedMediaTrack> aAudioOutputTrack, + RefPtr<ProcessedMediaTrack> aVideoOutputTrack, + MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise, + MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise, + float aPlaybackRate, float aVolume, bool aPreservesPitch, + nsISerialEventTarget* aDecoderThread); + ~DecodedStreamData(); + MediaEventSource<int64_t>& OnOutput(); + // This is used to mark track as closed and should be called before Forget(). + // Decoder thread only. + void Close(); + // After calling this function, the DecodedStreamData would be destroyed. + // Main thread only. + void Forget(); + void GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo); + + void WriteVideoToSegment(layers::Image* aImage, const TimeUnit& aStart, + const TimeUnit& aEnd, + const gfx::IntSize& aIntrinsicSize, + const TimeStamp& aTimeStamp, VideoSegment* aOutput, + const PrincipalHandle& aPrincipalHandle, + double aPlaybackRate); + + /* The following group of fields are protected by the decoder's monitor + * and can be read or written on any thread. + */ + // Count of audio frames written to the track + int64_t mAudioFramesWritten; + // Count of video frames written to the track in the track's rate + TrackTime mVideoTrackWritten; + // mNextAudioTime is the end timestamp for the last packet sent to the track. + // Therefore audio packets starting at or after this time need to be copied + // to the output track. + TimeUnit mNextAudioTime; + // mLastVideoStartTime is the start timestamp for the last packet sent to the + // track. Therefore video packets starting after this time need to be copied + // to the output track. + NullableTimeUnit mLastVideoStartTime; + // mLastVideoEndTime is the end timestamp for the last packet sent to the + // track. It is used to adjust durations of chunks sent to the output track + // when there are overlaps in VideoData. + NullableTimeUnit mLastVideoEndTime; + // The timestamp of the last frame, so we can ensure time never goes + // backwards. + TimeStamp mLastVideoTimeStamp; + // The last video image sent to the track. Useful if we need to replicate + // the image. 
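+  // (SendVideo() falls back to this image when it needs to fill a gap, or to
+  // a black frame sized to mLastVideoImageDisplaySize when no frame was ever
+  // sent, so consumers keep progressing at end of stream.)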
+ RefPtr<layers::Image> mLastVideoImage; + gfx::IntSize mLastVideoImageDisplaySize; + bool mHaveSentFinishAudio; + bool mHaveSentFinishVideo; + + const RefPtr<AudioDecoderInputTrack> mAudioTrack; + const RefPtr<SourceMediaTrack> mVideoTrack; + const RefPtr<ProcessedMediaTrack> mAudioOutputTrack; + const RefPtr<ProcessedMediaTrack> mVideoOutputTrack; + const RefPtr<MediaInputPort> mAudioPort; + const RefPtr<MediaInputPort> mVideoPort; + const RefPtr<DecodedStream::EndedPromise> mAudioEndedPromise; + const RefPtr<DecodedStream::EndedPromise> mVideoEndedPromise; + const RefPtr<DecodedStreamGraphListener> mListener; +}; + +DecodedStreamData::DecodedStreamData( + PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph, + RefPtr<ProcessedMediaTrack> aAudioOutputTrack, + RefPtr<ProcessedMediaTrack> aVideoOutputTrack, + MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise, + MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise, + float aPlaybackRate, float aVolume, bool aPreservesPitch, + nsISerialEventTarget* aDecoderThread) + : mAudioFramesWritten(0), + mVideoTrackWritten(0), + mNextAudioTime(aInit.mStartTime), + mHaveSentFinishAudio(false), + mHaveSentFinishVideo(false), + mAudioTrack(aInit.mInfo.HasAudio() + ? AudioDecoderInputTrack::Create( + aGraph, aDecoderThread, aInit.mInfo.mAudio, + aPlaybackRate, aVolume, aPreservesPitch) + : nullptr), + mVideoTrack(aInit.mInfo.HasVideo() + ? aGraph->CreateSourceTrack(MediaSegment::VIDEO) + : nullptr), + mAudioOutputTrack(std::move(aAudioOutputTrack)), + mVideoOutputTrack(std::move(aVideoOutputTrack)), + mAudioPort((mAudioOutputTrack && mAudioTrack) + ? mAudioOutputTrack->AllocateInputPort(mAudioTrack) + : nullptr), + mVideoPort((mVideoOutputTrack && mVideoTrack) + ? mVideoOutputTrack->AllocateInputPort(mVideoTrack) + : nullptr), + mAudioEndedPromise(aAudioEndedPromise.Ensure(__func__)), + mVideoEndedPromise(aVideoEndedPromise.Ensure(__func__)), + // DecodedStreamGraphListener will resolve these promises. 
+ mListener(MakeRefPtr<DecodedStreamGraphListener>( + aDecoderThread, mAudioTrack, std::move(aAudioEndedPromise), + mVideoTrack, std::move(aVideoEndedPromise))) { + MOZ_ASSERT(NS_IsMainThread()); +} + +DecodedStreamData::~DecodedStreamData() { + MOZ_ASSERT(NS_IsMainThread()); + if (mAudioTrack) { + mAudioTrack->Destroy(); + } + if (mVideoTrack) { + mVideoTrack->Destroy(); + } + if (mAudioPort) { + mAudioPort->Destroy(); + } + if (mVideoPort) { + mVideoPort->Destroy(); + } +} + +MediaEventSource<int64_t>& DecodedStreamData::OnOutput() { + return mListener->OnOutput(); +} + +void DecodedStreamData::Close() { mListener->Close(); } + +void DecodedStreamData::Forget() { mListener->Forget(); } + +void DecodedStreamData::GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo) { + CopyUTF8toUTF16(nsPrintfCString("%p", this), aInfo.mInstance); + aInfo.mAudioFramesWritten = mAudioFramesWritten; + aInfo.mStreamAudioWritten = mListener->GetAudioFramesPlayed(); + aInfo.mNextAudioTime = mNextAudioTime.ToMicroseconds(); + aInfo.mLastVideoStartTime = + mLastVideoStartTime.valueOr(TimeUnit::FromMicroseconds(-1)) + .ToMicroseconds(); + aInfo.mLastVideoEndTime = + mLastVideoEndTime.valueOr(TimeUnit::FromMicroseconds(-1)) + .ToMicroseconds(); + aInfo.mHaveSentFinishAudio = mHaveSentFinishAudio; + aInfo.mHaveSentFinishVideo = mHaveSentFinishVideo; +} + +DecodedStream::DecodedStream( + MediaDecoderStateMachine* aStateMachine, + nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack, + CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks, double aVolume, + double aPlaybackRate, bool aPreservesPitch, + MediaQueue<AudioData>& aAudioQueue, MediaQueue<VideoData>& aVideoQueue, + RefPtr<AudioDeviceInfo> aAudioDevice) + : mOwnerThread(aStateMachine->OwnerThread()), + mDummyTrack(std::move(aDummyTrack)), + mWatchManager(this, mOwnerThread), + mPlaying(false, "DecodedStream::mPlaying"), + mPrincipalHandle(aStateMachine->OwnerThread(), PRINCIPAL_HANDLE_NONE, + "DecodedStream::mPrincipalHandle (Mirror)"), + mCanonicalOutputPrincipal(aStateMachine->CanonicalOutputPrincipal()), + mOutputTracks(std::move(aOutputTracks)), + mVolume(aVolume), + mPlaybackRate(aPlaybackRate), + mPreservesPitch(aPreservesPitch), + mAudioQueue(aAudioQueue), + mVideoQueue(aVideoQueue), + mAudioDevice(std::move(aAudioDevice)) {} + +DecodedStream::~DecodedStream() { + MOZ_ASSERT(mStartTime.isNothing(), "playback should've ended."); +} + +RefPtr<DecodedStream::EndedPromise> DecodedStream::OnEnded(TrackType aType) { + AssertOwnerThread(); + MOZ_ASSERT(mStartTime.isSome()); + + if (aType == TrackInfo::kAudioTrack && mInfo.HasAudio()) { + return mAudioEndedPromise; + } + if (aType == TrackInfo::kVideoTrack && mInfo.HasVideo()) { + return mVideoEndedPromise; + } + return nullptr; +} + +nsresult DecodedStream::Start(const TimeUnit& aStartTime, + const MediaInfo& aInfo) { + AssertOwnerThread(); + MOZ_ASSERT(mStartTime.isNothing(), "playback already started."); + + AUTO_PROFILER_LABEL(FUNCTION_SIGNATURE, MEDIA_PLAYBACK); + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("StartTime=%" PRId64, + aStartTime.ToMicroseconds()); + PLAYBACK_PROFILER_MARKER(markerString); + } + LOG_DS(LogLevel::Debug, "Start() mStartTime=%" PRId64, + aStartTime.ToMicroseconds()); + + mStartTime.emplace(aStartTime); + mLastOutputTime = TimeUnit::Zero(); + mInfo = aInfo; + mPlaying = true; + mPrincipalHandle.Connect(mCanonicalOutputPrincipal); + mWatchManager.Watch(mPlaying, &DecodedStream::PlayingChanged); + mAudibilityMonitor.emplace( + 
mInfo.mAudio.mRate, + StaticPrefs::dom_media_silence_duration_for_audibility()); + ConnectListener(); + + class R : public Runnable { + public: + R(PlaybackInfoInit&& aInit, + nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack, + nsTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks, + MozPromiseHolder<MediaSink::EndedPromise>&& aAudioEndedPromise, + MozPromiseHolder<MediaSink::EndedPromise>&& aVideoEndedPromise, + float aPlaybackRate, float aVolume, bool aPreservesPitch, + nsISerialEventTarget* aDecoderThread) + : Runnable("CreateDecodedStreamData"), + mInit(std::move(aInit)), + mDummyTrack(std::move(aDummyTrack)), + mOutputTracks(std::move(aOutputTracks)), + mAudioEndedPromise(std::move(aAudioEndedPromise)), + mVideoEndedPromise(std::move(aVideoEndedPromise)), + mPlaybackRate(aPlaybackRate), + mVolume(aVolume), + mPreservesPitch(aPreservesPitch), + mDecoderThread(aDecoderThread) {} + NS_IMETHOD Run() override { + MOZ_ASSERT(NS_IsMainThread()); + RefPtr<ProcessedMediaTrack> audioOutputTrack; + RefPtr<ProcessedMediaTrack> videoOutputTrack; + for (const auto& track : mOutputTracks) { + if (track->mType == MediaSegment::AUDIO) { + MOZ_DIAGNOSTIC_ASSERT( + !audioOutputTrack, + "We only support capturing to one output track per kind"); + audioOutputTrack = track; + } else if (track->mType == MediaSegment::VIDEO) { + MOZ_DIAGNOSTIC_ASSERT( + !videoOutputTrack, + "We only support capturing to one output track per kind"); + videoOutputTrack = track; + } else { + MOZ_CRASH("Unknown media type"); + } + } + if (!mDummyTrack) { + // No dummy track - no graph. This could be intentional as the owning + // media element needs access to the tracks on main thread to set up + // forwarding of them before playback starts. MDSM will re-create + // DecodedStream once a dummy track is available. This effectively halts + // playback for this DecodedStream. + return NS_OK; + } + if ((audioOutputTrack && audioOutputTrack->IsDestroyed()) || + (videoOutputTrack && videoOutputTrack->IsDestroyed())) { + // A track has been destroyed and we'll soon get re-created with a + // proper one. This effectively halts playback for this DecodedStream. 
+ return NS_OK; + } + mData = MakeUnique<DecodedStreamData>( + std::move(mInit), mDummyTrack->mTrack->Graph(), + std::move(audioOutputTrack), std::move(videoOutputTrack), + std::move(mAudioEndedPromise), std::move(mVideoEndedPromise), + mPlaybackRate, mVolume, mPreservesPitch, mDecoderThread); + return NS_OK; + } + UniquePtr<DecodedStreamData> ReleaseData() { return std::move(mData); } + + private: + PlaybackInfoInit mInit; + nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack; + const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks; + MozPromiseHolder<MediaSink::EndedPromise> mAudioEndedPromise; + MozPromiseHolder<MediaSink::EndedPromise> mVideoEndedPromise; + UniquePtr<DecodedStreamData> mData; + const float mPlaybackRate; + const float mVolume; + const bool mPreservesPitch; + const RefPtr<nsISerialEventTarget> mDecoderThread; + }; + + MozPromiseHolder<DecodedStream::EndedPromise> audioEndedHolder; + MozPromiseHolder<DecodedStream::EndedPromise> videoEndedHolder; + PlaybackInfoInit init{aStartTime, aInfo}; + nsCOMPtr<nsIRunnable> r = + new R(std::move(init), mDummyTrack, mOutputTracks.Clone(), + std::move(audioEndedHolder), std::move(videoEndedHolder), + static_cast<float>(mPlaybackRate), static_cast<float>(mVolume), + mPreservesPitch, mOwnerThread); + SyncRunnable::DispatchToThread(GetMainThreadSerialEventTarget(), r); + mData = static_cast<R*>(r.get())->ReleaseData(); + + if (mData) { + mAudioEndedPromise = mData->mAudioEndedPromise; + mVideoEndedPromise = mData->mVideoEndedPromise; + mOutputListener = mData->OnOutput().Connect(mOwnerThread, this, + &DecodedStream::NotifyOutput); + SendData(); + } + return NS_OK; +} + +void DecodedStream::Stop() { + AssertOwnerThread(); + MOZ_ASSERT(mStartTime.isSome(), "playback not started."); + + TRACE("DecodedStream::Stop"); + LOG_DS(LogLevel::Debug, "Stop()"); + + DisconnectListener(); + ResetVideo(mPrincipalHandle); + ResetAudio(); + mStartTime.reset(); + mAudioEndedPromise = nullptr; + mVideoEndedPromise = nullptr; + + // Clear mData immediately when this playback session ends so we won't + // send data to the wrong track in SendData() in next playback session. + DestroyData(std::move(mData)); + + mPrincipalHandle.DisconnectIfConnected(); + mWatchManager.Unwatch(mPlaying, &DecodedStream::PlayingChanged); + mAudibilityMonitor.reset(); +} + +bool DecodedStream::IsStarted() const { + AssertOwnerThread(); + return mStartTime.isSome(); +} + +bool DecodedStream::IsPlaying() const { + AssertOwnerThread(); + return IsStarted() && mPlaying; +} + +void DecodedStream::Shutdown() { + AssertOwnerThread(); + mPrincipalHandle.DisconnectIfConnected(); + mWatchManager.Shutdown(); +} + +void DecodedStream::DestroyData(UniquePtr<DecodedStreamData>&& aData) { + AssertOwnerThread(); + + if (!aData) { + return; + } + + TRACE("DecodedStream::DestroyData"); + mOutputListener.Disconnect(); + + aData->Close(); + NS_DispatchToMainThread( + NS_NewRunnableFunction("DecodedStream::DestroyData", + [data = std::move(aData)]() { data->Forget(); })); +} + +void DecodedStream::SetPlaying(bool aPlaying) { + AssertOwnerThread(); + + // Resume/pause matters only when playback started. + if (mStartTime.isNothing()) { + return; + } + + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("Playing=%s", aPlaying ? 
"true" : "false"); + PLAYBACK_PROFILER_MARKER(markerString); + } + LOG_DS(LogLevel::Debug, "playing (%d) -> (%d)", mPlaying.Ref(), aPlaying); + mPlaying = aPlaying; +} + +void DecodedStream::SetVolume(double aVolume) { + AssertOwnerThread(); + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("Volume=%f", aVolume); + PLAYBACK_PROFILER_MARKER(markerString); + } + if (mVolume == aVolume) { + return; + } + mVolume = aVolume; + if (mData && mData->mAudioTrack) { + mData->mAudioTrack->SetVolume(static_cast<float>(aVolume)); + } +} + +void DecodedStream::SetPlaybackRate(double aPlaybackRate) { + AssertOwnerThread(); + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("PlaybackRate=%f", aPlaybackRate); + PLAYBACK_PROFILER_MARKER(markerString); + } + if (mPlaybackRate == aPlaybackRate) { + return; + } + mPlaybackRate = aPlaybackRate; + if (mData && mData->mAudioTrack) { + mData->mAudioTrack->SetPlaybackRate(static_cast<float>(aPlaybackRate)); + } +} + +void DecodedStream::SetPreservesPitch(bool aPreservesPitch) { + AssertOwnerThread(); + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("PreservesPitch=%s", + aPreservesPitch ? "true" : "false"); + PLAYBACK_PROFILER_MARKER(markerString); + } + if (mPreservesPitch == aPreservesPitch) { + return; + } + mPreservesPitch = aPreservesPitch; + if (mData && mData->mAudioTrack) { + mData->mAudioTrack->SetPreservesPitch(aPreservesPitch); + } +} + +double DecodedStream::PlaybackRate() const { + AssertOwnerThread(); + return mPlaybackRate; +} + +void DecodedStream::SendAudio(const PrincipalHandle& aPrincipalHandle) { + AssertOwnerThread(); + + if (!mInfo.HasAudio()) { + return; + } + + if (mData->mHaveSentFinishAudio) { + return; + } + + TRACE("DecodedStream::SendAudio"); + // It's OK to hold references to the AudioData because AudioData + // is ref-counted. + AutoTArray<RefPtr<AudioData>, 10> audio; + mAudioQueue.GetElementsAfter(mData->mNextAudioTime, &audio); + + // This will happen everytime when the media sink switches from `AudioSink` to + // `DecodedStream`. If we don't insert the silence then the A/V will be out of + // sync. + RefPtr<AudioData> nextAudio = audio.IsEmpty() ? nullptr : audio[0]; + if (RefPtr<AudioData> silence = CreateSilenceDataIfGapExists(nextAudio)) { + LOG_DS(LogLevel::Verbose, "Detect a gap in audio, insert silence=%u", + silence->Frames()); + audio.InsertElementAt(0, silence); + } + + // Append data which hasn't been sent to audio track before. 
+ mData->mAudioTrack->AppendData(audio, aPrincipalHandle); + for (uint32_t i = 0; i < audio.Length(); ++i) { + CheckIsDataAudible(audio[i]); + mData->mNextAudioTime = audio[i]->GetEndTime(); + mData->mAudioFramesWritten += audio[i]->Frames(); + } + + if (mAudioQueue.IsFinished() && !mData->mHaveSentFinishAudio) { + mData->mAudioTrack->NotifyEndOfStream(); + mData->mHaveSentFinishAudio = true; + } +} + +already_AddRefed<AudioData> DecodedStream::CreateSilenceDataIfGapExists( + RefPtr<AudioData>& aNextAudio) { + AssertOwnerThread(); + if (!aNextAudio) { + return nullptr; + } + CheckedInt64 audioWrittenOffset = + mData->mAudioFramesWritten + + TimeUnitToFrames(*mStartTime, aNextAudio->mRate); + CheckedInt64 frameOffset = + TimeUnitToFrames(aNextAudio->mTime, aNextAudio->mRate); + if (audioWrittenOffset.value() >= frameOffset.value()) { + return nullptr; + } + // We've written less audio than our frame offset, return a silence data so we + // have enough audio to be at the correct offset for our current frames. + CheckedInt64 missingFrames = frameOffset - audioWrittenOffset; + AlignedAudioBuffer silenceBuffer(missingFrames.value() * + aNextAudio->mChannels); + if (!silenceBuffer) { + NS_WARNING("OOM in DecodedStream::CreateSilenceDataIfGapExists"); + return nullptr; + } + auto duration = media::TimeUnit(missingFrames.value(), aNextAudio->mRate); + if (!duration.IsValid()) { + NS_WARNING("Int overflow in DecodedStream::CreateSilenceDataIfGapExists"); + return nullptr; + } + RefPtr<AudioData> silenceData = new AudioData( + aNextAudio->mOffset, aNextAudio->mTime, std::move(silenceBuffer), + aNextAudio->mChannels, aNextAudio->mRate); + MOZ_DIAGNOSTIC_ASSERT(duration == silenceData->mDuration, "must be equal"); + return silenceData.forget(); +} + +void DecodedStream::CheckIsDataAudible(const AudioData* aData) { + MOZ_ASSERT(aData); + + mAudibilityMonitor->Process(aData); + bool isAudible = mAudibilityMonitor->RecentlyAudible(); + + if (isAudible != mIsAudioDataAudible) { + mIsAudioDataAudible = isAudible; + mAudibleEvent.Notify(mIsAudioDataAudible); + } +} + +void DecodedStreamData::WriteVideoToSegment( + layers::Image* aImage, const TimeUnit& aStart, const TimeUnit& aEnd, + const gfx::IntSize& aIntrinsicSize, const TimeStamp& aTimeStamp, + VideoSegment* aOutput, const PrincipalHandle& aPrincipalHandle, + double aPlaybackRate) { + RefPtr<layers::Image> image = aImage; + aOutput->AppendFrame(image.forget(), aIntrinsicSize, aPrincipalHandle, false, + aTimeStamp); + // Extend this so we get accurate durations for all frames. + // Because this track is pushed, we need durations so the graph can track + // when playout of the track has finished. + MOZ_ASSERT(aPlaybackRate > 0); + TrackTime start = aStart.ToTicksAtRate(mVideoTrack->mSampleRate); + TrackTime end = aEnd.ToTicksAtRate(mVideoTrack->mSampleRate); + aOutput->ExtendLastFrameBy( + static_cast<TrackTime>((float)(end - start) / aPlaybackRate)); + + mLastVideoStartTime = Some(aStart); + mLastVideoEndTime = Some(aEnd); + mLastVideoTimeStamp = aTimeStamp; +} + +static bool ZeroDurationAtLastChunk(VideoSegment& aInput) { + // Get the last video frame's start time in VideoSegment aInput. + // If the start time is equal to the duration of aInput, means the last video + // frame's duration is zero. 
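+  // E.g. (example values) a segment with a total duration of 100 ticks whose
+  // last frame starts at tick 100 has a zero-duration tail frame; the caller
+  // uses this to append a compensating end-of-stream chunk with a minimum
+  // duration so ForwardedInputTrack does not drop the final frame.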
+ TrackTime lastVideoStratTime; + aInput.GetLastFrame(&lastVideoStratTime); + return lastVideoStratTime == aInput.GetDuration(); +} + +void DecodedStream::ResetAudio() { + AssertOwnerThread(); + + if (!mData) { + return; + } + + if (!mInfo.HasAudio()) { + return; + } + + TRACE("DecodedStream::ResetAudio"); + mData->mAudioTrack->ClearFutureData(); + if (const RefPtr<AudioData>& v = mAudioQueue.PeekFront()) { + mData->mNextAudioTime = v->mTime; + mData->mHaveSentFinishAudio = false; + } +} + +void DecodedStream::ResetVideo(const PrincipalHandle& aPrincipalHandle) { + AssertOwnerThread(); + + if (!mData) { + return; + } + + if (!mInfo.HasVideo()) { + return; + } + + TRACE("DecodedStream::ResetVideo"); + TrackTime cleared = mData->mVideoTrack->ClearFutureData(); + mData->mVideoTrackWritten -= cleared; + if (mData->mHaveSentFinishVideo && cleared > 0) { + mData->mHaveSentFinishVideo = false; + mData->mListener->EndVideoTrackAt(mData->mVideoTrack, TRACK_TIME_MAX); + } + + VideoSegment resetter; + TimeStamp currentTime; + TimeUnit currentPosition = GetPosition(¤tTime); + + // Giving direct consumers a frame (really *any* frame, so in this case: + // nullptr) at an earlier time than the previous, will signal to that consumer + // to discard any frames ahead in time of the new frame. To be honest, this is + // an ugly hack because the direct listeners of the MediaTrackGraph do not + // have an API that supports clearing the future frames. ImageContainer and + // VideoFrameContainer do though, and we will need to move to a similar API + // for video tracks as part of bug 1493618. + resetter.AppendFrame(nullptr, mData->mLastVideoImageDisplaySize, + aPrincipalHandle, false, currentTime); + mData->mVideoTrack->AppendData(&resetter); + + // Consumer buffers have been reset. We now set the next time to the start + // time of the current frame, so that it can be displayed again on resuming. + if (RefPtr<VideoData> v = mVideoQueue.PeekFront()) { + mData->mLastVideoStartTime = Some(v->mTime - TimeUnit::FromMicroseconds(1)); + mData->mLastVideoEndTime = Some(v->mTime); + } else { + // There was no current frame in the queue. We set the next time to the + // current time, so we at least don't resume starting in the future. + mData->mLastVideoStartTime = + Some(currentPosition - TimeUnit::FromMicroseconds(1)); + mData->mLastVideoEndTime = Some(currentPosition); + } + + mData->mLastVideoTimeStamp = currentTime; +} + +void DecodedStream::SendVideo(const PrincipalHandle& aPrincipalHandle) { + AssertOwnerThread(); + + if (!mInfo.HasVideo()) { + return; + } + + if (mData->mHaveSentFinishVideo) { + return; + } + + TRACE("DecodedStream::SendVideo"); + VideoSegment output; + AutoTArray<RefPtr<VideoData>, 10> video; + + // It's OK to hold references to the VideoData because VideoData + // is ref-counted. + mVideoQueue.GetElementsAfter( + mData->mLastVideoStartTime.valueOr(mStartTime.ref()), &video); + + TimeStamp currentTime; + TimeUnit currentPosition = GetPosition(¤tTime); + + if (mData->mLastVideoTimeStamp.IsNull()) { + mData->mLastVideoTimeStamp = currentTime; + } + + for (uint32_t i = 0; i < video.Length(); ++i) { + VideoData* v = video[i]; + TimeUnit lastStart = mData->mLastVideoStartTime.valueOr( + mStartTime.ref() - TimeUnit::FromMicroseconds(1)); + TimeUnit lastEnd = mData->mLastVideoEndTime.valueOr(mStartTime.ref()); + + if (lastEnd < v->mTime) { + // Write last video frame to catch up. mLastVideoImage can be null here + // which is fine, it just means there's no video. 
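+      // E.g. (times illustrative) with lastEnd = 5s and the next decoded
+      // frame starting at 7s, the previously sent image is re-appended to
+      // cover [5s, 7s) so the track has no gap.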
+
+      // TODO: |mLastVideoImage| should come from the last image rendered
+      // by the state machine. This will avoid the black frame when capture
+      // happens in the middle of playback (especially in the middle of a
+      // video frame). E.g. if we have a video frame that is 30 sec long
+      // and capture happens at 15 sec, we'll have to append a black frame
+      // that is 15 sec long.
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      mData->WriteVideoToSegment(mData->mLastVideoImage, lastEnd, v->mTime,
+                                 mData->mLastVideoImageDisplaySize, t, &output,
+                                 aPrincipalHandle, mPlaybackRate);
+      lastEnd = v->mTime;
+    }
+
+    if (lastStart < v->mTime) {
+      // This frame starts after the last frame's start. Note that this could be
+      // before the last frame's end time for some videos. This only matters for
+      // the track's lifetime in the MTG, as rendering is based on timestamps,
+      // aka frame start times.
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      TimeUnit end = std::max(
+          v->GetEndTime(),
+          lastEnd + TimeUnit::FromMicroseconds(
+                        mData->mVideoTrack->TrackTimeToMicroseconds(1) + 1));
+      mData->mLastVideoImage = v->mImage;
+      mData->mLastVideoImageDisplaySize = v->mDisplay;
+      mData->WriteVideoToSegment(v->mImage, lastEnd, end, v->mDisplay, t,
+                                 &output, aPrincipalHandle, mPlaybackRate);
+    }
+  }
+
+  // Check the output is not empty.
+  bool compensateEOS = false;
+  bool forceBlack = false;
+  if (output.GetLastFrame()) {
+    compensateEOS = ZeroDurationAtLastChunk(output);
+  }
+
+  if (output.GetDuration() > 0) {
+    mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&output);
+  }
+
+  if (mVideoQueue.IsFinished() && !mData->mHaveSentFinishVideo) {
+    if (!mData->mLastVideoImage) {
+      // We have video, but the video queue finished before we received any
+      // frame. We insert a black frame to progress any consuming
+      // HTMLMediaElement. This mirrors the behavior of VideoSink.
+
+      // Force a frame - can be null
+      compensateEOS = true;
+      // Force frame to be black
+      forceBlack = true;
+      // Override the frame's size (will be 0x0 otherwise)
+      mData->mLastVideoImageDisplaySize = mInfo.mVideo.mDisplay;
+      LOG_DS(LogLevel::Debug, "No mLastVideoImage");
+    }
+    if (compensateEOS) {
+      VideoSegment endSegment;
+      auto start = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+      mData->WriteVideoToSegment(
+          mData->mLastVideoImage, start, start,
+          mData->mLastVideoImageDisplaySize,
+          currentTime + (start - currentPosition).ToTimeDuration(), &endSegment,
+          aPrincipalHandle, mPlaybackRate);
+      // ForwardedInputTrack drops zero duration frames, even at the end of
+      // the track. Give the frame a minimum duration so that it is not
+      // dropped.
+      endSegment.ExtendLastFrameBy(1);
+      LOG_DS(LogLevel::Debug,
+             "compensateEOS: start %s, duration %" PRId64
+             ", mPlaybackRate %lf, sample rate %" PRId32,
+             start.ToString().get(), endSegment.GetDuration(), mPlaybackRate,
+             mData->mVideoTrack->mSampleRate);
+      MOZ_ASSERT(endSegment.GetDuration() > 0);
+      if (forceBlack) {
+        endSegment.ReplaceWithDisabled();
+      }
+      mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&endSegment);
+    }
+    mData->mListener->EndVideoTrackAt(mData->mVideoTrack,
+                                      mData->mVideoTrackWritten);
+    mData->mHaveSentFinishVideo = true;
+  }
+}
+
+void DecodedStream::SendData() {
+  AssertOwnerThread();
+
+  // Not yet created on the main thread. MDSM will try again later.
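+  // SendData() pushes any newly decoded data into the tracks. It is re-run
+  // from the audio/video queue push and finish events and from the mPlaying
+  // watch (see ConnectListener()).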
+ if (!mData) { + return; + } + + if (!mPlaying) { + return; + } + + LOG_DS(LogLevel::Verbose, "SendData()"); + SendAudio(mPrincipalHandle); + SendVideo(mPrincipalHandle); +} + +TimeUnit DecodedStream::GetEndTime(TrackType aType) const { + AssertOwnerThread(); + TRACE("DecodedStream::GetEndTime"); + if (aType == TrackInfo::kAudioTrack && mInfo.HasAudio() && mData) { + auto t = mStartTime.ref() + + media::TimeUnit(mData->mAudioFramesWritten, mInfo.mAudio.mRate); + if (t.IsValid()) { + return t; + } + } else if (aType == TrackInfo::kVideoTrack && mData) { + return mData->mLastVideoEndTime.valueOr(mStartTime.ref()); + } + return TimeUnit::Zero(); +} + +TimeUnit DecodedStream::GetPosition(TimeStamp* aTimeStamp) { + AssertOwnerThread(); + TRACE("DecodedStream::GetPosition"); + // This is only called after MDSM starts playback. So mStartTime is + // guaranteed to be something. + MOZ_ASSERT(mStartTime.isSome()); + if (aTimeStamp) { + *aTimeStamp = TimeStamp::Now(); + } + return mStartTime.ref() + mLastOutputTime; +} + +void DecodedStream::NotifyOutput(int64_t aTime) { + AssertOwnerThread(); + TimeUnit time = TimeUnit::FromMicroseconds(aTime); + if (time == mLastOutputTime) { + return; + } + MOZ_ASSERT(mLastOutputTime < time); + mLastOutputTime = time; + auto currentTime = GetPosition(); + + if (profiler_thread_is_being_profiled_for_markers()) { + nsPrintfCString markerString("OutputTime=%" PRId64, + currentTime.ToMicroseconds()); + PLAYBACK_PROFILER_MARKER(markerString); + } + LOG_DS(LogLevel::Verbose, "time is now %" PRId64, + currentTime.ToMicroseconds()); + + // Remove audio samples that have been played by MTG from the queue. + RefPtr<AudioData> a = mAudioQueue.PeekFront(); + for (; a && a->GetEndTime() <= currentTime;) { + LOG_DS(LogLevel::Debug, "Dropping audio [%" PRId64 ",%" PRId64 "]", + a->mTime.ToMicroseconds(), a->GetEndTime().ToMicroseconds()); + RefPtr<AudioData> releaseMe = mAudioQueue.PopFront(); + a = mAudioQueue.PeekFront(); + } +} + +void DecodedStream::PlayingChanged() { + AssertOwnerThread(); + TRACE("DecodedStream::PlayingChanged"); + + if (!mPlaying) { + // On seek or pause we discard future frames. + ResetVideo(mPrincipalHandle); + ResetAudio(); + } +} + +void DecodedStream::ConnectListener() { + AssertOwnerThread(); + + mAudioPushListener = mAudioQueue.PushEvent().Connect( + mOwnerThread, this, &DecodedStream::SendData); + mAudioFinishListener = mAudioQueue.FinishEvent().Connect( + mOwnerThread, this, &DecodedStream::SendData); + mVideoPushListener = mVideoQueue.PushEvent().Connect( + mOwnerThread, this, &DecodedStream::SendData); + mVideoFinishListener = mVideoQueue.FinishEvent().Connect( + mOwnerThread, this, &DecodedStream::SendData); + mWatchManager.Watch(mPlaying, &DecodedStream::SendData); +} + +void DecodedStream::DisconnectListener() { + AssertOwnerThread(); + + mAudioPushListener.Disconnect(); + mVideoPushListener.Disconnect(); + mAudioFinishListener.Disconnect(); + mVideoFinishListener.Disconnect(); + mWatchManager.Unwatch(mPlaying, &DecodedStream::SendData); +} + +void DecodedStream::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) { + AssertOwnerThread(); + int64_t startTime = mStartTime.isSome() ? 
mStartTime->ToMicroseconds() : -1; + aInfo.mDecodedStream.mInstance = + NS_ConvertUTF8toUTF16(nsPrintfCString("%p", this)); + aInfo.mDecodedStream.mStartTime = startTime; + aInfo.mDecodedStream.mLastOutputTime = mLastOutputTime.ToMicroseconds(); + aInfo.mDecodedStream.mPlaying = mPlaying.Ref(); + auto lastAudio = mAudioQueue.PeekBack(); + aInfo.mDecodedStream.mLastAudio = + lastAudio ? lastAudio->GetEndTime().ToMicroseconds() : -1; + aInfo.mDecodedStream.mAudioQueueFinished = mAudioQueue.IsFinished(); + aInfo.mDecodedStream.mAudioQueueSize = + AssertedCast<int>(mAudioQueue.GetSize()); + if (mData) { + mData->GetDebugInfo(aInfo.mDecodedStream.mData); + } +} + +#undef LOG_DS + +} // namespace mozilla diff --git a/dom/media/mediasink/DecodedStream.h b/dom/media/mediasink/DecodedStream.h new file mode 100644 index 0000000000..4709ffeda6 --- /dev/null +++ b/dom/media/mediasink/DecodedStream.h @@ -0,0 +1,154 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DecodedStream_h_ +#define DecodedStream_h_ + +#include "AudibilityMonitor.h" +#include "MediaEventSource.h" +#include "MediaInfo.h" +#include "MediaSegment.h" +#include "MediaSink.h" + +#include "mozilla/AbstractThread.h" +#include "mozilla/Maybe.h" +#include "mozilla/MozPromise.h" +#include "mozilla/RefPtr.h" +#include "mozilla/StateMirroring.h" +#include "mozilla/UniquePtr.h" + +namespace mozilla { + +class DecodedStreamData; +class MediaDecoderStateMachine; +class AudioData; +class VideoData; +struct PlaybackInfoInit; +class ProcessedMediaTrack; +class TimeStamp; + +template <class T> +class MediaQueue; + +class DecodedStream : public MediaSink { + public: + DecodedStream(MediaDecoderStateMachine* aStateMachine, + nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack, + CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks, + double aVolume, double aPlaybackRate, bool aPreservesPitch, + MediaQueue<AudioData>& aAudioQueue, + MediaQueue<VideoData>& aVideoQueue, + RefPtr<AudioDeviceInfo> aAudioDevice); + + RefPtr<EndedPromise> OnEnded(TrackType aType) override; + media::TimeUnit GetEndTime(TrackType aType) const override; + media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override; + bool HasUnplayedFrames(TrackType aType) const override { + // TODO: bug 1755026 + return false; + } + + media::TimeUnit UnplayedDuration(TrackType aType) const override { + // TODO: bug 1755026 + return media::TimeUnit::Zero(); + } + + void SetVolume(double aVolume) override; + void SetPlaybackRate(double aPlaybackRate) override; + void SetPreservesPitch(bool aPreservesPitch) override; + void SetPlaying(bool aPlaying) override; + + double PlaybackRate() const override; + + nsresult Start(const media::TimeUnit& aStartTime, + const MediaInfo& aInfo) override; + void Stop() override; + bool IsStarted() const override; + bool IsPlaying() const override; + void Shutdown() override; + void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override; + const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; } + + MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; } + + protected: + virtual ~DecodedStream(); + + private: + void DestroyData(UniquePtr<DecodedStreamData>&& aData); + void SendAudio(const PrincipalHandle& aPrincipalHandle); + void 
SendVideo(const PrincipalHandle& aPrincipalHandle); + void ResetAudio(); + void ResetVideo(const PrincipalHandle& aPrincipalHandle); + void SendData(); + void NotifyOutput(int64_t aTime); + void CheckIsDataAudible(const AudioData* aData); + + void AssertOwnerThread() const { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + } + + void PlayingChanged(); + + void ConnectListener(); + void DisconnectListener(); + + // Give the audio that is going to be appended next as an input, if there is + // a gap between audio's time and the frames that we've written, then return + // a silence data that has same amount of frames and can be used to fill the + // gap. If no gap exists, return nullptr. + already_AddRefed<AudioData> CreateSilenceDataIfGapExists( + RefPtr<AudioData>& aNextAudio); + + const RefPtr<AbstractThread> mOwnerThread; + + // Used to access the graph. + const nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack; + + /* + * Worker thread only members. + */ + WatchManager<DecodedStream> mWatchManager; + UniquePtr<DecodedStreamData> mData; + RefPtr<EndedPromise> mAudioEndedPromise; + RefPtr<EndedPromise> mVideoEndedPromise; + + Watchable<bool> mPlaying; + Mirror<PrincipalHandle> mPrincipalHandle; + AbstractCanonical<PrincipalHandle>* mCanonicalOutputPrincipal; + const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks; + + double mVolume; + double mPlaybackRate; + bool mPreservesPitch; + + media::NullableTimeUnit mStartTime; + media::TimeUnit mLastOutputTime; + MediaInfo mInfo; + // True when stream is producing audible sound, false when stream is silent. + bool mIsAudioDataAudible = false; + Maybe<AudibilityMonitor> mAudibilityMonitor; + MediaEventProducer<bool> mAudibleEvent; + + MediaQueue<AudioData>& mAudioQueue; + MediaQueue<VideoData>& mVideoQueue; + + // This is the audio device we were told to play out to. + // All audio is captured, so nothing is actually played out -- but we report + // this upwards as it could save us from being recreated when the sink + // changes. + const RefPtr<AudioDeviceInfo> mAudioDevice; + + MediaEventListener mAudioPushListener; + MediaEventListener mVideoPushListener; + MediaEventListener mAudioFinishListener; + MediaEventListener mVideoFinishListener; + MediaEventListener mOutputListener; +}; + +} // namespace mozilla + +#endif // DecodedStream_h_ diff --git a/dom/media/mediasink/MediaSink.h b/dom/media/mediasink/MediaSink.h new file mode 100644 index 0000000000..de6f26dcc9 --- /dev/null +++ b/dom/media/mediasink/MediaSink.h @@ -0,0 +1,142 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MediaSink_h_ +#define MediaSink_h_ + +#include "MediaInfo.h" +#include "mozilla/MozPromise.h" +#include "mozilla/RefPtr.h" +#include "mozilla/dom/MediaDebugInfoBinding.h" +#include "nsISupportsImpl.h" + +class AudioDeviceInfo; + +namespace mozilla { + +class TimeStamp; +class VideoFrameContainer; + +/** + * A consumer of audio/video data which plays audio and video tracks and + * manages A/V sync between them. + * + * A typical sink sends audio/video outputs to the speaker and screen. + * However, there are also sinks which capture the output of an media element + * and send the output to a MediaStream. 
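+ *
+ * Going by the per-method requirements documented below, a typical session
+ * looks roughly like this (a sketch only, not taken from an actual caller):
+ *
+ *   sink->Start(startTime, info);  // begin a session while stopped
+ *   sink->SetPlaying(true);        // resume; pause again with false
+ *   sink->GetPosition();           // poll the A/V-synced position
+ *   sink->Stop();                  // finish the session
+ *   sink->Shutdown();              // then release all resources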
+ * + * This class is used to move A/V sync management and audio/video rendering + * out of MDSM so it is possible for subclasses to do external rendering using + * specific hardware which is required by TV projects and CDM. + * + * Note this class is not thread-safe and should be called from the state + * machine thread only. + */ +class MediaSink { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaSink); + typedef mozilla::TrackInfo::TrackType TrackType; + + // EndedPromise needs to be a non-exclusive promise as it is shared between + // both the AudioSink and VideoSink. + typedef MozPromise<bool, nsresult, /* IsExclusive = */ false> EndedPromise; + + // Return a promise which is resolved when the track finishes + // or null if no such track. + // Must be called after playback starts. + virtual RefPtr<EndedPromise> OnEnded(TrackType aType) = 0; + + // Return the end time of the audio/video data that has been consumed + // or 0 if no such track. + // Must be called after playback starts. + virtual media::TimeUnit GetEndTime(TrackType aType) const = 0; + + // Return playback position of the media. + // Since A/V sync is always maintained by this sink, there is no need to + // specify whether we want to get audio or video position. + // aTimeStamp returns the timeStamp corresponding to the returned position + // which is used by the compositor to derive the render time of video frames. + // Must be called after playback starts. + virtual media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) = 0; + + // Return true if there are data consumed but not played yet. + // Can be called in any state. + virtual bool HasUnplayedFrames(TrackType aType) const = 0; + + // Return the duration of data consumed but not played yet. + // Can be called in any state. + virtual media::TimeUnit UnplayedDuration(TrackType aType) const = 0; + + // Set volume of the audio track. + // Do nothing if this sink has no audio track. + // Can be called in any state. + virtual void SetVolume(double aVolume) {} + + // Set the audio stream name. + // Does nothing if this sink has no audio stream. + // Can be called in any state. + virtual void SetStreamName(const nsAString& aStreamName) {} + + // Set the playback rate. + // Can be called in any state. + virtual void SetPlaybackRate(double aPlaybackRate) {} + + // Whether to preserve pitch of the audio track. + // Do nothing if this sink has no audio track. + // Can be called in any state. + virtual void SetPreservesPitch(bool aPreservesPitch) {} + + // Pause/resume the playback. Only work after playback starts. + virtual void SetPlaying(bool aPlaying) = 0; + + // Get the playback rate. + // Can be called in any state. + virtual double PlaybackRate() const = 0; + + // Single frame rendering operation may need to be done before playback + // started (1st frame) or right after seek completed or playback stopped. + // Do nothing if this sink has no video track. Can be called in any state. + virtual void Redraw(const VideoInfo& aInfo){}; + + // Begin a playback session with the provided start time and media info. + // Must be called when playback is stopped. + virtual nsresult Start(const media::TimeUnit& aStartTime, + const MediaInfo& aInfo) = 0; + + // Finish a playback session. + // Must be called after playback starts. + virtual void Stop() = 0; + + // Return true if playback has started. + // Can be called in any state. + virtual bool IsStarted() const = 0; + + // Return true if playback is started and not paused otherwise false. + // Can be called in any state. 
+ virtual bool IsPlaying() const = 0; + + // The audio output device this MediaSink is playing audio data to. The + // default device is used if this returns null. + virtual const AudioDeviceInfo* AudioDevice() const = 0; + + // Called on the state machine thread to shut down the sink. All resources + // allocated by this sink should be released. + // Must be called after playback stopped. + virtual void Shutdown() {} + + virtual void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) {} + + virtual void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {} + + virtual void EnableTreatAudioUnderrunAsSilence(bool aEnabled) {} + + protected: + virtual ~MediaSink() = default; +}; + +} // namespace mozilla + +#endif // MediaSink_h_ diff --git a/dom/media/mediasink/VideoSink.cpp b/dom/media/mediasink/VideoSink.cpp new file mode 100644 index 0000000000..906efdf0db --- /dev/null +++ b/dom/media/mediasink/VideoSink.cpp @@ -0,0 +1,706 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef XP_WIN +// Include Windows headers required for enabling high precision timers. +# include <windows.h> +# include <mmsystem.h> +#endif + +#include "VideoSink.h" + +#include "MediaQueue.h" +#include "VideoUtils.h" + +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/ProfilerMarkerTypes.h" +#include "mozilla/StaticPrefs_browser.h" +#include "mozilla/StaticPrefs_media.h" + +namespace mozilla { +extern LazyLogModule gMediaDecoderLog; +} + +#undef FMT + +#define FMT(x, ...) "VideoSink=%p " x, this, ##__VA_ARGS__ +#define VSINK_LOG(x, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, (FMT(x, ##__VA_ARGS__))) +#define VSINK_LOG_V(x, ...) \ + MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, (FMT(x, ##__VA_ARGS__))) + +namespace mozilla { + +using namespace mozilla::layers; + +// Minimum update frequency is 1/120th of a second, i.e. half the +// duration of a 60-fps frame. +static const int64_t MIN_UPDATE_INTERVAL_US = 1000000 / (60 * 2); + +static void SetImageToGreenPixel(PlanarYCbCrImage* aImage) { + static uint8_t greenPixel[] = {0x00, 0x00, 0x00}; + PlanarYCbCrData data; + data.mYChannel = greenPixel; + data.mCbChannel = greenPixel + 1; + data.mCrChannel = greenPixel + 2; + data.mYStride = data.mCbCrStride = 1; + data.mPictureRect = gfx::IntRect(0, 0, 1, 1); + data.mYUVColorSpace = gfx::YUVColorSpace::BT601; + aImage->CopyData(data); +} + +VideoSink::VideoSink(AbstractThread* aThread, MediaSink* aAudioSink, + MediaQueue<VideoData>& aVideoQueue, + VideoFrameContainer* aContainer, + FrameStatistics& aFrameStats, + uint32_t aVQueueSentToCompositerSize) + : mOwnerThread(aThread), + mAudioSink(aAudioSink), + mVideoQueue(aVideoQueue), + mContainer(aContainer), + mProducerID(ImageContainer::AllocateProducerID()), + mFrameStats(aFrameStats), + mOldCompositorDroppedCount(mContainer ? mContainer->GetDroppedImageCount() + : 0), + mPendingDroppedCount(0), + mHasVideo(false), + mUpdateScheduler(aThread), + mVideoQueueSendToCompositorSize(aVQueueSentToCompositerSize), + mMinVideoQueueSize(StaticPrefs::media_ruin_av_sync_enabled() ? 
1 : 0) +#ifdef XP_WIN + , + mHiResTimersRequested(false) +#endif +{ + MOZ_ASSERT(mAudioSink, "AudioSink should exist."); + + if (StaticPrefs::browser_measurement_render_anims_and_video_solid() && + mContainer) { + InitializeBlankImage(); + MOZ_ASSERT(mBlankImage, "Blank image should exist."); + } +} + +VideoSink::~VideoSink() { +#ifdef XP_WIN + MOZ_ASSERT(!mHiResTimersRequested); +#endif +} + +RefPtr<VideoSink::EndedPromise> VideoSink::OnEnded(TrackType aType) { + AssertOwnerThread(); + MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts."); + + if (aType == TrackInfo::kAudioTrack) { + return mAudioSink->OnEnded(aType); + } else if (aType == TrackInfo::kVideoTrack) { + return mEndPromise; + } + return nullptr; +} + +media::TimeUnit VideoSink::GetEndTime(TrackType aType) const { + AssertOwnerThread(); + MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts."); + + if (aType == TrackInfo::kVideoTrack) { + return mVideoFrameEndTime; + } else if (aType == TrackInfo::kAudioTrack) { + return mAudioSink->GetEndTime(aType); + } + return media::TimeUnit::Zero(); +} + +media::TimeUnit VideoSink::GetPosition(TimeStamp* aTimeStamp) { + AssertOwnerThread(); + return mAudioSink->GetPosition(aTimeStamp); +} + +bool VideoSink::HasUnplayedFrames(TrackType aType) const { + AssertOwnerThread(); + MOZ_ASSERT(aType == TrackInfo::kAudioTrack, + "Not implemented for non audio tracks."); + + return mAudioSink->HasUnplayedFrames(aType); +} + +media::TimeUnit VideoSink::UnplayedDuration(TrackType aType) const { + AssertOwnerThread(); + MOZ_ASSERT(aType == TrackInfo::kAudioTrack, + "Not implemented for non audio tracks."); + + return mAudioSink->UnplayedDuration(aType); +} + +void VideoSink::SetPlaybackRate(double aPlaybackRate) { + AssertOwnerThread(); + + mAudioSink->SetPlaybackRate(aPlaybackRate); +} + +void VideoSink::SetVolume(double aVolume) { + AssertOwnerThread(); + + mAudioSink->SetVolume(aVolume); +} + +void VideoSink::SetStreamName(const nsAString& aStreamName) { + AssertOwnerThread(); + + mAudioSink->SetStreamName(aStreamName); +} + +void VideoSink::SetPreservesPitch(bool aPreservesPitch) { + AssertOwnerThread(); + + mAudioSink->SetPreservesPitch(aPreservesPitch); +} + +double VideoSink::PlaybackRate() const { + AssertOwnerThread(); + + return mAudioSink->PlaybackRate(); +} + +void VideoSink::EnsureHighResTimersOnOnlyIfPlaying() { +#ifdef XP_WIN + const bool needed = IsPlaying(); + if (needed == mHiResTimersRequested) { + return; + } + if (needed) { + // Ensure high precision timers are enabled on Windows, otherwise the + // VideoSink isn't woken up at reliable intervals to set the next frame, and + // we drop frames while painting. Note that each call must be matched by a + // corresponding timeEndPeriod() call. Enabling high precision timers causes + // the CPU to wake up more frequently on Windows 7 and earlier, which causes + // more CPU load and battery use. So we only enable high precision timers + // when we're actually playing. + timeBeginPeriod(1); + } else { + timeEndPeriod(1); + } + mHiResTimersRequested = needed; +#endif +} + +void VideoSink::SetPlaying(bool aPlaying) { + AssertOwnerThread(); + VSINK_LOG_V(" playing (%d) -> (%d)", mAudioSink->IsPlaying(), aPlaying); + + if (!aPlaying) { + // Reset any update timer if paused. + mUpdateScheduler.Reset(); + // Since playback is paused, tell compositor to render only current frame. 
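+    // Rendering exactly one frame keeps the paused position visible while the
+    // update scheduler stays reset, so no further frames are pushed to the
+    // compositor until playback resumes.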
+    TimeStamp nowTime;
+    const auto clockTime = mAudioSink->GetPosition(&nowTime);
+    RenderVideoFrames(1, clockTime.ToMicroseconds(), nowTime);
+    if (mContainer) {
+      mContainer->ClearCachedResources();
+    }
+    if (mSecondaryContainer) {
+      mSecondaryContainer->ClearCachedResources();
+    }
+  }
+
+  mAudioSink->SetPlaying(aPlaying);
+
+  if (mHasVideo && aPlaying) {
+    // There's no thread in VideoSink for pulling video frames, so we need to
+    // trigger rendering when we enter the playing state, because the
+    // VideoQueue may be full already.
+    TryUpdateRenderedVideoFrames();
+  }
+
+  EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+nsresult VideoSink::Start(const media::TimeUnit& aStartTime,
+                          const MediaInfo& aInfo) {
+  AssertOwnerThread();
+  VSINK_LOG("[%s]", __func__);
+
+  nsresult rv = mAudioSink->Start(aStartTime, aInfo);
+
+  mHasVideo = aInfo.HasVideo();
+
+  if (mHasVideo) {
+    mEndPromise = mEndPromiseHolder.Ensure(__func__);
+
+    // If the underlying MediaSink has an end promise for the video track (which
+    // happens when mAudioSink refers to a DecodedStream), we must wait for it
+    // to complete before resolving our own end promise. Otherwise, MDSM might
+    // stop playback before DecodedStream plays to the end and cause
+    // test_streams_element_capture.html to time out.
+    RefPtr<EndedPromise> p = mAudioSink->OnEnded(TrackInfo::kVideoTrack);
+    if (p) {
+      RefPtr<VideoSink> self = this;
+      p->Then(
+          mOwnerThread, __func__,
+          [self]() {
+            self->mVideoSinkEndRequest.Complete();
+            self->TryUpdateRenderedVideoFrames();
+            // It is possible the video queue size is 0 and we have no
+            // frames to render. However, we need to call
+            // MaybeResolveEndPromise() to ensure mEndPromiseHolder is
+            // resolved.
+            self->MaybeResolveEndPromise();
+          },
+          [self]() {
+            self->mVideoSinkEndRequest.Complete();
+            self->TryUpdateRenderedVideoFrames();
+            self->MaybeResolveEndPromise();
+          })
+          ->Track(mVideoSinkEndRequest);
+    }
+
+    ConnectListener();
+    // Run the render loop at least once so we can resolve the end promise
+    // when video duration is 0.
+    UpdateRenderedVideoFrames();
+  }
+  return rv;
+}
+
+void VideoSink::Stop() {
+  AssertOwnerThread();
+  MOZ_ASSERT(mAudioSink->IsStarted(), "playback not started.");
+  VSINK_LOG("[%s]", __func__);
+
+  mAudioSink->Stop();
+
+  mUpdateScheduler.Reset();
+  if (mHasVideo) {
+    DisconnectListener();
+    mVideoSinkEndRequest.DisconnectIfExists();
+    mEndPromiseHolder.ResolveIfExists(true, __func__);
+    mEndPromise = nullptr;
+  }
+  mVideoFrameEndTime = media::TimeUnit::Zero();
+
+  EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+bool VideoSink::IsStarted() const {
+  AssertOwnerThread();
+
+  return mAudioSink->IsStarted();
+}
+
+bool VideoSink::IsPlaying() const {
+  AssertOwnerThread();
+
+  return mAudioSink->IsPlaying();
+}
+
+const AudioDeviceInfo* VideoSink::AudioDevice() const {
+  return mAudioSink->AudioDevice();
+}
+
+void VideoSink::Shutdown() {
+  AssertOwnerThread();
+  MOZ_ASSERT(!mAudioSink->IsStarted(), "must be called after playback stops.");
+  VSINK_LOG("[%s]", __func__);
+
+  mAudioSink->Shutdown();
+}
+
+void VideoSink::OnVideoQueuePushed(RefPtr<VideoData>&& aSample) {
+  AssertOwnerThread();
+  // Listen to the push event: VideoSink should try rendering ASAP if the first
+  // frame arrives but the update scheduler has not been triggered yet.
+  if (!aSample->IsSentToCompositor()) {
+    // Since we push rendered frames back to the queue, we will receive
+    // push events for them. We only need to trigger the render loop
+    // when this frame has not been rendered yet.
+ TryUpdateRenderedVideoFrames(); + } +} + +void VideoSink::OnVideoQueueFinished() { + AssertOwnerThread(); + // Run render loop if the end promise is not resolved yet. + if (!mUpdateScheduler.IsScheduled() && mAudioSink->IsPlaying() && + !mEndPromiseHolder.IsEmpty()) { + UpdateRenderedVideoFrames(); + } +} + +void VideoSink::Redraw(const VideoInfo& aInfo) { + AUTO_PROFILER_LABEL("VideoSink::Redraw", MEDIA_PLAYBACK); + AssertOwnerThread(); + + // No video track, nothing to draw. + if (!aInfo.IsValid() || !mContainer) { + return; + } + + auto now = TimeStamp::Now(); + + RefPtr<VideoData> video = VideoQueue().PeekFront(); + if (video) { + if (mBlankImage) { + video->mImage = mBlankImage; + } + video->MarkSentToCompositor(); + mContainer->SetCurrentFrame(video->mDisplay, video->mImage, now); + if (mSecondaryContainer) { + mSecondaryContainer->SetCurrentFrame(video->mDisplay, video->mImage, now); + } + return; + } + + // When we reach here, it means there are no frames in this video track. + // Draw a blank frame to ensure there is something in the image container + // to fire 'loadeddata'. + + RefPtr<Image> blank = + mContainer->GetImageContainer()->CreatePlanarYCbCrImage(); + mContainer->SetCurrentFrame(aInfo.mDisplay, blank, now); + + if (mSecondaryContainer) { + mSecondaryContainer->SetCurrentFrame(aInfo.mDisplay, blank, now); + } +} + +void VideoSink::TryUpdateRenderedVideoFrames() { + AUTO_PROFILER_LABEL("VideoSink::TryUpdateRenderedVideoFrames", + MEDIA_PLAYBACK); + AssertOwnerThread(); + if (mUpdateScheduler.IsScheduled() || !mAudioSink->IsPlaying()) { + return; + } + RefPtr<VideoData> v = VideoQueue().PeekFront(); + if (!v) { + // No frames to render. + return; + } + + TimeStamp nowTime; + const media::TimeUnit clockTime = mAudioSink->GetPosition(&nowTime); + if (clockTime >= v->mTime) { + // Time to render this frame. + UpdateRenderedVideoFrames(); + return; + } + + // If we send this future frame to the compositor now, it will be rendered + // immediately and break A/V sync. Instead, we schedule a timer to send it + // later. 
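+  // The wall-clock delay below is the media-time gap scaled by the playback
+  // rate; e.g. (illustrative numbers) a frame 80ms ahead of the clock at 2x
+  // playback is due in roughly 40ms.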
+ int64_t delta = + (v->mTime - clockTime).ToMicroseconds() / mAudioSink->PlaybackRate(); + TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta); + RefPtr<VideoSink> self = this; + mUpdateScheduler.Ensure( + target, [self]() { self->UpdateRenderedVideoFramesByTimer(); }, + [self]() { self->UpdateRenderedVideoFramesByTimer(); }); +} + +void VideoSink::UpdateRenderedVideoFramesByTimer() { + AssertOwnerThread(); + mUpdateScheduler.CompleteRequest(); + UpdateRenderedVideoFrames(); +} + +void VideoSink::ConnectListener() { + AssertOwnerThread(); + mPushListener = VideoQueue().PushEvent().Connect( + mOwnerThread, this, &VideoSink::OnVideoQueuePushed); + mFinishListener = VideoQueue().FinishEvent().Connect( + mOwnerThread, this, &VideoSink::OnVideoQueueFinished); +} + +void VideoSink::DisconnectListener() { + AssertOwnerThread(); + mPushListener.Disconnect(); + mFinishListener.Disconnect(); +} + +void VideoSink::RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime, + const TimeStamp& aClockTimeStamp) { + AUTO_PROFILER_LABEL("VideoSink::RenderVideoFrames", MEDIA_PLAYBACK); + AssertOwnerThread(); + + AutoTArray<RefPtr<VideoData>, 16> frames; + VideoQueue().GetFirstElements(aMaxFrames, &frames); + if (frames.IsEmpty() || !mContainer) { + return; + } + + AutoTArray<ImageContainer::NonOwningImage, 16> images; + TimeStamp lastFrameTime; + double playbackRate = mAudioSink->PlaybackRate(); + for (uint32_t i = 0; i < frames.Length(); ++i) { + VideoData* frame = frames[i]; + bool wasSent = frame->IsSentToCompositor(); + frame->MarkSentToCompositor(); + + if (!frame->mImage || !frame->mImage->IsValid() || + !frame->mImage->GetSize().width || !frame->mImage->GetSize().height) { + continue; + } + + if (frame->mTime.IsNegative()) { + // Frame times before the start time are invalid; drop such frames + continue; + } + + MOZ_ASSERT(!aClockTimeStamp.IsNull()); + int64_t delta = frame->mTime.ToMicroseconds() - aClockTime; + TimeStamp t = + aClockTimeStamp + TimeDuration::FromMicroseconds(delta / playbackRate); + if (!lastFrameTime.IsNull() && t <= lastFrameTime) { + // Timestamps out of order; drop the new frame. In theory we should + // probably replace the previous frame with the new frame if the + // timestamps are equal, but this is a corrupt video file already so + // never mind. + continue; + } + MOZ_ASSERT(!t.IsNull()); + lastFrameTime = t; + + ImageContainer::NonOwningImage* img = images.AppendElement(); + img->mTimeStamp = t; + img->mImage = frame->mImage; + if (mBlankImage) { + img->mImage = mBlankImage; + } + img->mFrameID = frame->mFrameID; + img->mProducerID = mProducerID; + + VSINK_LOG_V("playing video frame %" PRId64 + " (id=%x, vq-queued=%zu, clock=%" PRId64 ")", + frame->mTime.ToMicroseconds(), frame->mFrameID, + VideoQueue().GetSize(), aClockTime); + if (!wasSent) { + PROFILER_MARKER("PlayVideo", MEDIA_PLAYBACK, {}, MediaSampleMarker, + frame->mTime.ToMicroseconds(), + frame->GetEndTime().ToMicroseconds(), + VideoQueue().GetSize()); + } + } + + if (images.Length() > 0) { + mContainer->SetCurrentFrames(frames[0]->mDisplay, images); + + if (mSecondaryContainer) { + mSecondaryContainer->SetCurrentFrames(frames[0]->mDisplay, images); + } + } +} + +void VideoSink::UpdateRenderedVideoFrames() { + AUTO_PROFILER_LABEL("VideoSink::UpdateRenderedVideoFrames", MEDIA_PLAYBACK); + AssertOwnerThread(); + MOZ_ASSERT(mAudioSink->IsPlaying(), "should be called while playing."); + + // Get the current playback position. 
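+  // The position comes from the wrapped audio sink, so the frame dropping and
+  // rendering below are driven by the same clock that is used for A/V sync.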
+ TimeStamp nowTime; + const auto clockTime = mAudioSink->GetPosition(&nowTime); + MOZ_ASSERT(!clockTime.IsNegative(), "Should have positive clock time."); + + uint32_t sentToCompositorCount = 0; + uint32_t droppedInSink = 0; + + // Skip frames up to the playback position. + media::TimeUnit lastFrameEndTime; + while (VideoQueue().GetSize() > mMinVideoQueueSize && + clockTime >= VideoQueue().PeekFront()->GetEndTime()) { + RefPtr<VideoData> frame = VideoQueue().PopFront(); + lastFrameEndTime = frame->GetEndTime(); + if (frame->IsSentToCompositor()) { + sentToCompositorCount++; + } else { + droppedInSink++; + VSINK_LOG_V("discarding video frame mTime=%" PRId64 + " clock_time=%" PRId64, + frame->mTime.ToMicroseconds(), clockTime.ToMicroseconds()); + + struct VideoSinkDroppedFrameMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("VideoSinkDroppedFrame"); + } + static void StreamJSONMarkerData( + baseprofiler::SpliceableJSONWriter& aWriter, + int64_t aSampleStartTimeUs, int64_t aSampleEndTimeUs, + int64_t aClockTimeUs) { + aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs); + aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs); + aWriter.IntProperty("clockTimeUs", aClockTimeUs); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time", + MS::Format::Microseconds); + schema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time", + MS::Format::Microseconds); + schema.AddKeyLabelFormat("clockTimeUs", "Audio clock time", + MS::Format::Microseconds); + return schema; + } + }; + profiler_add_marker( + "VideoSinkDroppedFrame", geckoprofiler::category::MEDIA_PLAYBACK, {}, + VideoSinkDroppedFrameMarker{}, frame->mTime.ToMicroseconds(), + frame->GetEndTime().ToMicroseconds(), clockTime.ToMicroseconds()); + } + } + + if (droppedInSink || sentToCompositorCount) { + uint32_t totalCompositorDroppedCount = mContainer->GetDroppedImageCount(); + uint32_t droppedInCompositor = + totalCompositorDroppedCount - mOldCompositorDroppedCount; + if (droppedInCompositor > 0) { + mOldCompositorDroppedCount = totalCompositorDroppedCount; + VSINK_LOG_V("%u video frame previously discarded by compositor", + droppedInCompositor); + } + mPendingDroppedCount += droppedInCompositor; + uint32_t droppedReported = mPendingDroppedCount > sentToCompositorCount + ? sentToCompositorCount + : mPendingDroppedCount; + mPendingDroppedCount -= droppedReported; + + mFrameStats.Accumulate({0, 0, sentToCompositorCount - droppedReported, 0, + droppedInSink, droppedInCompositor}); + } + + // The presentation end time of the last video frame displayed is either + // the end time of the current frame, or if we dropped all frames in the + // queue, the end time of the last frame we removed from the queue. + RefPtr<VideoData> currentFrame = VideoQueue().PeekFront(); + mVideoFrameEndTime = + std::max(mVideoFrameEndTime, + currentFrame ? currentFrame->GetEndTime() : lastFrameEndTime); + + RenderVideoFrames(mVideoQueueSendToCompositorSize, clockTime.ToMicroseconds(), + nowTime); + + MaybeResolveEndPromise(); + + // Get the timestamp of the next frame. Schedule the next update at + // the start time of the next frame. If we don't have a next frame, + // we will run render loops again upon incoming frames. 
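+  // The delay below is clamped to MIN_UPDATE_INTERVAL_US (half of a 60fps
+  // frame, ~8.3ms), so closely spaced frames cannot make this timer fire
+  // arbitrarily often.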
+ nsTArray<RefPtr<VideoData>> frames; + VideoQueue().GetFirstElements(2, &frames); + if (frames.Length() < 2) { + return; + } + + int64_t nextFrameTime = frames[1]->mTime.ToMicroseconds(); + int64_t delta = std::max(nextFrameTime - clockTime.ToMicroseconds(), + MIN_UPDATE_INTERVAL_US); + TimeStamp target = nowTime + TimeDuration::FromMicroseconds( + delta / mAudioSink->PlaybackRate()); + + RefPtr<VideoSink> self = this; + mUpdateScheduler.Ensure( + target, [self]() { self->UpdateRenderedVideoFramesByTimer(); }, + [self]() { self->UpdateRenderedVideoFramesByTimer(); }); +} + +void VideoSink::MaybeResolveEndPromise() { + AssertOwnerThread(); + // All frames are rendered, Let's resolve the promise. + if (VideoQueue().IsFinished() && VideoQueue().GetSize() <= 1 && + !mVideoSinkEndRequest.Exists()) { + if (VideoQueue().GetSize() == 1) { + // Remove the last frame since we have sent it to compositor. + RefPtr<VideoData> frame = VideoQueue().PopFront(); + if (mPendingDroppedCount > 0) { + mFrameStats.Accumulate({0, 0, 0, 0, 0, 1}); + mPendingDroppedCount--; + } else { + mFrameStats.NotifyPresentedFrame(); + } + } + + TimeStamp nowTime; + const auto clockTime = mAudioSink->GetPosition(&nowTime); + + // Clear future frames from the compositor, in case the playback position + // unexpectedly jumped to the end, and all frames between the previous + // playback position and the end were discarded. Old frames based on the + // previous playback position might still be queued in the compositor. See + // bug 1598143 for when this can happen. + mContainer->ClearFutureFrames(nowTime); + if (mSecondaryContainer) { + mSecondaryContainer->ClearFutureFrames(nowTime); + } + + if (clockTime < mVideoFrameEndTime) { + VSINK_LOG_V( + "Not reach video end time yet, reschedule timer to resolve " + "end promise. clockTime=%" PRId64 ", endTime=%" PRId64, + clockTime.ToMicroseconds(), mVideoFrameEndTime.ToMicroseconds()); + int64_t delta = (mVideoFrameEndTime - clockTime).ToMicroseconds() / + mAudioSink->PlaybackRate(); + TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta); + auto resolveEndPromise = [self = RefPtr<VideoSink>(this)]() { + self->mEndPromiseHolder.ResolveIfExists(true, __func__); + self->mUpdateScheduler.CompleteRequest(); + }; + mUpdateScheduler.Ensure(target, std::move(resolveEndPromise), + std::move(resolveEndPromise)); + } else { + mEndPromiseHolder.ResolveIfExists(true, __func__); + } + } +} + +void VideoSink::SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) { + AssertOwnerThread(); + mSecondaryContainer = aSecondary; + if (!IsPlaying() && mSecondaryContainer) { + ImageContainer* mainImageContainer = mContainer->GetImageContainer(); + ImageContainer* secondaryImageContainer = + mSecondaryContainer->GetImageContainer(); + MOZ_DIAGNOSTIC_ASSERT(mainImageContainer); + MOZ_DIAGNOSTIC_ASSERT(secondaryImageContainer); + + // If the video isn't currently playing, get the current frame and display + // that in the secondary container as well. 
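+    // The current image is grabbed while the main container is locked, then
+    // handed to the secondary container with a fresh producer ID and frame ID.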
+ AutoLockImage lockImage(mainImageContainer); + TimeStamp now = TimeStamp::Now(); + if (RefPtr<Image> image = lockImage.GetImage(now)) { + AutoTArray<ImageContainer::NonOwningImage, 1> currentFrame; + currentFrame.AppendElement(ImageContainer::NonOwningImage( + image, now, /* frameID */ 1, + /* producerId */ ImageContainer::AllocateProducerID())); + secondaryImageContainer->SetCurrentImages(currentFrame); + } + } +} + +void VideoSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) { + AssertOwnerThread(); + aInfo.mVideoSink.mIsStarted = IsStarted(); + aInfo.mVideoSink.mIsPlaying = IsPlaying(); + aInfo.mVideoSink.mFinished = VideoQueue().IsFinished(); + aInfo.mVideoSink.mSize = VideoQueue().GetSize(); + aInfo.mVideoSink.mVideoFrameEndTime = mVideoFrameEndTime.ToMicroseconds(); + aInfo.mVideoSink.mHasVideo = mHasVideo; + aInfo.mVideoSink.mVideoSinkEndRequestExists = mVideoSinkEndRequest.Exists(); + aInfo.mVideoSink.mEndPromiseHolderIsEmpty = mEndPromiseHolder.IsEmpty(); + mAudioSink->GetDebugInfo(aInfo); +} + +bool VideoSink::InitializeBlankImage() { + mBlankImage = mContainer->GetImageContainer()->CreatePlanarYCbCrImage(); + if (mBlankImage == nullptr) { + return false; + } + SetImageToGreenPixel(mBlankImage->AsPlanarYCbCrImage()); + return true; +} + +void VideoSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) { + mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled); +} + +} // namespace mozilla diff --git a/dom/media/mediasink/VideoSink.h b/dom/media/mediasink/VideoSink.h new file mode 100644 index 0000000000..7f2528d870 --- /dev/null +++ b/dom/media/mediasink/VideoSink.h @@ -0,0 +1,177 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef VideoSink_h_ +#define VideoSink_h_ + +#include "FrameStatistics.h" +#include "ImageContainer.h" +#include "MediaEventSource.h" +#include "MediaSink.h" +#include "MediaTimer.h" +#include "VideoFrameContainer.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/MozPromise.h" +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" + +namespace mozilla { + +class VideoFrameContainer; +template <class T> +class MediaQueue; + +class VideoSink : public MediaSink { + typedef mozilla::layers::ImageContainer::ProducerID ProducerID; + + public: + VideoSink(AbstractThread* aThread, MediaSink* aAudioSink, + MediaQueue<VideoData>& aVideoQueue, VideoFrameContainer* aContainer, + FrameStatistics& aFrameStats, uint32_t aVQueueSentToCompositerSize); + + RefPtr<EndedPromise> OnEnded(TrackType aType) override; + + media::TimeUnit GetEndTime(TrackType aType) const override; + + media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override; + + bool HasUnplayedFrames(TrackType aType) const override; + media::TimeUnit UnplayedDuration(TrackType aType) const override; + + void SetPlaybackRate(double aPlaybackRate) override; + + void SetVolume(double aVolume) override; + + void SetStreamName(const nsAString& aStreamName) override; + + void SetPreservesPitch(bool aPreservesPitch) override; + + void SetPlaying(bool aPlaying) override; + + double PlaybackRate() const override; + + void Redraw(const VideoInfo& aInfo) override; + + nsresult Start(const media::TimeUnit& aStartTime, + const MediaInfo& aInfo) override; + + void Stop() override; + + bool IsStarted() const override; + + bool IsPlaying() const override; + + const AudioDeviceInfo* AudioDevice() const override; + + void Shutdown() override; + + void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) override; + + void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override; + + void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override; + + private: + virtual ~VideoSink(); + + // VideoQueue listener related. + void OnVideoQueuePushed(RefPtr<VideoData>&& aSample); + void OnVideoQueueFinished(); + void ConnectListener(); + void DisconnectListener(); + + void EnsureHighResTimersOnOnlyIfPlaying(); + + // Sets VideoQueue images into the VideoFrameContainer. Called on the shared + // state machine thread. The first aMaxFrames (at most) are set. + // aClockTime and aClockTimeStamp are used as the baseline for deriving + // timestamps for the frames; when omitted, aMaxFrames must be 1 and + // a null timestamp is passed to the VideoFrameContainer. + // If the VideoQueue is empty, this does nothing. + void RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime = 0, + const TimeStamp& aClickTimeStamp = TimeStamp()); + + // Triggered while videosink is started, videosink becomes "playing" status, + // or VideoQueue event arrived. + void TryUpdateRenderedVideoFrames(); + + // If we have video, display a video frame if it's time for display has + // arrived, otherwise sleep until it's time for the next frame. Update the + // current frame time as appropriate, and trigger ready state update. + // Called on the shared state machine thread. 
+ void UpdateRenderedVideoFrames(); + void UpdateRenderedVideoFramesByTimer(); + + void MaybeResolveEndPromise(); + + void AssertOwnerThread() const { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + } + + MediaQueue<VideoData>& VideoQueue() const { return mVideoQueue; } + + const RefPtr<AbstractThread> mOwnerThread; + const RefPtr<MediaSink> mAudioSink; + MediaQueue<VideoData>& mVideoQueue; + VideoFrameContainer* mContainer; + RefPtr<VideoFrameContainer> mSecondaryContainer; + + // Producer ID to help ImageContainer distinguish different streams of + // FrameIDs. A unique and immutable value per VideoSink. + const ProducerID mProducerID; + + // Used to notify MediaDecoder's frame statistics + FrameStatistics& mFrameStats; + + RefPtr<EndedPromise> mEndPromise; + MozPromiseHolder<EndedPromise> mEndPromiseHolder; + MozPromiseRequestHolder<EndedPromise> mVideoSinkEndRequest; + + // The presentation end time of the last video frame which has been displayed. + media::TimeUnit mVideoFrameEndTime; + + uint32_t mOldCompositorDroppedCount; + uint32_t mPendingDroppedCount; + + // Event listeners for VideoQueue + MediaEventListener mPushListener; + MediaEventListener mFinishListener; + + // True if this sink is going to handle video track. + bool mHasVideo; + + // Used to trigger another update of rendered frames in next round. + DelayedScheduler mUpdateScheduler; + + // Max frame number sent to compositor at a time. + // Based on the pref value obtained in MDSM. + const uint32_t mVideoQueueSendToCompositorSize; + + // Talos tests for the compositor require at least one frame in the + // video queue so that the compositor has something to composit during + // the talos test when the decode is stressed. We have a minimum size + // on the video queue in order to facilitate this talos test. + // Note: Normal playback should not have a queue size of more than 0, + // otherwise A/V sync will be ruined! *Only* make this non-zero for + // testing purposes. + const uint32_t mMinVideoQueueSize; + +#ifdef XP_WIN + // Whether we've called timeBeginPeriod(1) to request high resolution + // timers. We request high resolution timers when playback starts, and + // turn them off when playback is paused. Enabling high resolution + // timers can cause higher CPU usage and battery drain on Windows 7, + // but reduces our frame drop rate. + bool mHiResTimersRequested; +#endif + + RefPtr<layers::Image> mBlankImage; + bool InitializeBlankImage(); +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mediasink/moz.build b/dom/media/mediasink/moz.build new file mode 100644 index 0000000000..6db074538f --- /dev/null +++ b/dom/media/mediasink/moz.build @@ -0,0 +1,25 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "AudioDecoderInputTrack.cpp", + "AudioSink.cpp", + "AudioSinkWrapper.cpp", + "DecodedStream.cpp", + "VideoSink.cpp", +] + +EXPORTS += [ + "MediaSink.h", +] + +LOCAL_INCLUDES += [ + "/dom/media", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +FINAL_LIBRARY = "xul" |