summaryrefslogtreecommitdiffstats
path: root/dom/media/mediasink
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
commit0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
treea31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /dom/media/mediasink
parentInitial commit. (diff)
downloadfirefox-esr-37a0381f8351b370577b65028ba1f6563ae23fdf.tar.xz
firefox-esr-37a0381f8351b370577b65028ba1f6563ae23fdf.zip
Adding upstream version 115.8.0esr.upstream/115.8.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/mediasink')
-rw-r--r--dom/media/mediasink/AudioDecoderInputTrack.cpp681
-rw-r--r--dom/media/mediasink/AudioDecoderInputTrack.h242
-rw-r--r--dom/media/mediasink/AudioSink.cpp664
-rw-r--r--dom/media/mediasink/AudioSink.h188
-rw-r--r--dom/media/mediasink/AudioSinkWrapper.cpp496
-rw-r--r--dom/media/mediasink/AudioSinkWrapper.h161
-rw-r--r--dom/media/mediasink/DecodedStream.cpp1171
-rw-r--r--dom/media/mediasink/DecodedStream.h154
-rw-r--r--dom/media/mediasink/MediaSink.h142
-rw-r--r--dom/media/mediasink/VideoSink.cpp706
-rw-r--r--dom/media/mediasink/VideoSink.h177
-rw-r--r--dom/media/mediasink/moz.build25
12 files changed, 4807 insertions, 0 deletions
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.cpp b/dom/media/mediasink/AudioDecoderInputTrack.cpp
new file mode 100644
index 0000000000..7f970f0e4f
--- /dev/null
+++ b/dom/media/mediasink/AudioDecoderInputTrack.cpp
@@ -0,0 +1,681 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioDecoderInputTrack.h"
+
+#include "MediaData.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "Tracing.h"
+
+// Use abort() instead of exception in SoundTouch.
+#define ST_NO_EXCEPTION_HANDLING 1
+#include "soundtouch/SoundTouchFactory.h"
+
+namespace mozilla {
+
+extern LazyLogModule gMediaDecoderLog;
+
+#define LOG(msg, ...) \
+ MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
+ ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
+
+#define LOG_M(msg, this, ...) \
+ MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
+ ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
+
+/* static */
+AudioDecoderInputTrack* AudioDecoderInputTrack::Create(
+ MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread,
+ const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
+ bool aPreservesPitch) {
+ MOZ_ASSERT(aGraph);
+ MOZ_ASSERT(aDecoderThread);
+ AudioDecoderInputTrack* track =
+ new AudioDecoderInputTrack(aDecoderThread, aGraph->GraphRate(), aInfo,
+ aPlaybackRate, aVolume, aPreservesPitch);
+ aGraph->AddTrack(track);
+ return track;
+}
+
+AudioDecoderInputTrack::AudioDecoderInputTrack(
+ nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate,
+ const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
+ bool aPreservesPitch)
+ : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()),
+ mDecoderThread(aDecoderThread),
+ mResamplerChannelCount(0),
+ mInitialInputChannels(aInfo.mChannels),
+ mInputSampleRate(aInfo.mRate),
+ mDelayedScheduler(mDecoderThread),
+ mPlaybackRate(aPlaybackRate),
+ mVolume(aVolume),
+ mPreservesPitch(aPreservesPitch) {}
+
+bool AudioDecoderInputTrack::ConvertAudioDataToSegment(
+ AudioData* aAudio, AudioSegment& aSegment,
+ const PrincipalHandle& aPrincipalHandle) {
+ AssertOnDecoderThread();
+ MOZ_ASSERT(aAudio);
+ MOZ_ASSERT(aSegment.IsEmpty());
+ if (!aAudio->Frames()) {
+ LOG("Ignore audio with zero frame");
+ return false;
+ }
+
+ aAudio->EnsureAudioBuffer();
+ RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
+ AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+ AutoTArray<const AudioDataValue*, 2> channels;
+ for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
+ channels.AppendElement(bufferData + i * aAudio->Frames());
+ }
+ aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(),
+ aPrincipalHandle);
+ const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate);
+ if (newInputRate != mInputSampleRate) {
+ LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate);
+ mInputSampleRate = newInputRate;
+ mResampler.own(nullptr);
+ mResamplerChannelCount = 0;
+ }
+ if (mInputSampleRate != GraphImpl()->GraphRate()) {
+ aSegment.ResampleChunks(mResampler, &mResamplerChannelCount,
+ mInputSampleRate, GraphImpl()->GraphRate());
+ }
+ return aSegment.GetDuration() > 0;
+}
+
+void AudioDecoderInputTrack::AppendData(
+ AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
+ AssertOnDecoderThread();
+ MOZ_ASSERT(aAudio);
+ nsTArray<RefPtr<AudioData>> audio;
+ audio.AppendElement(aAudio);
+ AppendData(audio, aPrincipalHandle);
+}
+
+void AudioDecoderInputTrack::AppendData(
+ nsTArray<RefPtr<AudioData>>& aAudioArray,
+ const PrincipalHandle& aPrincipalHandle) {
+ AssertOnDecoderThread();
+ MOZ_ASSERT(!mShutdownSPSCQueue);
+
+ // Batching all new data together in order to push them as a single unit that
+ // gives the SPSC queue more spaces.
+ for (const auto& audio : aAudioArray) {
+ BatchData(audio, aPrincipalHandle);
+ }
+
+ // If SPSC queue doesn't have much available capacity now, we would push
+ // batched later.
+ if (ShouldBatchData()) {
+ return;
+ }
+ PushBatchedDataIfNeeded();
+}
+
+bool AudioDecoderInputTrack::ShouldBatchData() const {
+ AssertOnDecoderThread();
+ // If the SPSC queue has less available capacity than the threshold, then all
+ // input audio data should be batched together, in order not to increase the
+ // pressure of SPSC queue.
+ static const int kThresholdNumerator = 3;
+ static const int kThresholdDenominator = 10;
+ return mSPSCQueue.AvailableWrite() <
+ mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator;
+}
+
+bool AudioDecoderInputTrack::HasBatchedData() const {
+ AssertOnDecoderThread();
+ return !mBatchedData.mSegment.IsEmpty();
+}
+
+void AudioDecoderInputTrack::BatchData(
+ AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
+ AssertOnDecoderThread();
+ AudioSegment segment;
+ if (!ConvertAudioDataToSegment(aAudio, segment, aPrincipalHandle)) {
+ return;
+ }
+ mBatchedData.mSegment.AppendFrom(&segment);
+ if (!mBatchedData.mStartTime.IsValid()) {
+ mBatchedData.mStartTime = aAudio->mTime;
+ }
+ mBatchedData.mEndTime = aAudio->GetEndTime();
+ LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
+ aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(),
+ mBatchedData.mSegment.GetDuration());
+ DispatchPushBatchedDataIfNeeded();
+}
+
+void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() {
+ AssertOnDecoderThread();
+ MOZ_ASSERT(!mShutdownSPSCQueue);
+ // The graph thread runs iteration around per 2~10ms. Doing this to ensure
+ // that we can keep consuming data. If the producer stops pushing new data
+ // due to MDSM stops decoding, which is because MDSM thinks the data stored
+ // in the audio queue are enough. The way to remove those data from the
+ // audio queue is driven by us, so we have to keep consuming data.
+ // Otherwise, we would get stuck because those batched data would never be
+ // consumed.
+ static const uint8_t kTimeoutMS = 10;
+ TimeStamp target =
+ TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS);
+ mDelayedScheduler.Ensure(
+ target,
+ [self = RefPtr<AudioDecoderInputTrack>(this), this]() {
+ LOG("In the task of DispatchPushBatchedDataIfNeeded");
+ mDelayedScheduler.CompleteRequest();
+ MOZ_ASSERT(!mShutdownSPSCQueue);
+ MOZ_ASSERT(HasBatchedData());
+ // The capacity in SPSC is still not enough, so we can't push data now.
+ // Retrigger another task to push batched data.
+ if (ShouldBatchData()) {
+ DispatchPushBatchedDataIfNeeded();
+ return;
+ }
+ PushBatchedDataIfNeeded();
+ },
+ []() { MOZ_DIAGNOSTIC_ASSERT(false); });
+}
+
+void AudioDecoderInputTrack::PushBatchedDataIfNeeded() {
+ AssertOnDecoderThread();
+ if (!HasBatchedData()) {
+ return;
+ }
+ LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u",
+ mBatchedData.mStartTime.ToMicroseconds(),
+ mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite());
+ SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))});
+ PushDataToSPSCQueue(data);
+ MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());
+ // No batched data remains, we can cancel the pending tasks.
+ mDelayedScheduler.Reset();
+}
+
+void AudioDecoderInputTrack::NotifyEndOfStream() {
+ AssertOnDecoderThread();
+ // Force to push all data before EOS. Otherwise, the track would be ended too
+ // early without sending all data.
+ PushBatchedDataIfNeeded();
+ SPSCData data({SPSCData::EOS()});
+ LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite());
+ PushDataToSPSCQueue(data);
+}
+
+void AudioDecoderInputTrack::ClearFutureData() {
+ AssertOnDecoderThread();
+ // Clear the data hasn't been pushed to SPSC queue yet.
+ mBatchedData.Clear();
+ mDelayedScheduler.Reset();
+ SPSCData data({SPSCData::ClearFutureData()});
+ LOG("Set clear future data, available SPSC sz=%u",
+ mSPSCQueue.AvailableWrite());
+ PushDataToSPSCQueue(data);
+}
+
+void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) {
+ AssertOnDecoderThread();
+ const bool rv = mSPSCQueue.Enqueue(data);
+ MOZ_DIAGNOSTIC_ASSERT(rv, "Failed to push data, SPSC queue is full!");
+ Unused << rv;
+}
+
+void AudioDecoderInputTrack::SetVolume(float aVolume) {
+ AssertOnDecoderThread();
+ LOG("Set volume=%f", aVolume);
+ GetMainThreadSerialEventTarget()->Dispatch(
+ NS_NewRunnableFunction("AudioDecoderInputTrack::SetVolume",
+ [self = RefPtr<AudioDecoderInputTrack>(this),
+ aVolume] { self->SetVolumeImpl(aVolume); }));
+}
+
+void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) {
+ MOZ_ASSERT(NS_IsMainThread());
+ class Message : public ControlMessage {
+ public:
+ Message(AudioDecoderInputTrack* aTrack, float aVolume)
+ : ControlMessage(aTrack), mTrack(aTrack), mVolume(aVolume) {}
+ void Run() override {
+ TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f",
+ mVolume);
+ LOG_M("Apply volume=%f", mTrack.get(), mVolume);
+ mTrack->mVolume = mVolume;
+ }
+
+ protected:
+ const RefPtr<AudioDecoderInputTrack> mTrack;
+ const float mVolume;
+ };
+ GraphImpl()->AppendMessage(MakeUnique<Message>(this, aVolume));
+}
+
+void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) {
+ AssertOnDecoderThread();
+ LOG("Set playback rate=%f", aPlaybackRate);
+ GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction(
+ "AudioDecoderInputTrack::SetPlaybackRate",
+ [self = RefPtr<AudioDecoderInputTrack>(this), aPlaybackRate] {
+ self->SetPlaybackRateImpl(aPlaybackRate);
+ }));
+}
+
+void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) {
+ MOZ_ASSERT(NS_IsMainThread());
+ class Message : public ControlMessage {
+ public:
+ Message(AudioDecoderInputTrack* aTrack, float aPlaybackRate)
+ : ControlMessage(aTrack),
+ mTrack(aTrack),
+ mPlaybackRate(aPlaybackRate) {}
+ void Run() override {
+ TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage",
+ "%f", mPlaybackRate);
+ LOG_M("Apply playback rate=%f", mTrack.get(), mPlaybackRate);
+ mTrack->mPlaybackRate = mPlaybackRate;
+ mTrack->SetTempoAndRateForTimeStretcher();
+ }
+
+ protected:
+ const RefPtr<AudioDecoderInputTrack> mTrack;
+ const float mPlaybackRate;
+ };
+ GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPlaybackRate));
+}
+
+void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) {
+ AssertOnDecoderThread();
+ LOG("Set preserves pitch=%d", aPreservesPitch);
+ GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction(
+ "AudioDecoderInputTrack::SetPreservesPitch",
+ [self = RefPtr<AudioDecoderInputTrack>(this), aPreservesPitch] {
+ self->SetPreservesPitchImpl(aPreservesPitch);
+ }));
+}
+
+void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) {
+ MOZ_ASSERT(NS_IsMainThread());
+ class Message : public ControlMessage {
+ public:
+ Message(AudioDecoderInputTrack* aTrack, bool aPreservesPitch)
+ : ControlMessage(aTrack),
+ mTrack(aTrack),
+ mPreservesPitch(aPreservesPitch) {}
+ void Run() override {
+ TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s",
+ mPreservesPitch ? "true" : "false")
+ LOG_M("Apply preserves pitch=%d", mTrack.get(), mPreservesPitch);
+ mTrack->mPreservesPitch = mPreservesPitch;
+ mTrack->SetTempoAndRateForTimeStretcher();
+ }
+
+ protected:
+ const RefPtr<AudioDecoderInputTrack> mTrack;
+ const bool mPreservesPitch;
+ };
+ GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPreservesPitch));
+}
+
+void AudioDecoderInputTrack::Close() {
+ AssertOnDecoderThread();
+ LOG("Close");
+ mShutdownSPSCQueue = true;
+ mBatchedData.Clear();
+ mDelayedScheduler.Reset();
+}
+
+void AudioDecoderInputTrack::DestroyImpl() {
+ LOG("DestroyImpl");
+ AssertOnGraphThreadOrNotRunning();
+ mBufferedData.Clear();
+ if (mTimeStretcher) {
+ soundtouch::destroySoundTouchObj(mTimeStretcher);
+ }
+ ProcessedMediaTrack::DestroyImpl();
+}
+
+AudioDecoderInputTrack::~AudioDecoderInputTrack() {
+ MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());
+ MOZ_ASSERT(mShutdownSPSCQueue);
+ mResampler.own(nullptr);
+}
+
+void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
+ uint32_t aFlags) {
+ AssertOnGraphThread();
+ if (Ended()) {
+ return;
+ }
+
+ TrackTime consumedDuration = 0;
+ auto notify = MakeScopeExit([this, &consumedDuration] {
+ NotifyInTheEndOfProcessInput(consumedDuration);
+ });
+
+ if (mSentAllData && (aFlags & ALLOW_END)) {
+ LOG("End track");
+ mEnded = true;
+ return;
+ }
+
+ const TrackTime expectedDuration = aTo - aFrom;
+ LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom,
+ aTo, expectedDuration);
+
+ // Drain all data from SPSC queue first, because we want that the SPSC queue
+ // always has capacity of accepting data from the producer. In addition, we
+ // also need to check if there is any control related data that should be
+ // applied to output segment, eg. `ClearFutureData`.
+ SPSCData data;
+ while (mSPSCQueue.Dequeue(&data, 1) > 0) {
+ HandleSPSCData(data);
+ }
+
+ consumedDuration += AppendBufferedDataToOutput(expectedDuration);
+ if (HasSentAllData()) {
+ LOG("Sent all data, should end track in next iteration");
+ mSentAllData = true;
+ }
+}
+
+void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) {
+ AssertOnGraphThread();
+ if (aData.IsDecodedData()) {
+ MOZ_ASSERT(!mReceivedEOS);
+ AudioSegment& segment = aData.AsDecodedData()->mSegment;
+ LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
+ aData.AsDecodedData()->mStartTime.ToMicroseconds(),
+ aData.AsDecodedData()->mEndTime.ToMicroseconds(),
+ segment.GetDuration());
+ mBufferedData.AppendFrom(&segment);
+ return;
+ }
+ if (aData.IsEOS()) {
+ MOZ_ASSERT(!Ended());
+ LOG("Received EOS");
+ mReceivedEOS = true;
+ return;
+ }
+ if (aData.IsClearFutureData()) {
+ LOG("Clear future data");
+ mBufferedData.Clear();
+ if (!Ended()) {
+ LOG("Clear EOS");
+ mReceivedEOS = false;
+ }
+ return;
+ }
+ MOZ_ASSERT_UNREACHABLE("unsupported SPSC data");
+}
+
+TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput(
+ TrackTime aExpectedDuration) {
+ AssertOnGraphThread();
+
+ // Remove the necessary part from `mBufferedData` to create a new
+ // segment in order to apply some operation without affecting all data.
+ AudioSegment outputSegment;
+ TrackTime consumedDuration = 0;
+ if (mPlaybackRate != 1.0) {
+ consumedDuration =
+ AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment);
+ } else {
+ consumedDuration =
+ AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment);
+ }
+
+ // Apply any necessary change on the segement which would be outputed to the
+ // graph.
+ const TrackTime appendedDuration = outputSegment.GetDuration();
+ outputSegment.ApplyVolume(mVolume);
+ ApplyTrackDisabling(&outputSegment);
+ mSegment->AppendFrom(&outputSegment);
+
+ LOG("Appended %" PRId64 ", consumed %" PRId64
+ ", remaining raw buffered %" PRId64 ", remaining time-stretched %u",
+ appendedDuration, consumedDuration, mBufferedData.GetDuration(),
+ mTimeStretcher ? mTimeStretcher->numSamples() : 0);
+ if (auto gap = aExpectedDuration - appendedDuration; gap > 0) {
+ LOG("Audio underrun, fill silence %" PRId64, gap);
+ MOZ_ASSERT(mBufferedData.IsEmpty());
+ mSegment->AppendNullData(gap);
+ }
+ return consumedDuration;
+}
+
+TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment(
+ TrackTime aExpectedDuration, AudioSegment& aOutput) {
+ AssertOnGraphThread();
+ EnsureTimeStretcher();
+
+ MOZ_ASSERT(mPlaybackRate != 1.0f);
+ MOZ_ASSERT(aExpectedDuration >= 0);
+ MOZ_ASSERT(mTimeStretcher);
+ MOZ_ASSERT(aOutput.IsEmpty());
+
+ // If we don't have enough data that have been time-stretched, fill raw data
+ // into the time stretcher until the amount of samples that time stretcher
+ // finishes processed reaches or exceeds the expected duration.
+ TrackTime consumedDuration = 0;
+ if (mTimeStretcher->numSamples() < aExpectedDuration) {
+ consumedDuration = FillDataToTimeStretcher(aExpectedDuration);
+ }
+ MOZ_ASSERT(consumedDuration >= 0);
+ Unused << GetDataFromTimeStretcher(aExpectedDuration, aOutput);
+ return consumedDuration;
+}
+
+TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher(
+ TrackTime aExpectedDuration) {
+ AssertOnGraphThread();
+ MOZ_ASSERT(mPlaybackRate != 1.0f);
+ MOZ_ASSERT(aExpectedDuration >= 0);
+ MOZ_ASSERT(mTimeStretcher);
+
+ TrackTime consumedDuration = 0;
+ const uint32_t channels = GetChannelCountForTimeStretcher();
+ mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) {
+ MOZ_ASSERT(aChunk);
+ if (aChunk->IsNull() && aChunk->GetDuration() == 0) {
+ // Skip this chunk and wait for next one.
+ return false;
+ }
+ const uint32_t bufferLength = channels * aChunk->GetDuration();
+ if (bufferLength > mInterleavedBuffer.Capacity()) {
+ mInterleavedBuffer.SetCapacity(bufferLength);
+ }
+ mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
+ if (aChunk->IsNull()) {
+ MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence");
+ memset(mInterleavedBuffer.Elements(), 0, mInterleavedBuffer.Length());
+ } else {
+ // Do the up-mix/down-mix first if necessary that forces to change the
+ // data's channel count to the time stretcher's channel count. Then
+ // perform a transformation from planar to interleaved.
+ switch (aChunk->mBufferFormat) {
+ case AUDIO_FORMAT_S16:
+ WriteChunk<int16_t>(*aChunk, channels, 1.0f,
+ mInterleavedBuffer.Elements());
+ break;
+ case AUDIO_FORMAT_FLOAT32:
+ WriteChunk<float>(*aChunk, channels, 1.0f,
+ mInterleavedBuffer.Elements());
+ break;
+ default:
+ MOZ_ASSERT_UNREACHABLE("Not expected format");
+ }
+ }
+ mTimeStretcher->putSamples(mInterleavedBuffer.Elements(),
+ aChunk->GetDuration());
+ consumedDuration += aChunk->GetDuration();
+ return mTimeStretcher->numSamples() >= aExpectedDuration;
+ });
+ mBufferedData.RemoveLeading(consumedDuration);
+ return consumedDuration;
+}
+
+TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment(
+ TrackTime aExpectedDuration, AudioSegment& aOutput) {
+ AssertOnGraphThread();
+ MOZ_ASSERT(mPlaybackRate == 1.0f);
+ MOZ_ASSERT(aExpectedDuration >= 0);
+ MOZ_ASSERT(aOutput.IsEmpty());
+
+ const TrackTime drained =
+ DrainStretchedDataIfNeeded(aExpectedDuration, aOutput);
+ const TrackTime available =
+ std::min(aExpectedDuration - drained, mBufferedData.GetDuration());
+ aOutput.AppendSlice(mBufferedData, 0, available);
+ MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration);
+ mBufferedData.RemoveLeading(available);
+ return available;
+}
+
+TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded(
+ TrackTime aExpectedDuration, AudioSegment& aOutput) {
+ AssertOnGraphThread();
+ MOZ_ASSERT(mPlaybackRate == 1.0f);
+ MOZ_ASSERT(aExpectedDuration >= 0);
+
+ if (!mTimeStretcher) {
+ return 0;
+ }
+ if (mTimeStretcher->numSamples() == 0) {
+ return 0;
+ }
+ return GetDataFromTimeStretcher(aExpectedDuration, aOutput);
+}
+
+TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher(
+ TrackTime aExpectedDuration, AudioSegment& aOutput) {
+ AssertOnGraphThread();
+ MOZ_ASSERT(mTimeStretcher);
+ MOZ_ASSERT(aExpectedDuration >= 0);
+
+ if (HasSentAllData() && mTimeStretcher->numUnprocessedSamples()) {
+ mTimeStretcher->flush();
+ LOG("Flush %u frames from the time stretcher",
+ mTimeStretcher->numSamples());
+ }
+
+ const TrackTime available =
+ std::min((TrackTime)mTimeStretcher->numSamples(), aExpectedDuration);
+ if (available == 0) {
+ // Either running out of stretched data, or the raw data we filled into
+ // the time stretcher were not enough for producing stretched data.
+ return 0;
+ }
+
+ // Retrieve interleaved data from the time stretcher.
+ const uint32_t channelCount = GetChannelCountForTimeStretcher();
+ const uint32_t bufferLength = channelCount * available;
+ if (bufferLength > mInterleavedBuffer.Capacity()) {
+ mInterleavedBuffer.SetCapacity(bufferLength);
+ }
+ mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
+ mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available);
+
+ // Perform a transformation from interleaved to planar.
+ CheckedInt<size_t> bufferSize(sizeof(AudioDataValue));
+ bufferSize *= bufferLength;
+ RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
+ AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+ AutoTArray<AudioDataValue*, 2> planarBuffer;
+ planarBuffer.SetLength(channelCount);
+ for (size_t idx = 0; idx < channelCount; idx++) {
+ planarBuffer[idx] = bufferData + idx * available;
+ }
+ DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available,
+ channelCount, planarBuffer.Elements());
+ AutoTArray<const AudioDataValue*, 2> outputChannels;
+ outputChannels.AppendElements(planarBuffer);
+ aOutput.AppendFrames(buffer.forget(), outputChannels,
+ static_cast<int32_t>(available),
+ mBufferedData.GetOldestPrinciple());
+ return available;
+}
+
+void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput(
+ TrackTime aFillDuration) {
+ AssertOnGraphThread();
+ mWrittenFrames += aFillDuration;
+ LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d",
+ aFillDuration, mWrittenFrames, Ended());
+ if (aFillDuration > 0) {
+ mOnOutput.Notify(mWrittenFrames);
+ }
+ if (Ended()) {
+ mOnEnd.Notify();
+ }
+}
+
+bool AudioDecoderInputTrack::HasSentAllData() const {
+ AssertOnGraphThread();
+ return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 &&
+ mBufferedData.IsEmpty();
+}
+
+uint32_t AudioDecoderInputTrack::NumberOfChannels() const {
+ AssertOnGraphThread();
+ const uint32_t maxChannelCount = GetData<AudioSegment>()->MaxChannelCount();
+ return maxChannelCount ? maxChannelCount : mInitialInputChannels;
+}
+
+void AudioDecoderInputTrack::EnsureTimeStretcher() {
+ AssertOnGraphThread();
+ if (!mTimeStretcher) {
+ mTimeStretcher = soundtouch::createSoundTouchObj();
+ mTimeStretcher->setSampleRate(GraphImpl()->GraphRate());
+ mTimeStretcher->setChannels(GetChannelCountForTimeStretcher());
+ mTimeStretcher->setPitch(1.0);
+
+ // SoundTouch v2.1.2 uses automatic time-stretch settings with the following
+ // values:
+ // Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap
+ // Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap
+ // We are going to use a smaller 10ms sequence size to improve speech
+ // clarity, giving more resolution at high tempo and less reverb at low
+ // tempo. Maintain 15ms seekwindow and 8ms overlap for smoothness.
+ mTimeStretcher->setSetting(
+ SETTING_SEQUENCE_MS,
+ StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms());
+ mTimeStretcher->setSetting(
+ SETTING_SEEKWINDOW_MS,
+ StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms());
+ mTimeStretcher->setSetting(
+ SETTING_OVERLAP_MS,
+ StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms());
+ SetTempoAndRateForTimeStretcher();
+ LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)",
+ GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch);
+ }
+}
+
+void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() {
+ AssertOnGraphThread();
+ if (!mTimeStretcher) {
+ return;
+ }
+ if (mPreservesPitch) {
+ mTimeStretcher->setTempo(mPlaybackRate);
+ mTimeStretcher->setRate(1.0f);
+ } else {
+ mTimeStretcher->setTempo(1.0f);
+ mTimeStretcher->setRate(mPlaybackRate);
+ }
+}
+
+uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const {
+ // The time stretcher MUST be initialized with a fixed channel count, but the
+ // channel count in audio chunks might vary. Therefore, we always use the
+ // initial input channel count to initialize the time stretcher and perform a
+ // real-time down-mix/up-mix for audio chunks which have different channel
+ // count than the initial input channel count.
+ return mInitialInputChannels;
+}
+
+#undef LOG
+} // namespace mozilla
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h
new file mode 100644
index 0000000000..8c82d7bed6
--- /dev/null
+++ b/dom/media/mediasink/AudioDecoderInputTrack.h
@@ -0,0 +1,242 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AudioDecoderInputTrack_h
+#define AudioDecoderInputTrack_h
+
+#include "AudioSegment.h"
+#include "MediaEventSource.h"
+#include "MediaTimer.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackGraphImpl.h"
+#include "MediaSegment.h"
+#include "mozilla/SPSCQueue.h"
+#include "mozilla/StateMirroring.h"
+#include "nsISerialEventTarget.h"
+
+namespace soundtouch {
+class MOZ_EXPORT SoundTouch;
+}
+
+namespace mozilla {
+
+class AudioData;
+
+/**
+ * AudioDecoderInputTrack is used as a source for the audio decoder data, which
+ * supports adjusting playback rate and preserve pitch.
+ * The owner of this track would be responsible to push audio data via
+ * `AppendData()` into a SPSC queue, which is a thread-safe queue between the
+ * decoder thread (producer) and the graph thread (consumer). MediaTrackGraph
+ * requires data via `ProcessInput()`, then AudioDecoderInputTrack would convert
+ * (based on sample rate and playback rate) and append the amount of needed
+ * audio frames onto the output segment that would be used by MediaTrackGraph.
+ */
+class AudioDecoderInputTrack final : public ProcessedMediaTrack {
+ public:
+ static AudioDecoderInputTrack* Create(MediaTrackGraph* aGraph,
+ nsISerialEventTarget* aDecoderThread,
+ const AudioInfo& aInfo,
+ float aPlaybackRate, float aVolume,
+ bool aPreservesPitch);
+
+ // SPSCData suppports filling different supported type variants, and is used
+ // to achieve a thread-safe information exchange between the decoder thread
+ // and the graph thread.
+ struct SPSCData final {
+ struct Empty {};
+ struct ClearFutureData {};
+ struct DecodedData {
+ DecodedData()
+ : mStartTime(media::TimeUnit::Invalid()),
+ mEndTime(media::TimeUnit::Invalid()) {}
+ DecodedData(DecodedData&& aDecodedData)
+ : mSegment(std::move(aDecodedData.mSegment)) {
+ mStartTime = aDecodedData.mStartTime;
+ mEndTime = aDecodedData.mEndTime;
+ aDecodedData.Clear();
+ }
+ DecodedData(media::TimeUnit aStartTime, media::TimeUnit aEndTime)
+ : mStartTime(aStartTime), mEndTime(aEndTime) {}
+ DecodedData(const DecodedData&) = delete;
+ DecodedData& operator=(const DecodedData&) = delete;
+ void Clear() {
+ mSegment.Clear();
+ mStartTime = media::TimeUnit::Invalid();
+ mEndTime = media::TimeUnit::Invalid();
+ }
+ AudioSegment mSegment;
+ media::TimeUnit mStartTime;
+ media::TimeUnit mEndTime;
+ };
+ struct EOS {};
+
+ SPSCData() : mData(Empty()){};
+ explicit SPSCData(ClearFutureData&& aArg) : mData(std::move(aArg)){};
+ explicit SPSCData(DecodedData&& aArg) : mData(std::move(aArg)){};
+ explicit SPSCData(EOS&& aArg) : mData(std::move(aArg)){};
+
+ bool HasData() const { return !mData.is<Empty>(); }
+ bool IsClearFutureData() const { return mData.is<ClearFutureData>(); }
+ bool IsDecodedData() const { return mData.is<DecodedData>(); }
+ bool IsEOS() const { return mData.is<EOS>(); }
+
+ DecodedData* AsDecodedData() {
+ return IsDecodedData() ? &mData.as<DecodedData>() : nullptr;
+ }
+
+ Variant<Empty, ClearFutureData, DecodedData, EOS> mData;
+ };
+
+ // Decoder thread API
+ void AppendData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+ void AppendData(nsTArray<RefPtr<AudioData>>& aAudioArray,
+ const PrincipalHandle& aPrincipalHandle);
+ void NotifyEndOfStream();
+ void ClearFutureData();
+ void SetVolume(float aVolume);
+ void SetPlaybackRate(float aPlaybackRate);
+ void SetPreservesPitch(bool aPreservesPitch);
+ // After calling this, the track are not expected to receive any new data.
+ void Close();
+ bool HasBatchedData() const;
+
+ MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
+ MediaEventSource<void>& OnEnd() { return mOnEnd; }
+
+ // Graph Thread API
+ void DestroyImpl() override;
+ void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
+ uint32_t NumberOfChannels() const override;
+
+ // The functions below are only used for testing.
+ TrackTime WrittenFrames() const {
+ AssertOnGraphThread();
+ return mWrittenFrames;
+ }
+ float Volume() const {
+ AssertOnGraphThread();
+ return mVolume;
+ }
+ float PlaybackRate() const {
+ AssertOnGraphThread();
+ return mPlaybackRate;
+ }
+
+ protected:
+ ~AudioDecoderInputTrack();
+
+ private:
+ AudioDecoderInputTrack(nsISerialEventTarget* aDecoderThread,
+ TrackRate aGraphRate, const AudioInfo& aInfo,
+ float aPlaybackRate, float aVolume,
+ bool aPreservesPitch);
+
+ // Return false if the converted segment contains zero duration.
+ bool ConvertAudioDataToSegment(AudioData* aAudio, AudioSegment& aSegment,
+ const PrincipalHandle& aPrincipalHandle);
+
+ void HandleSPSCData(SPSCData& aData);
+
+ // These methods would return the total frames that we consumed from
+ // `mBufferedData`.
+ TrackTime AppendBufferedDataToOutput(TrackTime aExpectedDuration);
+ TrackTime FillDataToTimeStretcher(TrackTime aExpectedDuration);
+ TrackTime AppendTimeStretchedDataToSegment(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ TrackTime AppendUnstretchedDataToSegment(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+
+ // Return the total frames that we retrieve from the time stretcher.
+ TrackTime DrainStretchedDataIfNeeded(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ TrackTime GetDataFromTimeStretcher(TrackTime aExpectedDuration,
+ AudioSegment& aOutput);
+ void NotifyInTheEndOfProcessInput(TrackTime aFillDuration);
+
+ bool HasSentAllData() const;
+
+ bool ShouldBatchData() const;
+ void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+ void DispatchPushBatchedDataIfNeeded();
+ void PushBatchedDataIfNeeded();
+ void PushDataToSPSCQueue(SPSCData& data);
+
+ void SetVolumeImpl(float aVolume);
+ void SetPlaybackRateImpl(float aPlaybackRate);
+ void SetPreservesPitchImpl(bool aPreservesPitch);
+
+ void EnsureTimeStretcher();
+ void SetTempoAndRateForTimeStretcher();
+ uint32_t GetChannelCountForTimeStretcher() const;
+
+ inline void AssertOnDecoderThread() const {
+ MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
+ }
+ inline void AssertOnGraphThread() const {
+ MOZ_ASSERT(GraphImpl()->OnGraphThread());
+ }
+ inline void AssertOnGraphThreadOrNotRunning() const {
+ MOZ_ASSERT(GraphImpl()->OnGraphThreadOrNotRunning());
+ }
+
+ const RefPtr<nsISerialEventTarget> mDecoderThread;
+
+ // Notify the amount of audio frames which have been sent to the track.
+ MediaEventProducer<int64_t> mOnOutput;
+ // Notify when the track is ended.
+ MediaEventProducer<void> mOnEnd;
+
+ // These variables are ONLY used in the decoder thread.
+ nsAutoRef<SpeexResamplerState> mResampler;
+ uint32_t mResamplerChannelCount;
+ const uint32_t mInitialInputChannels;
+ TrackRate mInputSampleRate;
+ DelayedScheduler mDelayedScheduler;
+ bool mShutdownSPSCQueue = false;
+
+ // These attributes are ONLY used in the graph thread.
+ bool mReceivedEOS = false;
+ TrackTime mWrittenFrames = 0;
+ float mPlaybackRate;
+ float mVolume;
+ bool mPreservesPitch;
+
+ // A thread-safe queue shared by the decoder thread and the graph thread.
+ // The decoder thread is the producer side, and the graph thread is the
+ // consumer side. This queue should NEVER get full. In order to achieve that,
+ // we would batch input samples when SPSC queue doesn't have many available
+ // capacity.
+ // In addition, as the media track isn't guaranteed to be destroyed on the
+ // graph thread (it could be destroyed on the main thread as well) so we might
+ // not clear all data in SPSC queue when the track's `DestroyImpl()` gets
+ // called. We leave to destroy the queue later when the track gets destroyed.
+ SPSCQueue<SPSCData> mSPSCQueue{40};
+
+ // When the graph requires the less amount of audio frames than the amount of
+ // frames an audio data has, then the remaining part of frames would be stored
+ // and used in next iteration.
+ // This is ONLY used in the graph thread.
+ AudioSegment mBufferedData;
+
+ // In order to prevent SPSC queue from being full, we want to batch multiple
+ // data into one to control the density of SPSC queue, the length of batched
+ // data would be dynamically adjusted by queue's available capacity.
+ // This is ONLY used in the decoder thread.
+ SPSCData::DecodedData mBatchedData;
+
+ // True if we've sent all data to the graph, then the track will be marked as
+ // ended in the next iteration.
+ bool mSentAllData = false;
+
+ // This is used to adjust the playback rate and pitch.
+ soundtouch::SoundTouch* mTimeStretcher = nullptr;
+
+ // Buffers that would be used for the time stretching.
+ AutoTArray<AudioDataValue, 2> mInterleavedBuffer;
+};
+
+} // namespace mozilla
+
+#endif // AudioDecoderInputTrack_h
diff --git a/dom/media/mediasink/AudioSink.cpp b/dom/media/mediasink/AudioSink.cpp
new file mode 100644
index 0000000000..536a2a4f8a
--- /dev/null
+++ b/dom/media/mediasink/AudioSink.cpp
@@ -0,0 +1,664 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSink.h"
+#include "AudioConverter.h"
+#include "AudioDeviceInfo.h"
+#include "MediaQueue.h"
+#include "VideoUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/StaticPrefs_dom.h"
+#include "nsPrintfCString.h"
+#include "Tracing.h"
+
+namespace mozilla {
+
+mozilla::LazyLogModule gAudioSinkLog("AudioSink");
+#define SINK_LOG(msg, ...) \
+ MOZ_LOG(gAudioSinkLog, LogLevel::Debug, \
+ ("AudioSink=%p " msg, this, ##__VA_ARGS__))
+#define SINK_LOG_V(msg, ...) \
+ MOZ_LOG(gAudioSinkLog, LogLevel::Verbose, \
+ ("AudioSink=%p " msg, this, ##__VA_ARGS__))
+
+// The amount of audio frames that is used to fuzz rounding errors.
+static const int64_t AUDIO_FUZZ_FRAMES = 1;
+
+using media::TimeUnit;
+
+AudioSink::AudioSink(AbstractThread* aThread,
+ MediaQueue<AudioData>& aAudioQueue, const AudioInfo& aInfo,
+ bool aShouldResistFingerprinting)
+ : mPlaying(true),
+ mWritten(0),
+ mErrored(false),
+ mOwnerThread(aThread),
+ mFramesParsed(0),
+ mOutputRate(
+ DecideAudioPlaybackSampleRate(aInfo, aShouldResistFingerprinting)),
+ mOutputChannels(DecideAudioPlaybackChannels(aInfo)),
+ mAudibilityMonitor(
+ mOutputRate,
+ StaticPrefs::dom_media_silence_duration_for_audibility()),
+ mIsAudioDataAudible(false),
+ mProcessedQueueFinished(false),
+ mAudioQueue(aAudioQueue),
+ mProcessedQueueThresholdMS(
+ StaticPrefs::media_audio_audiosink_threshold_ms()) {
+ // Not much to initialize here if there's no audio.
+ if (!aInfo.IsValid()) {
+ mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(0);
+ return;
+ }
+ // Twice the limit that trigger a refill.
+ double capacitySeconds = mProcessedQueueThresholdMS / 1000.f * 2;
+ // Clamp to correct boundaries, and align on the channel count
+ int elementCount = static_cast<int>(
+ std::clamp(capacitySeconds * mOutputChannels * mOutputRate, 0.,
+ std::numeric_limits<int>::max() - 1.));
+ elementCount -= elementCount % mOutputChannels;
+ mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(elementCount);
+ SINK_LOG("Ringbuffer has space for %u elements (%lf seconds)",
+ mProcessedSPSCQueue->Capacity(),
+ static_cast<float>(elementCount) / mOutputChannels / mOutputRate);
+ // Determine if the data is likely to be audible when the stream will be
+ // ready, if possible.
+ RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront();
+ if (frontPacket) {
+ mAudibilityMonitor.ProcessInterleaved(frontPacket->Data(),
+ frontPacket->mChannels);
+ mIsAudioDataAudible = mAudibilityMonitor.RecentlyAudible();
+ SINK_LOG("New AudioSink -- audio is likely to be %s",
+ mIsAudioDataAudible ? "audible" : "inaudible");
+ } else {
+ // If no packets are available, consider the audio audible.
+ mIsAudioDataAudible = true;
+ SINK_LOG(
+ "New AudioSink -- no audio packet avaialble, considering the stream "
+ "audible");
+ }
+}
+
+AudioSink::~AudioSink() {
+ // Generally instances of AudioSink should be properly Shutdown manually.
+ // The only way deleting an AudioSink without shutdown an happen is if the
+ // dispatch back to the MDSM thread after initializing it asynchronously
+ // fails. When that's the case, the stream has been initialized but not
+ // started. Manually shutdown the AudioStream in this case.
+ if (mAudioStream) {
+ mAudioStream->Shutdown();
+ }
+}
+
+nsresult AudioSink::InitializeAudioStream(
+ const PlaybackParams& aParams, const RefPtr<AudioDeviceInfo>& aAudioDevice,
+ AudioSink::InitializationType aInitializationType) {
+ if (aInitializationType == AudioSink::InitializationType::UNMUTING) {
+ // Consider the stream to be audible immediately, before initialization
+ // finishes when unmuting, in case initialization takes some time and it
+ // looked audible when the AudioSink was created.
+ mAudibleEvent.Notify(mIsAudioDataAudible);
+ SINK_LOG("InitializeAudioStream (Unmuting) notifying that audio is %s",
+ mIsAudioDataAudible ? "audible" : "inaudible");
+ } else {
+ // If not unmuting, the audibility event will be dispatched as usual,
+ // inspecting the audio content as it's being played and signaling the
+ // audibility event when a different in state is detected.
+ SINK_LOG("InitializeAudioStream (initial)");
+ mIsAudioDataAudible = false;
+ }
+
+ // When AudioQueue is empty, there is no way to know the channel layout of
+ // the coming audio data, so we use the predefined channel map instead.
+ AudioConfig::ChannelLayout::ChannelMap channelMap =
+ AudioConfig::ChannelLayout(mOutputChannels).Map();
+ // The layout map used here is already processed by mConverter with
+ // mOutputChannels into SMPTE format, so there is no need to worry if
+ // StaticPrefs::accessibility_monoaudio_enable() or
+ // StaticPrefs::media_forcestereo_enabled() is applied.
+ MOZ_ASSERT(!mAudioStream);
+ mAudioStream =
+ new AudioStream(*this, mOutputRate, mOutputChannels, channelMap);
+ nsresult rv = mAudioStream->Init(aAudioDevice);
+ if (NS_FAILED(rv)) {
+ mAudioStream->Shutdown();
+ mAudioStream = nullptr;
+ return rv;
+ }
+
+ // Set playback params before calling Start() so they can take effect
+ // as soon as the 1st DataCallback of the AudioStream fires.
+ mAudioStream->SetVolume(aParams.mVolume);
+ mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
+ mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
+
+ return NS_OK;
+}
+
+nsresult AudioSink::Start(
+ const media::TimeUnit& aStartTime,
+ MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise) {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+ mAudioQueueListener = mAudioQueue.PushEvent().Connect(
+ mOwnerThread, this, &AudioSink::OnAudioPushed);
+ mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
+ mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
+ mProcessedQueueListener =
+ mAudioPopped.Connect(mOwnerThread, this, &AudioSink::OnAudioPopped);
+
+ mStartTime = aStartTime;
+
+ // To ensure at least one audio packet will be popped from AudioQueue and
+ // ready to be played.
+ NotifyAudioNeeded();
+
+ return mAudioStream->Start(aEndedPromise);
+}
+
+TimeUnit AudioSink::GetPosition() {
+ int64_t tmp;
+ if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) {
+ TimeUnit pos = TimeUnit::FromMicroseconds(tmp);
+ NS_ASSERTION(pos >= mLastGoodPosition,
+ "AudioStream position shouldn't go backward");
+ TimeUnit tmp = mStartTime + pos;
+ if (!tmp.IsValid()) {
+ mErrored = true;
+ return mStartTime + mLastGoodPosition;
+ }
+ // Update the last good position when we got a good one.
+ if (pos >= mLastGoodPosition) {
+ mLastGoodPosition = pos;
+ }
+ }
+
+ return mStartTime + mLastGoodPosition;
+}
+
+bool AudioSink::HasUnplayedFrames() {
+ // Experimentation suggests that GetPositionInFrames() is zero-indexed,
+ // so we need to add 1 here before comparing it to mWritten.
+ return mProcessedSPSCQueue->AvailableRead() ||
+ (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < mWritten);
+}
+
+TimeUnit AudioSink::UnplayedDuration() const {
+ return TimeUnit::FromMicroseconds(AudioQueuedInRingBufferMS());
+}
+
+void AudioSink::ReenqueueUnplayedAudioDataIfNeeded() {
+ // This is OK: the AudioStream has been shut down. Shutdown guarantees that
+ // the audio callback thread won't call back again.
+ mProcessedSPSCQueue->ResetThreadIds();
+
+ // construct an AudioData
+ int sampleInRingbuffer = mProcessedSPSCQueue->AvailableRead();
+
+ if (!sampleInRingbuffer) {
+ return;
+ }
+
+ uint32_t channelCount;
+ uint32_t rate;
+ if (mConverter) {
+ channelCount = mConverter->OutputConfig().Channels();
+ rate = mConverter->OutputConfig().Rate();
+ } else {
+ channelCount = mOutputChannels;
+ rate = mOutputRate;
+ }
+
+ uint32_t framesRemaining = sampleInRingbuffer / channelCount;
+
+ nsTArray<AlignedAudioBuffer> packetsToReenqueue;
+ RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront();
+ uint32_t offset;
+ TimeUnit time;
+ uint32_t typicalPacketFrameCount;
+ // Extrapolate mOffset, mTime from the front of the queue
+ // We can't really find a good value for `mOffset`, so we take what we have
+ // at the front of the queue.
+ // For `mTime`, assume there hasn't been a discontinuity recently.
+ if (!frontPacket) {
+ // We do our best here, but it's not going to be perfect.
+ typicalPacketFrameCount = 1024; // typical for e.g. AAC
+ offset = 0;
+ time = GetPosition();
+ } else {
+ typicalPacketFrameCount = frontPacket->Frames();
+ offset = frontPacket->mOffset;
+ time = frontPacket->mTime;
+ }
+
+ // Extract all audio data from the ring buffer, we can only read the data from
+ // the most recent, so we reenqueue the data, packetized, in a temporary
+ // array.
+ while (framesRemaining) {
+ uint32_t packetFrameCount =
+ std::min(framesRemaining, typicalPacketFrameCount);
+ framesRemaining -= packetFrameCount;
+
+ int packetSampleCount = packetFrameCount * channelCount;
+ AlignedAudioBuffer packetData(packetSampleCount);
+ DebugOnly<int> samplesRead =
+ mProcessedSPSCQueue->Dequeue(packetData.Data(), packetSampleCount);
+ MOZ_ASSERT(samplesRead == packetSampleCount);
+
+ packetsToReenqueue.AppendElement(packetData);
+ }
+ // Reenqueue in the audio queue in correct order in the audio queue, starting
+ // with the end of the temporary array.
+ while (!packetsToReenqueue.IsEmpty()) {
+ auto packetData = packetsToReenqueue.PopLastElement();
+ uint32_t packetFrameCount = packetData.Length() / channelCount;
+ auto duration = TimeUnit(packetFrameCount, rate);
+ if (!duration.IsValid()) {
+ NS_WARNING("Int overflow in AudioSink");
+ mErrored = true;
+ return;
+ }
+ time -= duration;
+ RefPtr<AudioData> packet =
+ new AudioData(offset, time, std::move(packetData), channelCount, rate);
+ MOZ_DIAGNOSTIC_ASSERT(duration == packet->mDuration, "must be equal");
+
+ SINK_LOG(
+ "Muting: Pushing back %u frames (%lfms) from the ring buffer back into "
+ "the audio queue at pts %lf",
+ packetFrameCount, 1000 * static_cast<float>(packetFrameCount) / rate,
+ time.ToSeconds());
+ // The audio data's timestamp would be adjusted already if we're in looping,
+ // so we don't want to adjust them again.
+ mAudioQueue.PushFront(packet,
+ MediaQueue<AudioData>::TimestampAdjustment::Disable);
+ }
+}
+
+Maybe<MozPromiseHolder<MediaSink::EndedPromise>> AudioSink::Shutdown(
+ ShutdownCause aShutdownCause) {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+ mAudioQueueListener.DisconnectIfExists();
+ mAudioQueueFinishListener.DisconnectIfExists();
+ mProcessedQueueListener.DisconnectIfExists();
+
+ Maybe<MozPromiseHolder<MediaSink::EndedPromise>> rv;
+
+ if (mAudioStream) {
+ rv = mAudioStream->Shutdown(aShutdownCause);
+ mAudioStream = nullptr;
+ if (aShutdownCause == ShutdownCause::Muting) {
+ ReenqueueUnplayedAudioDataIfNeeded();
+ }
+ }
+ mProcessedQueueFinished = true;
+
+ return rv;
+}
+
+void AudioSink::SetVolume(double aVolume) {
+ if (mAudioStream) {
+ mAudioStream->SetVolume(aVolume);
+ }
+}
+
+void AudioSink::SetStreamName(const nsAString& aStreamName) {
+ if (mAudioStream) {
+ mAudioStream->SetStreamName(aStreamName);
+ }
+}
+
+void AudioSink::SetPlaybackRate(double aPlaybackRate) {
+ MOZ_ASSERT(aPlaybackRate != 0,
+ "Don't set the playbackRate to 0 on AudioStream");
+ if (mAudioStream) {
+ mAudioStream->SetPlaybackRate(aPlaybackRate);
+ }
+}
+
+void AudioSink::SetPreservesPitch(bool aPreservesPitch) {
+ if (mAudioStream) {
+ mAudioStream->SetPreservesPitch(aPreservesPitch);
+ }
+}
+
+void AudioSink::SetPlaying(bool aPlaying) {
+ if (!mAudioStream || mAudioStream->IsPlaybackCompleted() ||
+ mPlaying == aPlaying) {
+ return;
+ }
+ // pause/resume AudioStream as necessary.
+ if (!aPlaying) {
+ mAudioStream->Pause();
+ } else if (aPlaying) {
+ mAudioStream->Resume();
+ }
+ mPlaying = aPlaying;
+}
+
+TimeUnit AudioSink::GetEndTime() const {
+ uint64_t written = mWritten;
+ TimeUnit played = media::TimeUnit(written, mOutputRate) + mStartTime;
+ if (!played.IsValid()) {
+ NS_WARNING("Int overflow calculating audio end time");
+ return TimeUnit::Zero();
+ }
+ // As we may be resampling, rounding errors may occur. Ensure we never get
+ // past the original end time.
+ return std::min(mLastEndTime, played);
+}
+
+uint32_t AudioSink::PopFrames(AudioDataValue* aBuffer, uint32_t aFrames,
+ bool aAudioThreadChanged) {
+ // This is safe, because we have the guarantee, by the OS, that audio
+ // callbacks are never called concurrently. Audio thread changes can only
+ // happen when not using cubeb remoting, and often when changing audio device
+ // at the system level.
+ if (aAudioThreadChanged) {
+ mProcessedSPSCQueue->ResetThreadIds();
+ }
+
+ TRACE_COMMENT("AudioSink::PopFrames", "%u frames (ringbuffer: %u/%u)",
+ aFrames, SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+ SampleToFrame(mProcessedSPSCQueue->Capacity()));
+
+ const int samplesToPop = static_cast<int>(aFrames * mOutputChannels);
+ const int samplesRead = mProcessedSPSCQueue->Dequeue(aBuffer, samplesToPop);
+ auto sampleOut = samplesRead;
+ MOZ_ASSERT(samplesRead % mOutputChannels == 0);
+ mWritten += SampleToFrame(samplesRead);
+ if (samplesRead != samplesToPop) {
+ if (Ended()) {
+ SINK_LOG("Last PopFrames -- Source ended.");
+ } else if (mTreatUnderrunAsSilence) {
+ SINK_LOG("Treat underrun frames (%u) as silence frames",
+ SampleToFrame(samplesToPop - samplesRead));
+ sampleOut = samplesToPop;
+ } else {
+ NS_WARNING("Underrun when popping samples from audiosink ring buffer.");
+ TRACE_COMMENT("AudioSink::PopFrames", "Underrun %u frames missing",
+ SampleToFrame(samplesToPop - samplesRead));
+ }
+ // silence the rest
+ PodZero(aBuffer + samplesRead, samplesToPop - samplesRead);
+ }
+
+ mAudioPopped.Notify();
+
+ SINK_LOG_V("Popping %u frames. Remaining in ringbuffer %u / %u\n", aFrames,
+ SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+ SampleToFrame(mProcessedSPSCQueue->Capacity()));
+ CheckIsAudible(Span(aBuffer, sampleOut), mOutputChannels);
+
+ return SampleToFrame(sampleOut);
+}
+
+bool AudioSink::Ended() const {
+ // Return true when error encountered so AudioStream can start draining.
+ // Both atomic so we don't need locking
+ return mProcessedQueueFinished || mErrored;
+}
+
+void AudioSink::CheckIsAudible(const Span<AudioDataValue>& aInterleaved,
+ size_t aChannel) {
+ mAudibilityMonitor.ProcessInterleaved(aInterleaved, aChannel);
+ bool isAudible = mAudibilityMonitor.RecentlyAudible();
+
+ if (isAudible != mIsAudioDataAudible) {
+ mIsAudioDataAudible = isAudible;
+ SINK_LOG("Notifying that audio is now %s",
+ mIsAudioDataAudible ? "audible" : "inaudible");
+ mAudibleEvent.Notify(mIsAudioDataAudible);
+ }
+}
+
+void AudioSink::OnAudioPopped() {
+ SINK_LOG_V("AudioStream has used an audio packet.");
+ NotifyAudioNeeded();
+}
+
+void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) {
+ SINK_LOG_V("One new audio packet available.");
+ NotifyAudioNeeded();
+}
+
+uint32_t AudioSink::AudioQueuedInRingBufferMS() const {
+ return static_cast<uint32_t>(
+ 1000 * SampleToFrame(mProcessedSPSCQueue->AvailableRead()) / mOutputRate);
+}
+
+uint32_t AudioSink::SampleToFrame(uint32_t aSamples) const {
+ return aSamples / mOutputChannels;
+}
+
+void AudioSink::NotifyAudioNeeded() {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
+ "Not called from the owner's thread");
+
+ while (mAudioQueue.GetSize() &&
+ AudioQueuedInRingBufferMS() <
+ static_cast<uint32_t>(mProcessedQueueThresholdMS)) {
+ // Check if there's room in our ring buffer.
+ if (mAudioQueue.PeekFront()->Frames() >
+ SampleToFrame(mProcessedSPSCQueue->AvailableWrite())) {
+ SINK_LOG_V("Can't push %u frames. In ringbuffer %u / %u\n",
+ mAudioQueue.PeekFront()->Frames(),
+ SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+ SampleToFrame(mProcessedSPSCQueue->Capacity()));
+ return;
+ }
+ SINK_LOG_V("Pushing %u frames. In ringbuffer %u / %u\n",
+ mAudioQueue.PeekFront()->Frames(),
+ SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+ SampleToFrame(mProcessedSPSCQueue->Capacity()));
+ RefPtr<AudioData> data = mAudioQueue.PopFront();
+
+ // Ignore the element with 0 frames and try next.
+ if (!data->Frames()) {
+ continue;
+ }
+
+ if (!mConverter ||
+ (data->mRate != mConverter->InputConfig().Rate() ||
+ data->mChannels != mConverter->InputConfig().Channels())) {
+ SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
+ mConverter ? mConverter->InputConfig().Channels() : 0,
+ mConverter ? mConverter->InputConfig().Rate() : 0,
+ data->mChannels, data->mRate);
+
+ DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+
+ // mFramesParsed indicates the current playtime in frames at the current
+ // input sampling rate. Recalculate it per the new sampling rate.
+ if (mFramesParsed) {
+ // We minimize overflow.
+ uint32_t oldRate = mConverter->InputConfig().Rate();
+ uint32_t newRate = data->mRate;
+ CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
+ if (!result.isValid()) {
+ NS_WARNING("Int overflow in AudioSink");
+ mErrored = true;
+ return;
+ }
+ mFramesParsed = result.value();
+ }
+
+ const AudioConfig::ChannelLayout inputLayout =
+ data->mChannelMap
+ ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap)
+ : AudioConfig::ChannelLayout(data->mChannels);
+ const AudioConfig::ChannelLayout outputLayout =
+ mOutputChannels == data->mChannels
+ ? inputLayout
+ : AudioConfig::ChannelLayout(mOutputChannels);
+ AudioConfig inConfig =
+ AudioConfig(inputLayout, data->mChannels, data->mRate);
+ AudioConfig outConfig =
+ AudioConfig(outputLayout, mOutputChannels, mOutputRate);
+ if (!AudioConverter::CanConvert(inConfig, outConfig)) {
+ mErrored = true;
+ return;
+ }
+ mConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
+ }
+
+ // See if there's a gap in the audio. If there is, push silence into the
+ // audio hardware, so we can play across the gap.
+ // Calculate the timestamp of the next chunk of audio in numbers of
+ // samples.
+ CheckedInt64 sampleTime =
+ TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
+ // Calculate the number of frames that have been pushed onto the audio
+ // hardware.
+ CheckedInt64 missingFrames = sampleTime - mFramesParsed;
+
+ if (!missingFrames.isValid() || !sampleTime.isValid()) {
+ NS_WARNING("Int overflow in AudioSink");
+ mErrored = true;
+ return;
+ }
+
+ if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
+ // The next audio packet begins some time after the end of the last packet
+ // we pushed to the audio hardware. We must push silence into the audio
+ // hardware so that the next audio packet begins playback at the correct
+ // time. But don't push more than the ring buffer can receive.
+ missingFrames = std::min<int64_t>(
+ std::min<int64_t>(INT32_MAX, missingFrames.value()),
+ SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+ mFramesParsed += missingFrames.value();
+
+ SINK_LOG("Gap in the audio input, push %" PRId64 " frames of silence",
+ missingFrames.value());
+
+ RefPtr<AudioData> silenceData;
+ AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels);
+ if (!silenceBuffer) {
+ NS_WARNING("OOM in AudioSink");
+ mErrored = true;
+ return;
+ }
+ if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+ AlignedAudioBuffer convertedData =
+ mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer)))
+ .Forget();
+ silenceData = CreateAudioFromBuffer(std::move(convertedData), data);
+ } else {
+ silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data);
+ }
+ TRACE("Pushing silence");
+ PushProcessedAudio(silenceData);
+ }
+
+ mLastEndTime = data->GetEndTime();
+ mFramesParsed += data->Frames();
+
+ if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+ AlignedAudioBuffer buffer(data->MoveableData());
+ AlignedAudioBuffer convertedData =
+ mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget();
+ data = CreateAudioFromBuffer(std::move(convertedData), data);
+ }
+ if (PushProcessedAudio(data)) {
+ mLastProcessedPacket = Some(data);
+ }
+ }
+
+ if (mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0) {
+ // We have reached the end of the data, drain the resampler.
+ DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+ mProcessedQueueFinished = true;
+ }
+}
+
+uint32_t AudioSink::PushProcessedAudio(AudioData* aData) {
+ if (!aData || !aData->Frames()) {
+ return 0;
+ }
+ int framesToEnqueue = static_cast<int>(aData->Frames() * aData->mChannels);
+ TRACE_COMMENT("AudioSink::PushProcessedAudio", "%u frames (%u/%u)",
+ framesToEnqueue,
+ SampleToFrame(mProcessedSPSCQueue->AvailableWrite()),
+ SampleToFrame(mProcessedSPSCQueue->Capacity()));
+ DebugOnly<int> rv =
+ mProcessedSPSCQueue->Enqueue(aData->Data().Elements(), framesToEnqueue);
+ NS_WARNING_ASSERTION(
+ rv == static_cast<int>(aData->Frames() * aData->mChannels),
+ "AudioSink ring buffer over-run, can't push new data");
+ return aData->Frames();
+}
+
+already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer(
+ AlignedAudioBuffer&& aBuffer, AudioData* aReference) {
+ uint32_t frames = SampleToFrame(aBuffer.Length());
+ if (!frames) {
+ return nullptr;
+ }
+ auto duration = media::TimeUnit(frames, mOutputRate);
+ if (!duration.IsValid()) {
+ NS_WARNING("Int overflow in AudioSink");
+ mErrored = true;
+ return nullptr;
+ }
+ RefPtr<AudioData> data =
+ new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer),
+ mOutputChannels, mOutputRate);
+ MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
+ return data.forget();
+}
+
+uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+ if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
+ // nothing to drain.
+ return 0;
+ }
+
+ RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
+ mLastProcessedPacket.reset();
+
+ // To drain we simply provide an empty packet to the audio converter.
+ AlignedAudioBuffer convertedData =
+ mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+ uint32_t frames = SampleToFrame(convertedData.Length());
+ if (!convertedData.SetLength(std::min(frames, aMaxFrames) *
+ mOutputChannels)) {
+ // This can never happen as we were reducing the length of convertData.
+ mErrored = true;
+ return 0;
+ }
+
+ RefPtr<AudioData> data =
+ CreateAudioFromBuffer(std::move(convertedData), lastPacket);
+ return PushProcessedAudio(data);
+}
+
+void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+ aInfo.mAudioSinkWrapper.mAudioSink.mStartTime = mStartTime.ToMicroseconds();
+ aInfo.mAudioSinkWrapper.mAudioSink.mLastGoodPosition =
+ mLastGoodPosition.ToMicroseconds();
+ aInfo.mAudioSinkWrapper.mAudioSink.mIsPlaying = mPlaying;
+ aInfo.mAudioSinkWrapper.mAudioSink.mOutputRate = mOutputRate;
+ aInfo.mAudioSinkWrapper.mAudioSink.mWritten = mWritten;
+ aInfo.mAudioSinkWrapper.mAudioSink.mHasErrored = bool(mErrored);
+ aInfo.mAudioSinkWrapper.mAudioSink.mPlaybackComplete =
+ mAudioStream ? mAudioStream->IsPlaybackCompleted() : false;
+}
+
+void AudioSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+ SINK_LOG("set mTreatUnderrunAsSilence=%d", aEnabled);
+ mTreatUnderrunAsSilence = aEnabled;
+}
+
+} // namespace mozilla
diff --git a/dom/media/mediasink/AudioSink.h b/dom/media/mediasink/AudioSink.h
new file mode 100644
index 0000000000..856227ee4c
--- /dev/null
+++ b/dom/media/mediasink/AudioSink.h
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef AudioSink_h__
+#define AudioSink_h__
+
+#include "AudioStream.h"
+#include "AudibilityMonitor.h"
+#include "MediaEventSource.h"
+#include "MediaInfo.h"
+#include "MediaQueue.h"
+#include "MediaSink.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Monitor.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Result.h"
+#include "nsISupportsImpl.h"
+
+namespace mozilla {
+
+class AudioConverter;
+
+class AudioSink : private AudioStream::DataSource {
+ public:
+ enum class InitializationType {
+ // This AudioSink is being initialized for the first time
+ INITIAL,
+ UNMUTING
+ };
+ struct PlaybackParams {
+ PlaybackParams(double aVolume, double aPlaybackRate, bool aPreservesPitch)
+ : mVolume(aVolume),
+ mPlaybackRate(aPlaybackRate),
+ mPreservesPitch(aPreservesPitch) {}
+ double mVolume;
+ double mPlaybackRate;
+ bool mPreservesPitch;
+ };
+
+ AudioSink(AbstractThread* aThread, MediaQueue<AudioData>& aAudioQueue,
+ const AudioInfo& aInfo, bool aShouldResistFingerprinting);
+
+ ~AudioSink();
+
+ // Allocate and initialize mAudioStream. Returns NS_OK on success.
+ nsresult InitializeAudioStream(const PlaybackParams& aParams,
+ const RefPtr<AudioDeviceInfo>& aAudioDevice,
+ InitializationType aInitializationType);
+
+ // Start audio playback.
+ nsresult Start(const media::TimeUnit& aStartTime,
+ MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise);
+
+ /*
+ * All public functions are not thread-safe.
+ * Called on the task queue of MDSM only.
+ */
+ media::TimeUnit GetPosition();
+ media::TimeUnit GetEndTime() const;
+
+ // Check whether we've pushed more frames to the audio stream than it
+ // has played.
+ bool HasUnplayedFrames();
+
+ // The duration of the buffered frames.
+ media::TimeUnit UnplayedDuration() const;
+
+ // Shut down the AudioSink's resources. If an AudioStream existed, return the
+ // ended promise it had, if it's shutting down-mid stream becaues it's muting.
+ Maybe<MozPromiseHolder<MediaSink::EndedPromise>> Shutdown(
+ ShutdownCause aShutdownCause = ShutdownCause::Regular);
+
+ void SetVolume(double aVolume);
+ void SetStreamName(const nsAString& aStreamName);
+ void SetPlaybackRate(double aPlaybackRate);
+ void SetPreservesPitch(bool aPreservesPitch);
+ void SetPlaying(bool aPlaying);
+
+ MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; }
+
+ void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo);
+
+ // This returns true if the audio callbacks are being called, and so the
+ // audio stream-based clock is moving forward.
+ bool AudioStreamCallbackStarted() {
+ return mAudioStream && mAudioStream->CallbackStarted();
+ }
+
+ void UpdateStartTime(const media::TimeUnit& aStartTime) {
+ mStartTime = aStartTime;
+ }
+
+ void EnableTreatAudioUnderrunAsSilence(bool aEnabled);
+
+ private:
+ // Interface of AudioStream::DataSource.
+ // Called on the callback thread of cubeb. Returns the number of frames that
+ // were available.
+ uint32_t PopFrames(AudioDataValue* aBuffer, uint32_t aFrames,
+ bool aAudioThreadChanged) override;
+ bool Ended() const override;
+
+ // When shutting down, it's important to not lose any audio data, it might be
+ // still of use, in two scenarios:
+ // - If the audio is now captured to a MediaStream, whatever is enqueued in
+ // the ring buffer needs to be played out now ;
+ // - If the AudioSink is shutting down because the audio is muted, it's
+ // important to keep the audio around in case it's quickly unmuted,
+ // and in general to keep A/V sync correct when unmuted.
+ void ReenqueueUnplayedAudioDataIfNeeded();
+
+ void CheckIsAudible(const Span<AudioDataValue>& aInterleaved,
+ size_t aChannel);
+
+ // The audio stream resource. Used on the task queue of MDSM only.
+ RefPtr<AudioStream> mAudioStream;
+
+ // The presentation time of the first audio frame that was played.
+ // We can add this to the audio stream position to determine
+ // the current audio time.
+ media::TimeUnit mStartTime;
+
+ // Keep the last good position returned from the audio stream. Used to ensure
+ // position returned by GetPosition() is mono-increasing in spite of audio
+ // stream error. Used on the task queue of MDSM only.
+ media::TimeUnit mLastGoodPosition;
+
+ // Used on the task queue of MDSM only.
+ bool mPlaying;
+
+ // PCM frames written to the stream so far. Written on the callback thread,
+ // read on the MDSM thread.
+ Atomic<int64_t> mWritten;
+
+ // True if there is any error in processing audio data like overflow.
+ Atomic<bool> mErrored;
+
+ const RefPtr<AbstractThread> mOwnerThread;
+
+ // Audio Processing objects and methods
+ void OnAudioPopped();
+ void OnAudioPushed(const RefPtr<AudioData>& aSample);
+ void NotifyAudioNeeded();
+ // Drain the converter and add the output to the processed audio queue.
+ // A maximum of aMaxFrames will be added.
+ uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
+ already_AddRefed<AudioData> CreateAudioFromBuffer(
+ AlignedAudioBuffer&& aBuffer, AudioData* aReference);
+ // Add data to the processsed queue return the number of frames added.
+ uint32_t PushProcessedAudio(AudioData* aData);
+ uint32_t AudioQueuedInRingBufferMS() const;
+ uint32_t SampleToFrame(uint32_t aSamples) const;
+ UniquePtr<AudioConverter> mConverter;
+ UniquePtr<SPSCQueue<AudioDataValue>> mProcessedSPSCQueue;
+ MediaEventListener mAudioQueueListener;
+ MediaEventListener mAudioQueueFinishListener;
+ MediaEventListener mProcessedQueueListener;
+ // Number of frames processed from mAudioQueue. Used to determine gaps in
+ // the input stream. It indicates the time in frames since playback started
+ // at the current input framerate.
+ int64_t mFramesParsed;
+ Maybe<RefPtr<AudioData>> mLastProcessedPacket;
+ media::TimeUnit mLastEndTime;
+ // Never modifed after construction.
+ uint32_t mOutputRate;
+ uint32_t mOutputChannels;
+ AudibilityMonitor mAudibilityMonitor;
+ bool mIsAudioDataAudible;
+ MediaEventProducer<bool> mAudibleEvent;
+ // Only signed on the real-time audio thread.
+ MediaEventProducer<void> mAudioPopped;
+
+ Atomic<bool> mProcessedQueueFinished;
+ MediaQueue<AudioData>& mAudioQueue;
+ const float mProcessedQueueThresholdMS;
+
+ // True if we'd like to treat underrun as silent frames. But that can only be
+ // applied in the special situation for seamless looping.
+ bool mTreatUnderrunAsSilence = false;
+};
+
+} // namespace mozilla
+
+#endif // AudioSink_h__
diff --git a/dom/media/mediasink/AudioSinkWrapper.cpp b/dom/media/mediasink/AudioSinkWrapper.cpp
new file mode 100644
index 0000000000..5a006479e1
--- /dev/null
+++ b/dom/media/mediasink/AudioSinkWrapper.cpp
@@ -0,0 +1,496 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSinkWrapper.h"
+#include "AudioDeviceInfo.h"
+#include "AudioSink.h"
+#include "VideoUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Result.h"
+#include "nsPrintfCString.h"
+
+mozilla::LazyLogModule gAudioSinkWrapperLog("AudioSinkWrapper");
+#define LOG(...) \
+ MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Debug, (__VA_ARGS__));
+#define LOGV(...) \
+ MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Verbose, (__VA_ARGS__));
+
+namespace mozilla {
+
+using media::TimeUnit;
+
+AudioSinkWrapper::~AudioSinkWrapper() = default;
+
+void AudioSinkWrapper::Shutdown() {
+ AssertOwnerThread();
+ MOZ_ASSERT(!mIsStarted, "Must be called after playback stopped.");
+ mCreator = nullptr;
+ mEndedPromiseHolder.ResolveIfExists(true, __func__);
+}
+
+RefPtr<MediaSink::EndedPromise> AudioSinkWrapper::OnEnded(TrackType aType) {
+ AssertOwnerThread();
+ MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+ if (aType == TrackInfo::kAudioTrack) {
+ return mEndedPromise;
+ }
+ return nullptr;
+}
+
+TimeUnit AudioSinkWrapper::GetEndTime(TrackType aType) const {
+ AssertOwnerThread();
+ MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+ if (aType == TrackInfo::kAudioTrack && mAudioSink &&
+ mAudioSink->AudioStreamCallbackStarted()) {
+ return mAudioSink->GetEndTime();
+ }
+
+ if (aType == TrackInfo::kAudioTrack && !mAudioSink && IsMuted()) {
+ if (IsPlaying()) {
+ return GetSystemClockPosition(TimeStamp::Now());
+ }
+
+ return mPlayDuration;
+ }
+ return TimeUnit::Zero();
+}
+
+TimeUnit AudioSinkWrapper::GetSystemClockPosition(TimeStamp aNow) const {
+ AssertOwnerThread();
+ MOZ_ASSERT(!mPlayStartTime.IsNull());
+ // Time elapsed since we started playing.
+ double delta = (aNow - mPlayStartTime).ToSeconds();
+ // Take playback rate into account.
+ return mPlayDuration + TimeUnit::FromSeconds(delta * mParams.mPlaybackRate);
+}
+
+bool AudioSinkWrapper::IsMuted() const {
+ AssertOwnerThread();
+ return mParams.mVolume == 0.0;
+}
+
+TimeUnit AudioSinkWrapper::GetPosition(TimeStamp* aTimeStamp) {
+ AssertOwnerThread();
+ MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+
+ TimeUnit pos;
+ TimeStamp t = TimeStamp::Now();
+
+ if (!mAudioEnded && !IsMuted() && mAudioSink) {
+ if (mLastClockSource == ClockSource::SystemClock) {
+ TimeUnit switchTime = GetSystemClockPosition(t);
+ // Update the _actual_ start time of the audio stream now that it has
+ // started, preventing any clock discontinuity.
+ mAudioSink->UpdateStartTime(switchTime);
+ LOGV("%p: switching to audio clock at media time %lf", this,
+ switchTime.ToSeconds());
+ }
+ // Rely on the audio sink to report playback position when it is not ended.
+ pos = mAudioSink->GetPosition();
+ LOGV("%p: Getting position from the Audio Sink %lf", this, pos.ToSeconds());
+ mLastClockSource = ClockSource::AudioStream;
+ } else if (!mPlayStartTime.IsNull()) {
+ // Calculate playback position using system clock if we are still playing,
+ // but not rendering the audio, because this audio sink is muted.
+ pos = GetSystemClockPosition(t);
+ LOGV("%p: Getting position from the system clock %lf", this,
+ pos.ToSeconds());
+ if (IsMuted()) {
+ if (mAudioQueue.GetSize() > 0) {
+ // audio track, but it's muted and won't be dequeued, discard packets
+ // that are behind the current media time, to keep the queue size under
+ // control.
+ DropAudioPacketsIfNeeded(pos);
+ }
+ // If muted, it's necessary to manually check if the audio has "ended",
+ // meaning that all the audio packets have been consumed, to resolve the
+ // ended promise.
+ if (CheckIfEnded()) {
+ MOZ_ASSERT(!mAudioSink);
+ mEndedPromiseHolder.ResolveIfExists(true, __func__);
+ }
+ }
+ mLastClockSource = ClockSource::SystemClock;
+ } else {
+ // Return how long we've played if we are not playing.
+ pos = mPlayDuration;
+ LOGV("%p: Getting static position, not playing %lf", this, pos.ToSeconds());
+ mLastClockSource = ClockSource::Paused;
+ }
+
+ if (aTimeStamp) {
+ *aTimeStamp = t;
+ }
+
+ return pos;
+}
+
+bool AudioSinkWrapper::CheckIfEnded() const {
+ return mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0u;
+}
+
+bool AudioSinkWrapper::HasUnplayedFrames(TrackType aType) const {
+ AssertOwnerThread();
+ return mAudioSink ? mAudioSink->HasUnplayedFrames() : false;
+}
+
+media::TimeUnit AudioSinkWrapper::UnplayedDuration(TrackType aType) const {
+ AssertOwnerThread();
+ return mAudioSink ? mAudioSink->UnplayedDuration() : media::TimeUnit::Zero();
+}
+
+void AudioSinkWrapper::DropAudioPacketsIfNeeded(
+ const TimeUnit& aMediaPosition) {
+ RefPtr<AudioData> audio = mAudioQueue.PeekFront();
+ uint32_t dropped = 0;
+ while (audio && audio->mTime + audio->mDuration < aMediaPosition) {
+ // drop this packet, try the next one
+ audio = mAudioQueue.PopFront();
+ dropped++;
+ if (audio) {
+ LOGV(
+ "Dropping audio packets: media position: %lf, "
+ "packet dropped: [%lf, %lf] (%u so far).\n",
+ aMediaPosition.ToSeconds(), audio->mTime.ToSeconds(),
+ (audio->GetEndTime()).ToSeconds(), dropped);
+ }
+ audio = mAudioQueue.PeekFront();
+ }
+}
+
+void AudioSinkWrapper::OnMuted(bool aMuted) {
+ AssertOwnerThread();
+ LOG("%p: AudioSinkWrapper::OnMuted(%s)", this, aMuted ? "true" : "false");
+ // Nothing to do
+ if (mAudioEnded) {
+ LOG("%p: AudioSinkWrapper::OnMuted, but no audio track", this);
+ return;
+ }
+ if (aMuted) {
+ if (mAudioSink) {
+ LOG("AudioSinkWrapper muted, shutting down AudioStream.");
+ mAudioSinkEndedPromise.DisconnectIfExists();
+ if (IsPlaying()) {
+ mPlayDuration = mAudioSink->GetPosition();
+ mPlayStartTime = TimeStamp::Now();
+ }
+ Maybe<MozPromiseHolder<MediaSink::EndedPromise>> rv =
+ mAudioSink->Shutdown(ShutdownCause::Muting);
+ // There will generally be a promise here, except if the stream has
+ // errored out, or if it has just finished. In both cases, the promise has
+ // been handled appropriately, there is nothing to do.
+ if (rv.isSome()) {
+ mEndedPromiseHolder = std::move(rv.ref());
+ }
+ mAudioSink = nullptr;
+ }
+ } else {
+ if (!IsPlaying()) {
+ LOG("%p: AudioSinkWrapper::OnMuted: not playing, not re-creating an "
+ "AudioSink",
+ this);
+ return;
+ }
+ LOG("%p: AudioSinkWrapper unmuted, re-creating an AudioStream.", this);
+ TimeUnit mediaPosition = GetSystemClockPosition(TimeStamp::Now());
+ nsresult rv = StartAudioSink(mediaPosition, AudioSinkStartPolicy::ASYNC);
+ if (NS_FAILED(rv)) {
+ NS_WARNING(
+ "Could not start AudioSink from AudioSinkWrapper when unmuting");
+ }
+ }
+}
+
+void AudioSinkWrapper::SetVolume(double aVolume) {
+ AssertOwnerThread();
+
+ bool wasMuted = mParams.mVolume == 0;
+ bool nowMuted = aVolume == 0.;
+ mParams.mVolume = aVolume;
+
+ if (!wasMuted && nowMuted) {
+ OnMuted(true);
+ } else if (wasMuted && !nowMuted) {
+ OnMuted(false);
+ }
+
+ if (mAudioSink) {
+ mAudioSink->SetVolume(aVolume);
+ }
+}
+
+void AudioSinkWrapper::SetStreamName(const nsAString& aStreamName) {
+ AssertOwnerThread();
+ if (mAudioSink) {
+ mAudioSink->SetStreamName(aStreamName);
+ }
+}
+
+void AudioSinkWrapper::SetPlaybackRate(double aPlaybackRate) {
+ AssertOwnerThread();
+ if (!mAudioEnded && mAudioSink) {
+ // Pass the playback rate to the audio sink. The underlying AudioStream
+ // will handle playback rate changes and report correct audio position.
+ mAudioSink->SetPlaybackRate(aPlaybackRate);
+ } else if (!mPlayStartTime.IsNull()) {
+ // Adjust playback duration and start time when we are still playing.
+ TimeStamp now = TimeStamp::Now();
+ mPlayDuration = GetSystemClockPosition(now);
+ mPlayStartTime = now;
+ }
+ // mParams.mPlaybackRate affects GetSystemClockPosition(). It should be
+ // updated after the calls to GetSystemClockPosition();
+ mParams.mPlaybackRate = aPlaybackRate;
+
+ // Do nothing when not playing. Changes in playback rate will be taken into
+ // account by GetSystemClockPosition().
+}
+
+void AudioSinkWrapper::SetPreservesPitch(bool aPreservesPitch) {
+ AssertOwnerThread();
+ mParams.mPreservesPitch = aPreservesPitch;
+ if (mAudioSink) {
+ mAudioSink->SetPreservesPitch(aPreservesPitch);
+ }
+}
+
+void AudioSinkWrapper::SetPlaying(bool aPlaying) {
+ AssertOwnerThread();
+ LOG("%p: AudioSinkWrapper::SetPlaying %s", this, aPlaying ? "true" : "false");
+
+ // Resume/pause matters only when playback started.
+ if (!mIsStarted) {
+ return;
+ }
+
+ if (mAudioSink) {
+ mAudioSink->SetPlaying(aPlaying);
+ } else {
+ if (aPlaying) {
+ LOG("%p: AudioSinkWrapper::SetPlaying : starting an AudioSink", this);
+ TimeUnit switchTime = GetPosition();
+ DropAudioPacketsIfNeeded(switchTime);
+ StartAudioSink(switchTime, AudioSinkStartPolicy::SYNC);
+ }
+ }
+
+ if (aPlaying) {
+ MOZ_ASSERT(mPlayStartTime.IsNull());
+ mPlayStartTime = TimeStamp::Now();
+ } else {
+ // Remember how long we've played.
+ mPlayDuration = GetPosition();
+ // mPlayStartTime must be updated later since GetPosition()
+ // depends on the value of mPlayStartTime.
+ mPlayStartTime = TimeStamp();
+ }
+}
+
+double AudioSinkWrapper::PlaybackRate() const {
+ AssertOwnerThread();
+ return mParams.mPlaybackRate;
+}
+
+nsresult AudioSinkWrapper::Start(const TimeUnit& aStartTime,
+ const MediaInfo& aInfo) {
+ LOG("%p AudioSinkWrapper::Start", this);
+ AssertOwnerThread();
+ MOZ_ASSERT(!mIsStarted, "playback already started.");
+
+ mIsStarted = true;
+ mPlayDuration = aStartTime;
+ mPlayStartTime = TimeStamp::Now();
+ mAudioEnded = IsAudioSourceEnded(aInfo);
+
+ if (mAudioEnded) {
+ // Resolve promise if we start playback at the end position of the audio.
+ mEndedPromise =
+ aInfo.HasAudio()
+ ? MediaSink::EndedPromise::CreateAndResolve(true, __func__)
+ : nullptr;
+ return NS_OK;
+ }
+
+ return StartAudioSink(aStartTime, AudioSinkStartPolicy::SYNC);
+}
+
+nsresult AudioSinkWrapper::StartAudioSink(const TimeUnit& aStartTime,
+ AudioSinkStartPolicy aPolicy) {
+ MOZ_RELEASE_ASSERT(!mAudioSink);
+
+ nsresult rv = NS_OK;
+
+ mAudioSinkEndedPromise.DisconnectIfExists();
+ mEndedPromise = mEndedPromiseHolder.Ensure(__func__);
+ mEndedPromise
+ ->Then(mOwnerThread.get(), __func__, this,
+ &AudioSinkWrapper::OnAudioEnded, &AudioSinkWrapper::OnAudioEnded)
+ ->Track(mAudioSinkEndedPromise);
+
+ LOG("%p: AudioSinkWrapper::StartAudioSink (%s)", this,
+ aPolicy == AudioSinkStartPolicy::ASYNC ? "Async" : "Sync");
+
+ if (IsMuted()) {
+ LOG("%p: Muted: not starting an audio sink", this);
+ return NS_OK;
+ }
+ LOG("%p: Not muted: starting a new audio sink", this);
+ if (aPolicy == AudioSinkStartPolicy::ASYNC) {
+ UniquePtr<AudioSink> audioSink;
+ audioSink.reset(mCreator->Create());
+ NS_DispatchBackgroundTask(NS_NewRunnableFunction(
+ "StartAudioSink (Async part: initialization)",
+ [self = RefPtr<AudioSinkWrapper>(this), audioSink{std::move(audioSink)},
+ this]() mutable {
+ LOG("AudioSink initialization on background thread");
+ // This can take about 200ms, e.g. on Windows, we don't want to do
+ // it on the MDSM thread, because it would make the clock not update
+ // for that amount of time, and the video would therefore not
+ // update. The Start() call is very cheap on the other hand, we can
+ // do it from the MDSM thread.
+ nsresult rv = audioSink->InitializeAudioStream(
+ mParams, mAudioDevice, AudioSink::InitializationType::UNMUTING);
+ mOwnerThread->Dispatch(NS_NewRunnableFunction(
+ "StartAudioSink (Async part: start from MDSM thread)",
+ [self = RefPtr<AudioSinkWrapper>(this),
+ audioSink{std::move(audioSink)}, this, rv]() mutable {
+ LOG("AudioSink async init done, back on MDSM thread");
+ if (NS_FAILED(rv)) {
+ LOG("Async AudioSink initialization failed");
+ mEndedPromiseHolder.RejectIfExists(rv, __func__);
+ return;
+ }
+
+ // It's possible that the newly created isn't needed at this
+ // point, in some cases:
+ // 1. An AudioSink was created synchronously while this
+ // AudioSink was initialized asynchronously, bail out here. This
+ // happens when seeking (which does a synchronous
+ // initialization) right after unmuting.
+ // 2. The media element was muted while the async initialization
+ // was happening.
+ // 3. The AudioSinkWrapper was stopped during asynchronous
+ // creation.
+ // 4. The AudioSinkWrapper was paused during asynchronous
+ // creation.
+ if (mAudioSink || IsMuted() || !mIsStarted ||
+ mPlayStartTime.IsNull()) {
+ LOG("AudioSink initialized async isn't needed, shutting "
+ "it down.");
+ DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> rv =
+ audioSink->Shutdown();
+ MOZ_ASSERT(rv.inspect().isNothing());
+ return;
+ }
+
+ MOZ_ASSERT(!mAudioSink);
+ TimeUnit switchTime = GetPosition();
+ DropAudioPacketsIfNeeded(switchTime);
+ mAudioSink.swap(audioSink);
+ if (mTreatUnderrunAsSilence) {
+ mAudioSink->EnableTreatAudioUnderrunAsSilence(
+ mTreatUnderrunAsSilence);
+ }
+ LOG("AudioSink async, start");
+ nsresult rv2 =
+ mAudioSink->Start(switchTime, mEndedPromiseHolder);
+ if (NS_FAILED(rv2)) {
+ LOG("Async AudioSinkWrapper start failed");
+ mEndedPromiseHolder.RejectIfExists(rv2, __func__);
+ }
+ }));
+ }));
+ } else {
+ mAudioSink.reset(mCreator->Create());
+ nsresult rv = mAudioSink->InitializeAudioStream(
+ mParams, mAudioDevice, AudioSink::InitializationType::INITIAL);
+ if (NS_FAILED(rv)) {
+ mEndedPromiseHolder.RejectIfExists(rv, __func__);
+ LOG("Sync AudioSinkWrapper initialization failed");
+ return rv;
+ }
+ if (mTreatUnderrunAsSilence) {
+ mAudioSink->EnableTreatAudioUnderrunAsSilence(mTreatUnderrunAsSilence);
+ }
+ rv = mAudioSink->Start(aStartTime, mEndedPromiseHolder);
+ if (NS_FAILED(rv)) {
+ LOG("Sync AudioSinkWrapper start failed");
+ mEndedPromiseHolder.RejectIfExists(rv, __func__);
+ }
+ }
+
+ return rv;
+}
+
+bool AudioSinkWrapper::IsAudioSourceEnded(const MediaInfo& aInfo) const {
+ // no audio or empty audio queue which won't get data anymore is equivalent to
+ // audio ended
+ return !aInfo.HasAudio() ||
+ (mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0u);
+}
+
+void AudioSinkWrapper::Stop() {
+ AssertOwnerThread();
+ MOZ_ASSERT(mIsStarted, "playback not started.");
+
+ LOG("%p: AudioSinkWrapper::Stop", this);
+
+ mIsStarted = false;
+ mAudioEnded = true;
+
+ mAudioSinkEndedPromise.DisconnectIfExists();
+
+ if (mAudioSink) {
+ DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> rv =
+ mAudioSink->Shutdown();
+ MOZ_ASSERT(rv.inspect().isNothing());
+ mAudioSink = nullptr;
+ mEndedPromise = nullptr;
+ }
+}
+
+bool AudioSinkWrapper::IsStarted() const {
+ AssertOwnerThread();
+ return mIsStarted;
+}
+
+bool AudioSinkWrapper::IsPlaying() const {
+ AssertOwnerThread();
+ return IsStarted() && !mPlayStartTime.IsNull();
+}
+
+void AudioSinkWrapper::OnAudioEnded() {
+ AssertOwnerThread();
+ LOG("%p: AudioSinkWrapper::OnAudioEnded", this);
+ mAudioSinkEndedPromise.Complete();
+ mPlayDuration = GetPosition();
+ if (!mPlayStartTime.IsNull()) {
+ mPlayStartTime = TimeStamp::Now();
+ }
+ mAudioEnded = true;
+}
+
+void AudioSinkWrapper::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+ AssertOwnerThread();
+ aInfo.mAudioSinkWrapper.mIsPlaying = IsPlaying();
+ aInfo.mAudioSinkWrapper.mIsStarted = IsStarted();
+ aInfo.mAudioSinkWrapper.mAudioEnded = mAudioEnded;
+ if (mAudioSink) {
+ mAudioSink->GetDebugInfo(aInfo);
+ }
+}
+
+void AudioSinkWrapper::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+ mTreatUnderrunAsSilence = aEnabled;
+ if (mAudioSink) {
+ mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled);
+ }
+}
+
+} // namespace mozilla
+
+#undef LOG
+#undef LOGV
diff --git a/dom/media/mediasink/AudioSinkWrapper.h b/dom/media/mediasink/AudioSinkWrapper.h
new file mode 100644
index 0000000000..411983c526
--- /dev/null
+++ b/dom/media/mediasink/AudioSinkWrapper.h
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AudioSinkWrapper_h_
+#define AudioSinkWrapper_h_
+
+#include "mozilla/AbstractThread.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include "AudioSink.h"
+#include "MediaSink.h"
+
+namespace mozilla {
+class MediaData;
+template <class T>
+class MediaQueue;
+
+/**
+ * A wrapper around AudioSink to provide the interface of MediaSink.
+ */
+class AudioSinkWrapper : public MediaSink {
+ using PlaybackParams = AudioSink::PlaybackParams;
+
+ // An AudioSink factory.
+ class Creator {
+ public:
+ virtual ~Creator() = default;
+ virtual AudioSink* Create() = 0;
+ };
+
+ // Wrap around a function object which creates AudioSinks.
+ template <typename Function>
+ class CreatorImpl : public Creator {
+ public:
+ explicit CreatorImpl(const Function& aFunc) : mFunction(aFunc) {}
+ AudioSink* Create() override { return mFunction(); }
+
+ private:
+ Function mFunction;
+ };
+
+ public:
+ template <typename Function>
+ AudioSinkWrapper(AbstractThread* aOwnerThread,
+ MediaQueue<AudioData>& aAudioQueue, const Function& aFunc,
+ double aVolume, double aPlaybackRate, bool aPreservesPitch,
+ RefPtr<AudioDeviceInfo> aAudioDevice)
+ : mOwnerThread(aOwnerThread),
+ mCreator(new CreatorImpl<Function>(aFunc)),
+ mAudioDevice(std::move(aAudioDevice)),
+ mIsStarted(false),
+ mParams(aVolume, aPlaybackRate, aPreservesPitch),
+ // Give an invalid value to facilitate debug if used before playback
+ // starts.
+ mPlayDuration(media::TimeUnit::Invalid()),
+ mAudioEnded(true),
+ mAudioQueue(aAudioQueue) {}
+
+ RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+ media::TimeUnit GetEndTime(TrackType aType) const override;
+ media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+ bool HasUnplayedFrames(TrackType aType) const override;
+ media::TimeUnit UnplayedDuration(TrackType aType) const override;
+ void DropAudioPacketsIfNeeded(const media::TimeUnit& aMediaPosition);
+
+ void SetVolume(double aVolume) override;
+ void SetStreamName(const nsAString& aStreamName) override;
+ void SetPlaybackRate(double aPlaybackRate) override;
+ void SetPreservesPitch(bool aPreservesPitch) override;
+ void SetPlaying(bool aPlaying) override;
+
+ double PlaybackRate() const override;
+
+ nsresult Start(const media::TimeUnit& aStartTime,
+ const MediaInfo& aInfo) override;
+ void Stop() override;
+ bool IsStarted() const override;
+ bool IsPlaying() const override;
+
+ const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; }
+
+ void Shutdown() override;
+
+ void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+
+ void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override;
+
+ private:
+ // The clock that was in use for the previous position query, allowing to
+ // detect clock switches.
+ enum class ClockSource {
+ // The clock comes from an underlying system-level audio stream.
+ AudioStream,
+ // The clock comes from the system clock.
+ SystemClock,
+ // The stream is paused, a constant time is reported.
+ Paused
+ } mLastClockSource = ClockSource::Paused;
+ bool IsMuted() const;
+ void OnMuted(bool aMuted);
+ virtual ~AudioSinkWrapper();
+
+ void AssertOwnerThread() const {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+ }
+
+ // An AudioSink can be started synchronously from the MDSM thread, or
+ // asynchronously.
+ // In synchronous mode, the clock doesn't advance until the sink has been
+ // created, initialized and started. This is useful for the initial startup,
+ // and when seeking.
+ // In asynchronous mode, the clock will keep going forward (using the system
+ // clock) until the AudioSink is started, at which point the clock will use
+ // the AudioSink clock. This is used when unmuting a media element.
+ enum class AudioSinkStartPolicy { SYNC, ASYNC };
+ nsresult StartAudioSink(const media::TimeUnit& aStartTime,
+ AudioSinkStartPolicy aPolicy);
+
+ // Get the current media position using the system clock. This is used when
+ // the audio is muted, or when the media has no audio track. Otherwise, the
+ // media's position is based on the clock of the AudioStream.
+ media::TimeUnit GetSystemClockPosition(TimeStamp aNow) const;
+ bool CheckIfEnded() const;
+
+ void OnAudioEnded();
+
+ bool IsAudioSourceEnded(const MediaInfo& aInfo) const;
+
+ const RefPtr<AbstractThread> mOwnerThread;
+ UniquePtr<Creator> mCreator;
+ UniquePtr<AudioSink> mAudioSink;
+ // The output device this AudioSink is playing data to. The system's default
+ // device is used if this is null.
+ const RefPtr<AudioDeviceInfo> mAudioDevice;
+ // Will only exist when media has an audio track.
+ RefPtr<EndedPromise> mEndedPromise;
+ MozPromiseHolder<EndedPromise> mEndedPromiseHolder;
+
+ bool mIsStarted;
+ PlaybackParams mParams;
+
+ TimeStamp mPlayStartTime;
+ media::TimeUnit mPlayDuration;
+
+ bool mAudioEnded;
+ MozPromiseRequestHolder<EndedPromise> mAudioSinkEndedPromise;
+ MediaQueue<AudioData>& mAudioQueue;
+
+ // True if we'd like to treat underrun as silent frames. But that can only be
+ // applied in the special situation for seamless looping.
+ bool mTreatUnderrunAsSilence = false;
+};
+
+} // namespace mozilla
+
+#endif // AudioSinkWrapper_h_
diff --git a/dom/media/mediasink/DecodedStream.cpp b/dom/media/mediasink/DecodedStream.cpp
new file mode 100644
index 0000000000..0a488dcfdf
--- /dev/null
+++ b/dom/media/mediasink/DecodedStream.cpp
@@ -0,0 +1,1171 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "DecodedStream.h"
+
+#include "AudioDecoderInputTrack.h"
+#include "AudioSegment.h"
+#include "MediaData.h"
+#include "MediaDecoderStateMachine.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "SharedBuffer.h"
+#include "Tracing.h"
+#include "VideoSegment.h"
+#include "VideoUtils.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/SyncRunnable.h"
+#include "mozilla/gfx/Point.h"
+#include "mozilla/StaticPrefs_dom.h"
+#include "nsProxyRelease.h"
+
+namespace mozilla {
+
+using media::NullableTimeUnit;
+using media::TimeUnit;
+
+extern LazyLogModule gMediaDecoderLog;
+
+#define LOG_DS(type, fmt, ...) \
+ MOZ_LOG(gMediaDecoderLog, type, \
+ ("DecodedStream=%p " fmt, this, ##__VA_ARGS__))
+
+#define PLAYBACK_PROFILER_MARKER(markerString) \
+ PROFILER_MARKER_TEXT(FUNCTION_SIGNATURE, MEDIA_PLAYBACK, {}, markerString)
+
+/*
+ * A container class to make it easier to pass the playback info all the
+ * way to DecodedStreamGraphListener from DecodedStream.
+ */
+struct PlaybackInfoInit {
+ TimeUnit mStartTime;
+ MediaInfo mInfo;
+};
+
+class DecodedStreamGraphListener;
+
+class SourceVideoTrackListener : public MediaTrackListener {
+ public:
+ SourceVideoTrackListener(DecodedStreamGraphListener* aGraphListener,
+ SourceMediaTrack* aVideoTrack,
+ MediaTrack* aAudioTrack,
+ nsISerialEventTarget* aDecoderThread);
+
+ void NotifyOutput(MediaTrackGraph* aGraph,
+ TrackTime aCurrentTrackTime) override;
+ void NotifyEnded(MediaTrackGraph* aGraph) override;
+
+ private:
+ const RefPtr<DecodedStreamGraphListener> mGraphListener;
+ const RefPtr<SourceMediaTrack> mVideoTrack;
+ const RefPtr<const MediaTrack> mAudioTrack;
+ const RefPtr<nsISerialEventTarget> mDecoderThread;
+ TrackTime mLastVideoOutputTime = 0;
+};
+
+class DecodedStreamGraphListener {
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(DecodedStreamGraphListener)
+ public:
+ DecodedStreamGraphListener(
+ nsISerialEventTarget* aDecoderThread, AudioDecoderInputTrack* aAudioTrack,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedHolder,
+ SourceMediaTrack* aVideoTrack,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedHolder)
+ : mDecoderThread(aDecoderThread),
+ mVideoTrackListener(
+ aVideoTrack ? MakeRefPtr<SourceVideoTrackListener>(
+ this, aVideoTrack, aAudioTrack, aDecoderThread)
+ : nullptr),
+ mAudioEndedHolder(std::move(aAudioEndedHolder)),
+ mVideoEndedHolder(std::move(aVideoEndedHolder)),
+ mAudioTrack(aAudioTrack),
+ mVideoTrack(aVideoTrack) {
+ MOZ_ASSERT(NS_IsMainThread());
+ MOZ_ASSERT(mDecoderThread);
+
+ if (mAudioTrack) {
+ mOnAudioOutput = mAudioTrack->OnOutput().Connect(
+ mDecoderThread,
+ [self = RefPtr<DecodedStreamGraphListener>(this)](TrackTime aTime) {
+ self->NotifyOutput(MediaSegment::AUDIO, aTime);
+ });
+ mOnAudioEnd = mAudioTrack->OnEnd().Connect(
+ mDecoderThread, [self = RefPtr<DecodedStreamGraphListener>(this)]() {
+ self->NotifyEnded(MediaSegment::AUDIO);
+ });
+ } else {
+ mAudioEnded = true;
+ mAudioEndedHolder.ResolveIfExists(true, __func__);
+ }
+
+ if (mVideoTrackListener) {
+ mVideoTrack->AddListener(mVideoTrackListener);
+ } else {
+ mVideoEnded = true;
+ mVideoEndedHolder.ResolveIfExists(true, __func__);
+ }
+ }
+
+ void Close() {
+ AssertOnDecoderThread();
+ if (mAudioTrack) {
+ mAudioTrack->Close();
+ }
+ if (mVideoTrack) {
+ mVideoTrack->End();
+ }
+ mAudioEndedHolder.ResolveIfExists(false, __func__);
+ mVideoEndedHolder.ResolveIfExists(false, __func__);
+ mOnAudioOutput.DisconnectIfExists();
+ mOnAudioEnd.DisconnectIfExists();
+ }
+
+ void NotifyOutput(MediaSegment::Type aType, TrackTime aCurrentTrackTime) {
+ AssertOnDecoderThread();
+ if (aType == MediaSegment::AUDIO) {
+ mAudioOutputFrames = aCurrentTrackTime;
+ } else if (aType == MediaSegment::VIDEO) {
+ if (aCurrentTrackTime >= mVideoEndTime) {
+ mVideoTrack->End();
+ }
+ } else {
+ MOZ_CRASH("Unexpected track type");
+ }
+
+ MOZ_ASSERT_IF(aType == MediaSegment::AUDIO, !mAudioEnded);
+ MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, !mVideoEnded);
+ // This situation would happen when playing audio in >1x playback rate,
+ // because the audio output clock isn't align the graph time and would go
+ // forward faster. Eg. playback rate=2, when the graph time passes 10s, the
+ // audio clock time actually already goes forward 20s. After audio track
+ // ended, video track would tirgger the clock, but the video time still
+ // follows the graph time, which is smaller than the preivous audio clock
+ // time and should be ignored.
+ if (aCurrentTrackTime <= mLastOutputTime) {
+ MOZ_ASSERT(aType == MediaSegment::VIDEO);
+ return;
+ }
+ MOZ_ASSERT(aCurrentTrackTime > mLastOutputTime);
+ mLastOutputTime = aCurrentTrackTime;
+
+ // Only when audio track doesn't exists or has reached the end, video
+ // track should drive the clock.
+ MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, mAudioEnded);
+ const MediaTrack* track = aType == MediaSegment::VIDEO
+ ? static_cast<MediaTrack*>(mVideoTrack)
+ : static_cast<MediaTrack*>(mAudioTrack);
+ mOnOutput.Notify(track->TrackTimeToMicroseconds(aCurrentTrackTime));
+ }
+
+ void NotifyEnded(MediaSegment::Type aType) {
+ AssertOnDecoderThread();
+ if (aType == MediaSegment::AUDIO) {
+ MOZ_ASSERT(!mAudioEnded);
+ mAudioEnded = true;
+ mAudioEndedHolder.ResolveIfExists(true, __func__);
+ } else if (aType == MediaSegment::VIDEO) {
+ MOZ_ASSERT(!mVideoEnded);
+ mVideoEnded = true;
+ mVideoEndedHolder.ResolveIfExists(true, __func__);
+ } else {
+ MOZ_CRASH("Unexpected track type");
+ }
+ }
+
+ /**
+ * Tell the graph listener to end the track sourced by the given track after
+ * it has seen at least aEnd worth of output reported as processed by the
+ * graph.
+ *
+ * A TrackTime of TRACK_TIME_MAX indicates that the track has no end and is
+ * the default.
+ *
+ * This method of ending tracks is needed because the MediaTrackGraph
+ * processes ended tracks (through SourceMediaTrack::EndTrack) at the
+ * beginning of an iteration, but waits until the end of the iteration to
+ * process any ControlMessages. When such a ControlMessage is a listener that
+ * is to be added to a track that has ended in its very first iteration, the
+ * track ends before the listener tracking this ending is added. This can lead
+ * to a MediaStreamTrack ending on main thread (it uses another listener)
+ * before the listeners to render the track get added, potentially meaning a
+ * media element doesn't progress before reaching the end although data was
+ * available.
+ */
+ void EndVideoTrackAt(MediaTrack* aTrack, TrackTime aEnd) {
+ AssertOnDecoderThread();
+ MOZ_DIAGNOSTIC_ASSERT(aTrack == mVideoTrack);
+ mVideoEndTime = aEnd;
+ }
+
+ void Forget() {
+ MOZ_ASSERT(NS_IsMainThread());
+ if (mVideoTrackListener && !mVideoTrack->IsDestroyed()) {
+ mVideoTrack->RemoveListener(mVideoTrackListener);
+ }
+ mVideoTrackListener = nullptr;
+ }
+
+ TrackTime GetAudioFramesPlayed() {
+ AssertOnDecoderThread();
+ return mAudioOutputFrames;
+ }
+
+ MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
+
+ private:
+ ~DecodedStreamGraphListener() {
+ MOZ_ASSERT(mAudioEndedHolder.IsEmpty());
+ MOZ_ASSERT(mVideoEndedHolder.IsEmpty());
+ }
+
+ inline void AssertOnDecoderThread() const {
+ MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
+ }
+
+ const RefPtr<nsISerialEventTarget> mDecoderThread;
+
+ // Accessible on any thread, but only notify on the decoder thread.
+ MediaEventProducer<int64_t> mOnOutput;
+
+ RefPtr<SourceVideoTrackListener> mVideoTrackListener;
+
+ // These can be resolved on the main thread on creation if there is no
+ // corresponding track, otherwise they are resolved on the decoder thread.
+ MozPromiseHolder<DecodedStream::EndedPromise> mAudioEndedHolder;
+ MozPromiseHolder<DecodedStream::EndedPromise> mVideoEndedHolder;
+
+ // Decoder thread only.
+ TrackTime mAudioOutputFrames = 0;
+ TrackTime mLastOutputTime = 0;
+ bool mAudioEnded = false;
+ bool mVideoEnded = false;
+
+ // Any thread.
+ const RefPtr<AudioDecoderInputTrack> mAudioTrack;
+ const RefPtr<SourceMediaTrack> mVideoTrack;
+ MediaEventListener mOnAudioOutput;
+ MediaEventListener mOnAudioEnd;
+ Atomic<TrackTime> mVideoEndTime{TRACK_TIME_MAX};
+};
+
+SourceVideoTrackListener::SourceVideoTrackListener(
+ DecodedStreamGraphListener* aGraphListener, SourceMediaTrack* aVideoTrack,
+ MediaTrack* aAudioTrack, nsISerialEventTarget* aDecoderThread)
+ : mGraphListener(aGraphListener),
+ mVideoTrack(aVideoTrack),
+ mAudioTrack(aAudioTrack),
+ mDecoderThread(aDecoderThread) {}
+
+void SourceVideoTrackListener::NotifyOutput(MediaTrackGraph* aGraph,
+ TrackTime aCurrentTrackTime) {
+ aGraph->AssertOnGraphThreadOrNotRunning();
+ if (mAudioTrack && !mAudioTrack->Ended()) {
+ // Only audio playout drives the clock forward, if present and live.
+ return;
+ }
+ // The graph can iterate without time advancing, but the invariant is that
+ // time can never go backwards.
+ if (aCurrentTrackTime <= mLastVideoOutputTime) {
+ MOZ_ASSERT(aCurrentTrackTime == mLastVideoOutputTime);
+ return;
+ }
+ mLastVideoOutputTime = aCurrentTrackTime;
+ mDecoderThread->Dispatch(NS_NewRunnableFunction(
+ "SourceVideoTrackListener::NotifyOutput",
+ [self = RefPtr<SourceVideoTrackListener>(this), aCurrentTrackTime]() {
+ self->mGraphListener->NotifyOutput(MediaSegment::VIDEO,
+ aCurrentTrackTime);
+ }));
+}
+
+void SourceVideoTrackListener::NotifyEnded(MediaTrackGraph* aGraph) {
+ aGraph->AssertOnGraphThreadOrNotRunning();
+ mDecoderThread->Dispatch(NS_NewRunnableFunction(
+ "SourceVideoTrackListener::NotifyEnded",
+ [self = RefPtr<SourceVideoTrackListener>(this)]() {
+ self->mGraphListener->NotifyEnded(MediaSegment::VIDEO);
+ }));
+}
+
+/**
+ * All MediaStream-related data is protected by the decoder's monitor. We have
+ * at most one DecodedStreamData per MediaDecoder. XXX Its tracks are used as
+ * inputs for all output tracks created by OutputStreamManager after calls to
+ * captureStream/UntilEnded. Seeking creates new source tracks, as does
+ * replaying after the input as ended. In the latter case, the new sources are
+ * not connected to tracks created by captureStreamUntilEnded.
+ */
+class DecodedStreamData final {
+ public:
+ DecodedStreamData(
+ PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph,
+ RefPtr<ProcessedMediaTrack> aAudioOutputTrack,
+ RefPtr<ProcessedMediaTrack> aVideoOutputTrack,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise,
+ float aPlaybackRate, float aVolume, bool aPreservesPitch,
+ nsISerialEventTarget* aDecoderThread);
+ ~DecodedStreamData();
+ MediaEventSource<int64_t>& OnOutput();
+ // This is used to mark track as closed and should be called before Forget().
+ // Decoder thread only.
+ void Close();
+ // After calling this function, the DecodedStreamData would be destroyed.
+ // Main thread only.
+ void Forget();
+ void GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo);
+
+ void WriteVideoToSegment(layers::Image* aImage, const TimeUnit& aStart,
+ const TimeUnit& aEnd,
+ const gfx::IntSize& aIntrinsicSize,
+ const TimeStamp& aTimeStamp, VideoSegment* aOutput,
+ const PrincipalHandle& aPrincipalHandle,
+ double aPlaybackRate);
+
+ /* The following group of fields are protected by the decoder's monitor
+ * and can be read or written on any thread.
+ */
+ // Count of audio frames written to the track
+ int64_t mAudioFramesWritten;
+ // Count of video frames written to the track in the track's rate
+ TrackTime mVideoTrackWritten;
+ // mNextAudioTime is the end timestamp for the last packet sent to the track.
+ // Therefore audio packets starting at or after this time need to be copied
+ // to the output track.
+ TimeUnit mNextAudioTime;
+ // mLastVideoStartTime is the start timestamp for the last packet sent to the
+ // track. Therefore video packets starting after this time need to be copied
+ // to the output track.
+ NullableTimeUnit mLastVideoStartTime;
+ // mLastVideoEndTime is the end timestamp for the last packet sent to the
+ // track. It is used to adjust durations of chunks sent to the output track
+ // when there are overlaps in VideoData.
+ NullableTimeUnit mLastVideoEndTime;
+ // The timestamp of the last frame, so we can ensure time never goes
+ // backwards.
+ TimeStamp mLastVideoTimeStamp;
+ // The last video image sent to the track. Useful if we need to replicate
+ // the image.
+ RefPtr<layers::Image> mLastVideoImage;
+ gfx::IntSize mLastVideoImageDisplaySize;
+ bool mHaveSentFinishAudio;
+ bool mHaveSentFinishVideo;
+
+ const RefPtr<AudioDecoderInputTrack> mAudioTrack;
+ const RefPtr<SourceMediaTrack> mVideoTrack;
+ const RefPtr<ProcessedMediaTrack> mAudioOutputTrack;
+ const RefPtr<ProcessedMediaTrack> mVideoOutputTrack;
+ const RefPtr<MediaInputPort> mAudioPort;
+ const RefPtr<MediaInputPort> mVideoPort;
+ const RefPtr<DecodedStream::EndedPromise> mAudioEndedPromise;
+ const RefPtr<DecodedStream::EndedPromise> mVideoEndedPromise;
+ const RefPtr<DecodedStreamGraphListener> mListener;
+};
+
+DecodedStreamData::DecodedStreamData(
+ PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph,
+ RefPtr<ProcessedMediaTrack> aAudioOutputTrack,
+ RefPtr<ProcessedMediaTrack> aVideoOutputTrack,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise,
+ MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise,
+ float aPlaybackRate, float aVolume, bool aPreservesPitch,
+ nsISerialEventTarget* aDecoderThread)
+ : mAudioFramesWritten(0),
+ mVideoTrackWritten(0),
+ mNextAudioTime(aInit.mStartTime),
+ mHaveSentFinishAudio(false),
+ mHaveSentFinishVideo(false),
+ mAudioTrack(aInit.mInfo.HasAudio()
+ ? AudioDecoderInputTrack::Create(
+ aGraph, aDecoderThread, aInit.mInfo.mAudio,
+ aPlaybackRate, aVolume, aPreservesPitch)
+ : nullptr),
+ mVideoTrack(aInit.mInfo.HasVideo()
+ ? aGraph->CreateSourceTrack(MediaSegment::VIDEO)
+ : nullptr),
+ mAudioOutputTrack(std::move(aAudioOutputTrack)),
+ mVideoOutputTrack(std::move(aVideoOutputTrack)),
+ mAudioPort((mAudioOutputTrack && mAudioTrack)
+ ? mAudioOutputTrack->AllocateInputPort(mAudioTrack)
+ : nullptr),
+ mVideoPort((mVideoOutputTrack && mVideoTrack)
+ ? mVideoOutputTrack->AllocateInputPort(mVideoTrack)
+ : nullptr),
+ mAudioEndedPromise(aAudioEndedPromise.Ensure(__func__)),
+ mVideoEndedPromise(aVideoEndedPromise.Ensure(__func__)),
+ // DecodedStreamGraphListener will resolve these promises.
+ mListener(MakeRefPtr<DecodedStreamGraphListener>(
+ aDecoderThread, mAudioTrack, std::move(aAudioEndedPromise),
+ mVideoTrack, std::move(aVideoEndedPromise))) {
+ MOZ_ASSERT(NS_IsMainThread());
+}
+
+DecodedStreamData::~DecodedStreamData() {
+ MOZ_ASSERT(NS_IsMainThread());
+ if (mAudioTrack) {
+ mAudioTrack->Destroy();
+ }
+ if (mVideoTrack) {
+ mVideoTrack->Destroy();
+ }
+ if (mAudioPort) {
+ mAudioPort->Destroy();
+ }
+ if (mVideoPort) {
+ mVideoPort->Destroy();
+ }
+}
+
+MediaEventSource<int64_t>& DecodedStreamData::OnOutput() {
+ return mListener->OnOutput();
+}
+
+void DecodedStreamData::Close() { mListener->Close(); }
+
+void DecodedStreamData::Forget() { mListener->Forget(); }
+
+void DecodedStreamData::GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo) {
+ CopyUTF8toUTF16(nsPrintfCString("%p", this), aInfo.mInstance);
+ aInfo.mAudioFramesWritten = mAudioFramesWritten;
+ aInfo.mStreamAudioWritten = mListener->GetAudioFramesPlayed();
+ aInfo.mNextAudioTime = mNextAudioTime.ToMicroseconds();
+ aInfo.mLastVideoStartTime =
+ mLastVideoStartTime.valueOr(TimeUnit::FromMicroseconds(-1))
+ .ToMicroseconds();
+ aInfo.mLastVideoEndTime =
+ mLastVideoEndTime.valueOr(TimeUnit::FromMicroseconds(-1))
+ .ToMicroseconds();
+ aInfo.mHaveSentFinishAudio = mHaveSentFinishAudio;
+ aInfo.mHaveSentFinishVideo = mHaveSentFinishVideo;
+}
+
+DecodedStream::DecodedStream(
+ MediaDecoderStateMachine* aStateMachine,
+ nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+ CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks, double aVolume,
+ double aPlaybackRate, bool aPreservesPitch,
+ MediaQueue<AudioData>& aAudioQueue, MediaQueue<VideoData>& aVideoQueue,
+ RefPtr<AudioDeviceInfo> aAudioDevice)
+ : mOwnerThread(aStateMachine->OwnerThread()),
+ mDummyTrack(std::move(aDummyTrack)),
+ mWatchManager(this, mOwnerThread),
+ mPlaying(false, "DecodedStream::mPlaying"),
+ mPrincipalHandle(aStateMachine->OwnerThread(), PRINCIPAL_HANDLE_NONE,
+ "DecodedStream::mPrincipalHandle (Mirror)"),
+ mCanonicalOutputPrincipal(aStateMachine->CanonicalOutputPrincipal()),
+ mOutputTracks(std::move(aOutputTracks)),
+ mVolume(aVolume),
+ mPlaybackRate(aPlaybackRate),
+ mPreservesPitch(aPreservesPitch),
+ mAudioQueue(aAudioQueue),
+ mVideoQueue(aVideoQueue),
+ mAudioDevice(std::move(aAudioDevice)) {}
+
+DecodedStream::~DecodedStream() {
+ MOZ_ASSERT(mStartTime.isNothing(), "playback should've ended.");
+}
+
+RefPtr<DecodedStream::EndedPromise> DecodedStream::OnEnded(TrackType aType) {
+ AssertOwnerThread();
+ MOZ_ASSERT(mStartTime.isSome());
+
+ if (aType == TrackInfo::kAudioTrack && mInfo.HasAudio()) {
+ return mAudioEndedPromise;
+ }
+ if (aType == TrackInfo::kVideoTrack && mInfo.HasVideo()) {
+ return mVideoEndedPromise;
+ }
+ return nullptr;
+}
+
+nsresult DecodedStream::Start(const TimeUnit& aStartTime,
+ const MediaInfo& aInfo) {
+ AssertOwnerThread();
+ MOZ_ASSERT(mStartTime.isNothing(), "playback already started.");
+
+ AUTO_PROFILER_LABEL(FUNCTION_SIGNATURE, MEDIA_PLAYBACK);
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("StartTime=%" PRId64,
+ aStartTime.ToMicroseconds());
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ LOG_DS(LogLevel::Debug, "Start() mStartTime=%" PRId64,
+ aStartTime.ToMicroseconds());
+
+ mStartTime.emplace(aStartTime);
+ mLastOutputTime = TimeUnit::Zero();
+ mInfo = aInfo;
+ mPlaying = true;
+ mPrincipalHandle.Connect(mCanonicalOutputPrincipal);
+ mWatchManager.Watch(mPlaying, &DecodedStream::PlayingChanged);
+ mAudibilityMonitor.emplace(
+ mInfo.mAudio.mRate,
+ StaticPrefs::dom_media_silence_duration_for_audibility());
+ ConnectListener();
+
+ class R : public Runnable {
+ public:
+ R(PlaybackInfoInit&& aInit,
+ nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+ nsTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks,
+ MozPromiseHolder<MediaSink::EndedPromise>&& aAudioEndedPromise,
+ MozPromiseHolder<MediaSink::EndedPromise>&& aVideoEndedPromise,
+ float aPlaybackRate, float aVolume, bool aPreservesPitch,
+ nsISerialEventTarget* aDecoderThread)
+ : Runnable("CreateDecodedStreamData"),
+ mInit(std::move(aInit)),
+ mDummyTrack(std::move(aDummyTrack)),
+ mOutputTracks(std::move(aOutputTracks)),
+ mAudioEndedPromise(std::move(aAudioEndedPromise)),
+ mVideoEndedPromise(std::move(aVideoEndedPromise)),
+ mPlaybackRate(aPlaybackRate),
+ mVolume(aVolume),
+ mPreservesPitch(aPreservesPitch),
+ mDecoderThread(aDecoderThread) {}
+ NS_IMETHOD Run() override {
+ MOZ_ASSERT(NS_IsMainThread());
+ RefPtr<ProcessedMediaTrack> audioOutputTrack;
+ RefPtr<ProcessedMediaTrack> videoOutputTrack;
+ for (const auto& track : mOutputTracks) {
+ if (track->mType == MediaSegment::AUDIO) {
+ MOZ_DIAGNOSTIC_ASSERT(
+ !audioOutputTrack,
+ "We only support capturing to one output track per kind");
+ audioOutputTrack = track;
+ } else if (track->mType == MediaSegment::VIDEO) {
+ MOZ_DIAGNOSTIC_ASSERT(
+ !videoOutputTrack,
+ "We only support capturing to one output track per kind");
+ videoOutputTrack = track;
+ } else {
+ MOZ_CRASH("Unknown media type");
+ }
+ }
+ if (!mDummyTrack) {
+ // No dummy track - no graph. This could be intentional as the owning
+ // media element needs access to the tracks on main thread to set up
+ // forwarding of them before playback starts. MDSM will re-create
+ // DecodedStream once a dummy track is available. This effectively halts
+ // playback for this DecodedStream.
+ return NS_OK;
+ }
+ if ((audioOutputTrack && audioOutputTrack->IsDestroyed()) ||
+ (videoOutputTrack && videoOutputTrack->IsDestroyed())) {
+ // A track has been destroyed and we'll soon get re-created with a
+ // proper one. This effectively halts playback for this DecodedStream.
+ return NS_OK;
+ }
+ mData = MakeUnique<DecodedStreamData>(
+ std::move(mInit), mDummyTrack->mTrack->Graph(),
+ std::move(audioOutputTrack), std::move(videoOutputTrack),
+ std::move(mAudioEndedPromise), std::move(mVideoEndedPromise),
+ mPlaybackRate, mVolume, mPreservesPitch, mDecoderThread);
+ return NS_OK;
+ }
+ UniquePtr<DecodedStreamData> ReleaseData() { return std::move(mData); }
+
+ private:
+ PlaybackInfoInit mInit;
+ nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack;
+ const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks;
+ MozPromiseHolder<MediaSink::EndedPromise> mAudioEndedPromise;
+ MozPromiseHolder<MediaSink::EndedPromise> mVideoEndedPromise;
+ UniquePtr<DecodedStreamData> mData;
+ const float mPlaybackRate;
+ const float mVolume;
+ const bool mPreservesPitch;
+ const RefPtr<nsISerialEventTarget> mDecoderThread;
+ };
+
+ MozPromiseHolder<DecodedStream::EndedPromise> audioEndedHolder;
+ MozPromiseHolder<DecodedStream::EndedPromise> videoEndedHolder;
+ PlaybackInfoInit init{aStartTime, aInfo};
+ nsCOMPtr<nsIRunnable> r =
+ new R(std::move(init), mDummyTrack, mOutputTracks.Clone(),
+ std::move(audioEndedHolder), std::move(videoEndedHolder),
+ static_cast<float>(mPlaybackRate), static_cast<float>(mVolume),
+ mPreservesPitch, mOwnerThread);
+ SyncRunnable::DispatchToThread(GetMainThreadSerialEventTarget(), r);
+ mData = static_cast<R*>(r.get())->ReleaseData();
+
+ if (mData) {
+ mAudioEndedPromise = mData->mAudioEndedPromise;
+ mVideoEndedPromise = mData->mVideoEndedPromise;
+ mOutputListener = mData->OnOutput().Connect(mOwnerThread, this,
+ &DecodedStream::NotifyOutput);
+ SendData();
+ }
+ return NS_OK;
+}
+
+void DecodedStream::Stop() {
+ AssertOwnerThread();
+ MOZ_ASSERT(mStartTime.isSome(), "playback not started.");
+
+ TRACE("DecodedStream::Stop");
+ LOG_DS(LogLevel::Debug, "Stop()");
+
+ DisconnectListener();
+ ResetVideo(mPrincipalHandle);
+ ResetAudio();
+ mStartTime.reset();
+ mAudioEndedPromise = nullptr;
+ mVideoEndedPromise = nullptr;
+
+ // Clear mData immediately when this playback session ends so we won't
+ // send data to the wrong track in SendData() in next playback session.
+ DestroyData(std::move(mData));
+
+ mPrincipalHandle.DisconnectIfConnected();
+ mWatchManager.Unwatch(mPlaying, &DecodedStream::PlayingChanged);
+ mAudibilityMonitor.reset();
+}
+
+bool DecodedStream::IsStarted() const {
+ AssertOwnerThread();
+ return mStartTime.isSome();
+}
+
+bool DecodedStream::IsPlaying() const {
+ AssertOwnerThread();
+ return IsStarted() && mPlaying;
+}
+
+void DecodedStream::Shutdown() {
+ AssertOwnerThread();
+ mPrincipalHandle.DisconnectIfConnected();
+ mWatchManager.Shutdown();
+}
+
+void DecodedStream::DestroyData(UniquePtr<DecodedStreamData>&& aData) {
+ AssertOwnerThread();
+
+ if (!aData) {
+ return;
+ }
+
+ TRACE("DecodedStream::DestroyData");
+ mOutputListener.Disconnect();
+
+ aData->Close();
+ NS_DispatchToMainThread(
+ NS_NewRunnableFunction("DecodedStream::DestroyData",
+ [data = std::move(aData)]() { data->Forget(); }));
+}
+
+void DecodedStream::SetPlaying(bool aPlaying) {
+ AssertOwnerThread();
+
+ // Resume/pause matters only when playback started.
+ if (mStartTime.isNothing()) {
+ return;
+ }
+
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("Playing=%s", aPlaying ? "true" : "false");
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ LOG_DS(LogLevel::Debug, "playing (%d) -> (%d)", mPlaying.Ref(), aPlaying);
+ mPlaying = aPlaying;
+}
+
+void DecodedStream::SetVolume(double aVolume) {
+ AssertOwnerThread();
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("Volume=%f", aVolume);
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ if (mVolume == aVolume) {
+ return;
+ }
+ mVolume = aVolume;
+ if (mData && mData->mAudioTrack) {
+ mData->mAudioTrack->SetVolume(static_cast<float>(aVolume));
+ }
+}
+
+void DecodedStream::SetPlaybackRate(double aPlaybackRate) {
+ AssertOwnerThread();
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("PlaybackRate=%f", aPlaybackRate);
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ if (mPlaybackRate == aPlaybackRate) {
+ return;
+ }
+ mPlaybackRate = aPlaybackRate;
+ if (mData && mData->mAudioTrack) {
+ mData->mAudioTrack->SetPlaybackRate(static_cast<float>(aPlaybackRate));
+ }
+}
+
+void DecodedStream::SetPreservesPitch(bool aPreservesPitch) {
+ AssertOwnerThread();
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("PreservesPitch=%s",
+ aPreservesPitch ? "true" : "false");
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ if (mPreservesPitch == aPreservesPitch) {
+ return;
+ }
+ mPreservesPitch = aPreservesPitch;
+ if (mData && mData->mAudioTrack) {
+ mData->mAudioTrack->SetPreservesPitch(aPreservesPitch);
+ }
+}
+
+double DecodedStream::PlaybackRate() const {
+ AssertOwnerThread();
+ return mPlaybackRate;
+}
+
+void DecodedStream::SendAudio(const PrincipalHandle& aPrincipalHandle) {
+ AssertOwnerThread();
+
+ if (!mInfo.HasAudio()) {
+ return;
+ }
+
+ if (mData->mHaveSentFinishAudio) {
+ return;
+ }
+
+ TRACE("DecodedStream::SendAudio");
+ // It's OK to hold references to the AudioData because AudioData
+ // is ref-counted.
+ AutoTArray<RefPtr<AudioData>, 10> audio;
+ mAudioQueue.GetElementsAfter(mData->mNextAudioTime, &audio);
+
+ // This will happen everytime when the media sink switches from `AudioSink` to
+ // `DecodedStream`. If we don't insert the silence then the A/V will be out of
+ // sync.
+ RefPtr<AudioData> nextAudio = audio.IsEmpty() ? nullptr : audio[0];
+ if (RefPtr<AudioData> silence = CreateSilenceDataIfGapExists(nextAudio)) {
+ LOG_DS(LogLevel::Verbose, "Detect a gap in audio, insert silence=%u",
+ silence->Frames());
+ audio.InsertElementAt(0, silence);
+ }
+
+ // Append data which hasn't been sent to audio track before.
+ mData->mAudioTrack->AppendData(audio, aPrincipalHandle);
+ for (uint32_t i = 0; i < audio.Length(); ++i) {
+ CheckIsDataAudible(audio[i]);
+ mData->mNextAudioTime = audio[i]->GetEndTime();
+ mData->mAudioFramesWritten += audio[i]->Frames();
+ }
+
+ if (mAudioQueue.IsFinished() && !mData->mHaveSentFinishAudio) {
+ mData->mAudioTrack->NotifyEndOfStream();
+ mData->mHaveSentFinishAudio = true;
+ }
+}
+
+already_AddRefed<AudioData> DecodedStream::CreateSilenceDataIfGapExists(
+ RefPtr<AudioData>& aNextAudio) {
+ AssertOwnerThread();
+ if (!aNextAudio) {
+ return nullptr;
+ }
+ CheckedInt64 audioWrittenOffset =
+ mData->mAudioFramesWritten +
+ TimeUnitToFrames(*mStartTime, aNextAudio->mRate);
+ CheckedInt64 frameOffset =
+ TimeUnitToFrames(aNextAudio->mTime, aNextAudio->mRate);
+ if (audioWrittenOffset.value() >= frameOffset.value()) {
+ return nullptr;
+ }
+ // We've written less audio than our frame offset, return a silence data so we
+ // have enough audio to be at the correct offset for our current frames.
+ CheckedInt64 missingFrames = frameOffset - audioWrittenOffset;
+ AlignedAudioBuffer silenceBuffer(missingFrames.value() *
+ aNextAudio->mChannels);
+ if (!silenceBuffer) {
+ NS_WARNING("OOM in DecodedStream::CreateSilenceDataIfGapExists");
+ return nullptr;
+ }
+ auto duration = media::TimeUnit(missingFrames.value(), aNextAudio->mRate);
+ if (!duration.IsValid()) {
+ NS_WARNING("Int overflow in DecodedStream::CreateSilenceDataIfGapExists");
+ return nullptr;
+ }
+ RefPtr<AudioData> silenceData = new AudioData(
+ aNextAudio->mOffset, aNextAudio->mTime, std::move(silenceBuffer),
+ aNextAudio->mChannels, aNextAudio->mRate);
+ MOZ_DIAGNOSTIC_ASSERT(duration == silenceData->mDuration, "must be equal");
+ return silenceData.forget();
+}
+
+void DecodedStream::CheckIsDataAudible(const AudioData* aData) {
+ MOZ_ASSERT(aData);
+
+ mAudibilityMonitor->Process(aData);
+ bool isAudible = mAudibilityMonitor->RecentlyAudible();
+
+ if (isAudible != mIsAudioDataAudible) {
+ mIsAudioDataAudible = isAudible;
+ mAudibleEvent.Notify(mIsAudioDataAudible);
+ }
+}
+
+void DecodedStreamData::WriteVideoToSegment(
+ layers::Image* aImage, const TimeUnit& aStart, const TimeUnit& aEnd,
+ const gfx::IntSize& aIntrinsicSize, const TimeStamp& aTimeStamp,
+ VideoSegment* aOutput, const PrincipalHandle& aPrincipalHandle,
+ double aPlaybackRate) {
+ RefPtr<layers::Image> image = aImage;
+ aOutput->AppendFrame(image.forget(), aIntrinsicSize, aPrincipalHandle, false,
+ aTimeStamp);
+ // Extend this so we get accurate durations for all frames.
+ // Because this track is pushed, we need durations so the graph can track
+ // when playout of the track has finished.
+ MOZ_ASSERT(aPlaybackRate > 0);
+ TrackTime start = aStart.ToTicksAtRate(mVideoTrack->mSampleRate);
+ TrackTime end = aEnd.ToTicksAtRate(mVideoTrack->mSampleRate);
+ aOutput->ExtendLastFrameBy(
+ static_cast<TrackTime>((float)(end - start) / aPlaybackRate));
+
+ mLastVideoStartTime = Some(aStart);
+ mLastVideoEndTime = Some(aEnd);
+ mLastVideoTimeStamp = aTimeStamp;
+}
+
+static bool ZeroDurationAtLastChunk(VideoSegment& aInput) {
+ // Get the last video frame's start time in VideoSegment aInput.
+ // If the start time is equal to the duration of aInput, means the last video
+ // frame's duration is zero.
+ TrackTime lastVideoStratTime;
+ aInput.GetLastFrame(&lastVideoStratTime);
+ return lastVideoStratTime == aInput.GetDuration();
+}
+
+void DecodedStream::ResetAudio() {
+ AssertOwnerThread();
+
+ if (!mData) {
+ return;
+ }
+
+ if (!mInfo.HasAudio()) {
+ return;
+ }
+
+ TRACE("DecodedStream::ResetAudio");
+ mData->mAudioTrack->ClearFutureData();
+ if (const RefPtr<AudioData>& v = mAudioQueue.PeekFront()) {
+ mData->mNextAudioTime = v->mTime;
+ mData->mHaveSentFinishAudio = false;
+ }
+}
+
+void DecodedStream::ResetVideo(const PrincipalHandle& aPrincipalHandle) {
+ AssertOwnerThread();
+
+ if (!mData) {
+ return;
+ }
+
+ if (!mInfo.HasVideo()) {
+ return;
+ }
+
+ TRACE("DecodedStream::ResetVideo");
+ TrackTime cleared = mData->mVideoTrack->ClearFutureData();
+ mData->mVideoTrackWritten -= cleared;
+ if (mData->mHaveSentFinishVideo && cleared > 0) {
+ mData->mHaveSentFinishVideo = false;
+ mData->mListener->EndVideoTrackAt(mData->mVideoTrack, TRACK_TIME_MAX);
+ }
+
+ VideoSegment resetter;
+ TimeStamp currentTime;
+ TimeUnit currentPosition = GetPosition(&currentTime);
+
+ // Giving direct consumers a frame (really *any* frame, so in this case:
+ // nullptr) at an earlier time than the previous, will signal to that consumer
+ // to discard any frames ahead in time of the new frame. To be honest, this is
+ // an ugly hack because the direct listeners of the MediaTrackGraph do not
+ // have an API that supports clearing the future frames. ImageContainer and
+ // VideoFrameContainer do though, and we will need to move to a similar API
+ // for video tracks as part of bug 1493618.
+ resetter.AppendFrame(nullptr, mData->mLastVideoImageDisplaySize,
+ aPrincipalHandle, false, currentTime);
+ mData->mVideoTrack->AppendData(&resetter);
+
+ // Consumer buffers have been reset. We now set the next time to the start
+ // time of the current frame, so that it can be displayed again on resuming.
+ if (RefPtr<VideoData> v = mVideoQueue.PeekFront()) {
+ mData->mLastVideoStartTime = Some(v->mTime - TimeUnit::FromMicroseconds(1));
+ mData->mLastVideoEndTime = Some(v->mTime);
+ } else {
+ // There was no current frame in the queue. We set the next time to the
+ // current time, so we at least don't resume starting in the future.
+ mData->mLastVideoStartTime =
+ Some(currentPosition - TimeUnit::FromMicroseconds(1));
+ mData->mLastVideoEndTime = Some(currentPosition);
+ }
+
+ mData->mLastVideoTimeStamp = currentTime;
+}
+
+void DecodedStream::SendVideo(const PrincipalHandle& aPrincipalHandle) {
+ AssertOwnerThread();
+
+ if (!mInfo.HasVideo()) {
+ return;
+ }
+
+ if (mData->mHaveSentFinishVideo) {
+ return;
+ }
+
+ TRACE("DecodedStream::SendVideo");
+ VideoSegment output;
+ AutoTArray<RefPtr<VideoData>, 10> video;
+
+ // It's OK to hold references to the VideoData because VideoData
+ // is ref-counted.
+ mVideoQueue.GetElementsAfter(
+ mData->mLastVideoStartTime.valueOr(mStartTime.ref()), &video);
+
+ TimeStamp currentTime;
+ TimeUnit currentPosition = GetPosition(&currentTime);
+
+ if (mData->mLastVideoTimeStamp.IsNull()) {
+ mData->mLastVideoTimeStamp = currentTime;
+ }
+
+ for (uint32_t i = 0; i < video.Length(); ++i) {
+ VideoData* v = video[i];
+ TimeUnit lastStart = mData->mLastVideoStartTime.valueOr(
+ mStartTime.ref() - TimeUnit::FromMicroseconds(1));
+ TimeUnit lastEnd = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+
+ if (lastEnd < v->mTime) {
+ // Write last video frame to catch up. mLastVideoImage can be null here
+ // which is fine, it just means there's no video.
+
+ // TODO: |mLastVideoImage| should come from the last image rendered
+ // by the state machine. This will avoid the black frame when capture
+ // happens in the middle of playback (especially in th middle of a
+ // video frame). E.g. if we have a video frame that is 30 sec long
+ // and capture happens at 15 sec, we'll have to append a black frame
+ // that is 15 sec long.
+ TimeStamp t =
+ std::max(mData->mLastVideoTimeStamp,
+ currentTime + (lastEnd - currentPosition).ToTimeDuration());
+ mData->WriteVideoToSegment(mData->mLastVideoImage, lastEnd, v->mTime,
+ mData->mLastVideoImageDisplaySize, t, &output,
+ aPrincipalHandle, mPlaybackRate);
+ lastEnd = v->mTime;
+ }
+
+ if (lastStart < v->mTime) {
+ // This frame starts after the last frame's start. Note that this could be
+ // before the last frame's end time for some videos. This only matters for
+ // the track's lifetime in the MTG, as rendering is based on timestamps,
+ // aka frame start times.
+ TimeStamp t =
+ std::max(mData->mLastVideoTimeStamp,
+ currentTime + (lastEnd - currentPosition).ToTimeDuration());
+ TimeUnit end = std::max(
+ v->GetEndTime(),
+ lastEnd + TimeUnit::FromMicroseconds(
+ mData->mVideoTrack->TrackTimeToMicroseconds(1) + 1));
+ mData->mLastVideoImage = v->mImage;
+ mData->mLastVideoImageDisplaySize = v->mDisplay;
+ mData->WriteVideoToSegment(v->mImage, lastEnd, end, v->mDisplay, t,
+ &output, aPrincipalHandle, mPlaybackRate);
+ }
+ }
+
+ // Check the output is not empty.
+ bool compensateEOS = false;
+ bool forceBlack = false;
+ if (output.GetLastFrame()) {
+ compensateEOS = ZeroDurationAtLastChunk(output);
+ }
+
+ if (output.GetDuration() > 0) {
+ mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&output);
+ }
+
+ if (mVideoQueue.IsFinished() && !mData->mHaveSentFinishVideo) {
+ if (!mData->mLastVideoImage) {
+ // We have video, but the video queue finished before we received any
+ // frame. We insert a black frame to progress any consuming
+ // HTMLMediaElement. This mirrors the behavior of VideoSink.
+
+ // Force a frame - can be null
+ compensateEOS = true;
+ // Force frame to be black
+ forceBlack = true;
+ // Override the frame's size (will be 0x0 otherwise)
+ mData->mLastVideoImageDisplaySize = mInfo.mVideo.mDisplay;
+ LOG_DS(LogLevel::Debug, "No mLastVideoImage");
+ }
+ if (compensateEOS) {
+ VideoSegment endSegment;
+ auto start = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+ mData->WriteVideoToSegment(
+ mData->mLastVideoImage, start, start,
+ mData->mLastVideoImageDisplaySize,
+ currentTime + (start - currentPosition).ToTimeDuration(), &endSegment,
+ aPrincipalHandle, mPlaybackRate);
+ // ForwardedInputTrack drops zero duration frames, even at the end of
+ // the track. Give the frame a minimum duration so that it is not
+ // dropped.
+ endSegment.ExtendLastFrameBy(1);
+ LOG_DS(LogLevel::Debug,
+ "compensateEOS: start %s, duration %" PRId64
+ ", mPlaybackRate %lf, sample rate %" PRId32,
+ start.ToString().get(), endSegment.GetDuration(), mPlaybackRate,
+ mData->mVideoTrack->mSampleRate);
+ MOZ_ASSERT(endSegment.GetDuration() > 0);
+ if (forceBlack) {
+ endSegment.ReplaceWithDisabled();
+ }
+ mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&endSegment);
+ }
+ mData->mListener->EndVideoTrackAt(mData->mVideoTrack,
+ mData->mVideoTrackWritten);
+ mData->mHaveSentFinishVideo = true;
+ }
+}
+
+void DecodedStream::SendData() {
+ AssertOwnerThread();
+
+ // Not yet created on the main thread. MDSM will try again later.
+ if (!mData) {
+ return;
+ }
+
+ if (!mPlaying) {
+ return;
+ }
+
+ LOG_DS(LogLevel::Verbose, "SendData()");
+ SendAudio(mPrincipalHandle);
+ SendVideo(mPrincipalHandle);
+}
+
+TimeUnit DecodedStream::GetEndTime(TrackType aType) const {
+ AssertOwnerThread();
+ TRACE("DecodedStream::GetEndTime");
+ if (aType == TrackInfo::kAudioTrack && mInfo.HasAudio() && mData) {
+ auto t = mStartTime.ref() +
+ media::TimeUnit(mData->mAudioFramesWritten, mInfo.mAudio.mRate);
+ if (t.IsValid()) {
+ return t;
+ }
+ } else if (aType == TrackInfo::kVideoTrack && mData) {
+ return mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+ }
+ return TimeUnit::Zero();
+}
+
+TimeUnit DecodedStream::GetPosition(TimeStamp* aTimeStamp) {
+ AssertOwnerThread();
+ TRACE("DecodedStream::GetPosition");
+ // This is only called after MDSM starts playback. So mStartTime is
+ // guaranteed to be something.
+ MOZ_ASSERT(mStartTime.isSome());
+ if (aTimeStamp) {
+ *aTimeStamp = TimeStamp::Now();
+ }
+ return mStartTime.ref() + mLastOutputTime;
+}
+
+void DecodedStream::NotifyOutput(int64_t aTime) {
+ AssertOwnerThread();
+ TimeUnit time = TimeUnit::FromMicroseconds(aTime);
+ if (time == mLastOutputTime) {
+ return;
+ }
+ MOZ_ASSERT(mLastOutputTime < time);
+ mLastOutputTime = time;
+ auto currentTime = GetPosition();
+
+ if (profiler_thread_is_being_profiled_for_markers()) {
+ nsPrintfCString markerString("OutputTime=%" PRId64,
+ currentTime.ToMicroseconds());
+ PLAYBACK_PROFILER_MARKER(markerString);
+ }
+ LOG_DS(LogLevel::Verbose, "time is now %" PRId64,
+ currentTime.ToMicroseconds());
+
+ // Remove audio samples that have been played by MTG from the queue.
+ RefPtr<AudioData> a = mAudioQueue.PeekFront();
+ for (; a && a->GetEndTime() <= currentTime;) {
+ LOG_DS(LogLevel::Debug, "Dropping audio [%" PRId64 ",%" PRId64 "]",
+ a->mTime.ToMicroseconds(), a->GetEndTime().ToMicroseconds());
+ RefPtr<AudioData> releaseMe = mAudioQueue.PopFront();
+ a = mAudioQueue.PeekFront();
+ }
+}
+
+void DecodedStream::PlayingChanged() {
+ AssertOwnerThread();
+ TRACE("DecodedStream::PlayingChanged");
+
+ if (!mPlaying) {
+ // On seek or pause we discard future frames.
+ ResetVideo(mPrincipalHandle);
+ ResetAudio();
+ }
+}
+
+void DecodedStream::ConnectListener() {
+ AssertOwnerThread();
+
+ mAudioPushListener = mAudioQueue.PushEvent().Connect(
+ mOwnerThread, this, &DecodedStream::SendData);
+ mAudioFinishListener = mAudioQueue.FinishEvent().Connect(
+ mOwnerThread, this, &DecodedStream::SendData);
+ mVideoPushListener = mVideoQueue.PushEvent().Connect(
+ mOwnerThread, this, &DecodedStream::SendData);
+ mVideoFinishListener = mVideoQueue.FinishEvent().Connect(
+ mOwnerThread, this, &DecodedStream::SendData);
+ mWatchManager.Watch(mPlaying, &DecodedStream::SendData);
+}
+
+void DecodedStream::DisconnectListener() {
+ AssertOwnerThread();
+
+ mAudioPushListener.Disconnect();
+ mVideoPushListener.Disconnect();
+ mAudioFinishListener.Disconnect();
+ mVideoFinishListener.Disconnect();
+ mWatchManager.Unwatch(mPlaying, &DecodedStream::SendData);
+}
+
+void DecodedStream::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+ AssertOwnerThread();
+ int64_t startTime = mStartTime.isSome() ? mStartTime->ToMicroseconds() : -1;
+ aInfo.mDecodedStream.mInstance =
+ NS_ConvertUTF8toUTF16(nsPrintfCString("%p", this));
+ aInfo.mDecodedStream.mStartTime = startTime;
+ aInfo.mDecodedStream.mLastOutputTime = mLastOutputTime.ToMicroseconds();
+ aInfo.mDecodedStream.mPlaying = mPlaying.Ref();
+ auto lastAudio = mAudioQueue.PeekBack();
+ aInfo.mDecodedStream.mLastAudio =
+ lastAudio ? lastAudio->GetEndTime().ToMicroseconds() : -1;
+ aInfo.mDecodedStream.mAudioQueueFinished = mAudioQueue.IsFinished();
+ aInfo.mDecodedStream.mAudioQueueSize =
+ AssertedCast<int>(mAudioQueue.GetSize());
+ if (mData) {
+ mData->GetDebugInfo(aInfo.mDecodedStream.mData);
+ }
+}
+
+#undef LOG_DS
+
+} // namespace mozilla
diff --git a/dom/media/mediasink/DecodedStream.h b/dom/media/mediasink/DecodedStream.h
new file mode 100644
index 0000000000..4709ffeda6
--- /dev/null
+++ b/dom/media/mediasink/DecodedStream.h
@@ -0,0 +1,154 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DecodedStream_h_
+#define DecodedStream_h_
+
+#include "AudibilityMonitor.h"
+#include "MediaEventSource.h"
+#include "MediaInfo.h"
+#include "MediaSegment.h"
+#include "MediaSink.h"
+
+#include "mozilla/AbstractThread.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/StateMirroring.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+
+class DecodedStreamData;
+class MediaDecoderStateMachine;
+class AudioData;
+class VideoData;
+struct PlaybackInfoInit;
+class ProcessedMediaTrack;
+class TimeStamp;
+
+template <class T>
+class MediaQueue;
+
+class DecodedStream : public MediaSink {
+ public:
+ DecodedStream(MediaDecoderStateMachine* aStateMachine,
+ nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+ CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks,
+ double aVolume, double aPlaybackRate, bool aPreservesPitch,
+ MediaQueue<AudioData>& aAudioQueue,
+ MediaQueue<VideoData>& aVideoQueue,
+ RefPtr<AudioDeviceInfo> aAudioDevice);
+
+ RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+ media::TimeUnit GetEndTime(TrackType aType) const override;
+ media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+ bool HasUnplayedFrames(TrackType aType) const override {
+ // TODO: bug 1755026
+ return false;
+ }
+
+ media::TimeUnit UnplayedDuration(TrackType aType) const override {
+ // TODO: bug 1755026
+ return media::TimeUnit::Zero();
+ }
+
+ void SetVolume(double aVolume) override;
+ void SetPlaybackRate(double aPlaybackRate) override;
+ void SetPreservesPitch(bool aPreservesPitch) override;
+ void SetPlaying(bool aPlaying) override;
+
+ double PlaybackRate() const override;
+
+ nsresult Start(const media::TimeUnit& aStartTime,
+ const MediaInfo& aInfo) override;
+ void Stop() override;
+ bool IsStarted() const override;
+ bool IsPlaying() const override;
+ void Shutdown() override;
+ void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+ const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; }
+
+ MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; }
+
+ protected:
+ virtual ~DecodedStream();
+
+ private:
+ void DestroyData(UniquePtr<DecodedStreamData>&& aData);
+ void SendAudio(const PrincipalHandle& aPrincipalHandle);
+ void SendVideo(const PrincipalHandle& aPrincipalHandle);
+ void ResetAudio();
+ void ResetVideo(const PrincipalHandle& aPrincipalHandle);
+ void SendData();
+ void NotifyOutput(int64_t aTime);
+ void CheckIsDataAudible(const AudioData* aData);
+
+ void AssertOwnerThread() const {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+ }
+
+ void PlayingChanged();
+
+ void ConnectListener();
+ void DisconnectListener();
+
+ // Give the audio that is going to be appended next as an input, if there is
+ // a gap between audio's time and the frames that we've written, then return
+ // a silence data that has same amount of frames and can be used to fill the
+ // gap. If no gap exists, return nullptr.
+ already_AddRefed<AudioData> CreateSilenceDataIfGapExists(
+ RefPtr<AudioData>& aNextAudio);
+
+ const RefPtr<AbstractThread> mOwnerThread;
+
+ // Used to access the graph.
+ const nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack;
+
+ /*
+ * Worker thread only members.
+ */
+ WatchManager<DecodedStream> mWatchManager;
+ UniquePtr<DecodedStreamData> mData;
+ RefPtr<EndedPromise> mAudioEndedPromise;
+ RefPtr<EndedPromise> mVideoEndedPromise;
+
+ Watchable<bool> mPlaying;
+ Mirror<PrincipalHandle> mPrincipalHandle;
+ AbstractCanonical<PrincipalHandle>* mCanonicalOutputPrincipal;
+ const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks;
+
+ double mVolume;
+ double mPlaybackRate;
+ bool mPreservesPitch;
+
+ media::NullableTimeUnit mStartTime;
+ media::TimeUnit mLastOutputTime;
+ MediaInfo mInfo;
+ // True when stream is producing audible sound, false when stream is silent.
+ bool mIsAudioDataAudible = false;
+ Maybe<AudibilityMonitor> mAudibilityMonitor;
+ MediaEventProducer<bool> mAudibleEvent;
+
+ MediaQueue<AudioData>& mAudioQueue;
+ MediaQueue<VideoData>& mVideoQueue;
+
+ // This is the audio device we were told to play out to.
+ // All audio is captured, so nothing is actually played out -- but we report
+ // this upwards as it could save us from being recreated when the sink
+ // changes.
+ const RefPtr<AudioDeviceInfo> mAudioDevice;
+
+ MediaEventListener mAudioPushListener;
+ MediaEventListener mVideoPushListener;
+ MediaEventListener mAudioFinishListener;
+ MediaEventListener mVideoFinishListener;
+ MediaEventListener mOutputListener;
+};
+
+} // namespace mozilla
+
+#endif // DecodedStream_h_
diff --git a/dom/media/mediasink/MediaSink.h b/dom/media/mediasink/MediaSink.h
new file mode 100644
index 0000000000..de6f26dcc9
--- /dev/null
+++ b/dom/media/mediasink/MediaSink.h
@@ -0,0 +1,142 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MediaSink_h_
+#define MediaSink_h_
+
+#include "MediaInfo.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/dom/MediaDebugInfoBinding.h"
+#include "nsISupportsImpl.h"
+
+class AudioDeviceInfo;
+
+namespace mozilla {
+
+class TimeStamp;
+class VideoFrameContainer;
+
+/**
+ * A consumer of audio/video data which plays audio and video tracks and
+ * manages A/V sync between them.
+ *
+ * A typical sink sends audio/video outputs to the speaker and screen.
+ * However, there are also sinks which capture the output of an media element
+ * and send the output to a MediaStream.
+ *
+ * This class is used to move A/V sync management and audio/video rendering
+ * out of MDSM so it is possible for subclasses to do external rendering using
+ * specific hardware which is required by TV projects and CDM.
+ *
+ * Note this class is not thread-safe and should be called from the state
+ * machine thread only.
+ */
+class MediaSink {
+ public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaSink);
+ typedef mozilla::TrackInfo::TrackType TrackType;
+
+ // EndedPromise needs to be a non-exclusive promise as it is shared between
+ // both the AudioSink and VideoSink.
+ typedef MozPromise<bool, nsresult, /* IsExclusive = */ false> EndedPromise;
+
+ // Return a promise which is resolved when the track finishes
+ // or null if no such track.
+ // Must be called after playback starts.
+ virtual RefPtr<EndedPromise> OnEnded(TrackType aType) = 0;
+
+ // Return the end time of the audio/video data that has been consumed
+ // or 0 if no such track.
+ // Must be called after playback starts.
+ virtual media::TimeUnit GetEndTime(TrackType aType) const = 0;
+
+ // Return playback position of the media.
+ // Since A/V sync is always maintained by this sink, there is no need to
+ // specify whether we want to get audio or video position.
+ // aTimeStamp returns the timeStamp corresponding to the returned position
+ // which is used by the compositor to derive the render time of video frames.
+ // Must be called after playback starts.
+ virtual media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) = 0;
+
+ // Return true if there are data consumed but not played yet.
+ // Can be called in any state.
+ virtual bool HasUnplayedFrames(TrackType aType) const = 0;
+
+ // Return the duration of data consumed but not played yet.
+ // Can be called in any state.
+ virtual media::TimeUnit UnplayedDuration(TrackType aType) const = 0;
+
+ // Set volume of the audio track.
+ // Do nothing if this sink has no audio track.
+ // Can be called in any state.
+ virtual void SetVolume(double aVolume) {}
+
+ // Set the audio stream name.
+ // Does nothing if this sink has no audio stream.
+ // Can be called in any state.
+ virtual void SetStreamName(const nsAString& aStreamName) {}
+
+ // Set the playback rate.
+ // Can be called in any state.
+ virtual void SetPlaybackRate(double aPlaybackRate) {}
+
+ // Whether to preserve pitch of the audio track.
+ // Do nothing if this sink has no audio track.
+ // Can be called in any state.
+ virtual void SetPreservesPitch(bool aPreservesPitch) {}
+
+ // Pause/resume the playback. Only work after playback starts.
+ virtual void SetPlaying(bool aPlaying) = 0;
+
+ // Get the playback rate.
+ // Can be called in any state.
+ virtual double PlaybackRate() const = 0;
+
+ // Single frame rendering operation may need to be done before playback
+ // started (1st frame) or right after seek completed or playback stopped.
+ // Do nothing if this sink has no video track. Can be called in any state.
+ virtual void Redraw(const VideoInfo& aInfo){};
+
+ // Begin a playback session with the provided start time and media info.
+ // Must be called when playback is stopped.
+ virtual nsresult Start(const media::TimeUnit& aStartTime,
+ const MediaInfo& aInfo) = 0;
+
+ // Finish a playback session.
+ // Must be called after playback starts.
+ virtual void Stop() = 0;
+
+ // Return true if playback has started.
+ // Can be called in any state.
+ virtual bool IsStarted() const = 0;
+
+ // Return true if playback is started and not paused otherwise false.
+ // Can be called in any state.
+ virtual bool IsPlaying() const = 0;
+
+ // The audio output device this MediaSink is playing audio data to. The
+ // default device is used if this returns null.
+ virtual const AudioDeviceInfo* AudioDevice() const = 0;
+
+ // Called on the state machine thread to shut down the sink. All resources
+ // allocated by this sink should be released.
+ // Must be called after playback stopped.
+ virtual void Shutdown() {}
+
+ virtual void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) {}
+
+ virtual void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {}
+
+ virtual void EnableTreatAudioUnderrunAsSilence(bool aEnabled) {}
+
+ protected:
+ virtual ~MediaSink() = default;
+};
+
+} // namespace mozilla
+
+#endif // MediaSink_h_
diff --git a/dom/media/mediasink/VideoSink.cpp b/dom/media/mediasink/VideoSink.cpp
new file mode 100644
index 0000000000..906efdf0db
--- /dev/null
+++ b/dom/media/mediasink/VideoSink.cpp
@@ -0,0 +1,706 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+// Include Windows headers required for enabling high precision timers.
+# include <windows.h>
+# include <mmsystem.h>
+#endif
+
+#include "VideoSink.h"
+
+#include "MediaQueue.h"
+#include "VideoUtils.h"
+
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/StaticPrefs_browser.h"
+#include "mozilla/StaticPrefs_media.h"
+
+namespace mozilla {
+extern LazyLogModule gMediaDecoderLog;
+}
+
+#undef FMT
+
+#define FMT(x, ...) "VideoSink=%p " x, this, ##__VA_ARGS__
+#define VSINK_LOG(x, ...) \
+ MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, (FMT(x, ##__VA_ARGS__)))
+#define VSINK_LOG_V(x, ...) \
+ MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, (FMT(x, ##__VA_ARGS__)))
+
+namespace mozilla {
+
+using namespace mozilla::layers;
+
+// Minimum update frequency is 1/120th of a second, i.e. half the
+// duration of a 60-fps frame.
+static const int64_t MIN_UPDATE_INTERVAL_US = 1000000 / (60 * 2);
+
+static void SetImageToGreenPixel(PlanarYCbCrImage* aImage) {
+ static uint8_t greenPixel[] = {0x00, 0x00, 0x00};
+ PlanarYCbCrData data;
+ data.mYChannel = greenPixel;
+ data.mCbChannel = greenPixel + 1;
+ data.mCrChannel = greenPixel + 2;
+ data.mYStride = data.mCbCrStride = 1;
+ data.mPictureRect = gfx::IntRect(0, 0, 1, 1);
+ data.mYUVColorSpace = gfx::YUVColorSpace::BT601;
+ aImage->CopyData(data);
+}
+
+VideoSink::VideoSink(AbstractThread* aThread, MediaSink* aAudioSink,
+ MediaQueue<VideoData>& aVideoQueue,
+ VideoFrameContainer* aContainer,
+ FrameStatistics& aFrameStats,
+ uint32_t aVQueueSentToCompositerSize)
+ : mOwnerThread(aThread),
+ mAudioSink(aAudioSink),
+ mVideoQueue(aVideoQueue),
+ mContainer(aContainer),
+ mProducerID(ImageContainer::AllocateProducerID()),
+ mFrameStats(aFrameStats),
+ mOldCompositorDroppedCount(mContainer ? mContainer->GetDroppedImageCount()
+ : 0),
+ mPendingDroppedCount(0),
+ mHasVideo(false),
+ mUpdateScheduler(aThread),
+ mVideoQueueSendToCompositorSize(aVQueueSentToCompositerSize),
+ mMinVideoQueueSize(StaticPrefs::media_ruin_av_sync_enabled() ? 1 : 0)
+#ifdef XP_WIN
+ ,
+ mHiResTimersRequested(false)
+#endif
+{
+ MOZ_ASSERT(mAudioSink, "AudioSink should exist.");
+
+ if (StaticPrefs::browser_measurement_render_anims_and_video_solid() &&
+ mContainer) {
+ InitializeBlankImage();
+ MOZ_ASSERT(mBlankImage, "Blank image should exist.");
+ }
+}
+
+VideoSink::~VideoSink() {
+#ifdef XP_WIN
+ MOZ_ASSERT(!mHiResTimersRequested);
+#endif
+}
+
+RefPtr<VideoSink::EndedPromise> VideoSink::OnEnded(TrackType aType) {
+ AssertOwnerThread();
+ MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts.");
+
+ if (aType == TrackInfo::kAudioTrack) {
+ return mAudioSink->OnEnded(aType);
+ } else if (aType == TrackInfo::kVideoTrack) {
+ return mEndPromise;
+ }
+ return nullptr;
+}
+
+media::TimeUnit VideoSink::GetEndTime(TrackType aType) const {
+ AssertOwnerThread();
+ MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts.");
+
+ if (aType == TrackInfo::kVideoTrack) {
+ return mVideoFrameEndTime;
+ } else if (aType == TrackInfo::kAudioTrack) {
+ return mAudioSink->GetEndTime(aType);
+ }
+ return media::TimeUnit::Zero();
+}
+
+media::TimeUnit VideoSink::GetPosition(TimeStamp* aTimeStamp) {
+ AssertOwnerThread();
+ return mAudioSink->GetPosition(aTimeStamp);
+}
+
+bool VideoSink::HasUnplayedFrames(TrackType aType) const {
+ AssertOwnerThread();
+ MOZ_ASSERT(aType == TrackInfo::kAudioTrack,
+ "Not implemented for non audio tracks.");
+
+ return mAudioSink->HasUnplayedFrames(aType);
+}
+
+media::TimeUnit VideoSink::UnplayedDuration(TrackType aType) const {
+ AssertOwnerThread();
+ MOZ_ASSERT(aType == TrackInfo::kAudioTrack,
+ "Not implemented for non audio tracks.");
+
+ return mAudioSink->UnplayedDuration(aType);
+}
+
+void VideoSink::SetPlaybackRate(double aPlaybackRate) {
+ AssertOwnerThread();
+
+ mAudioSink->SetPlaybackRate(aPlaybackRate);
+}
+
+void VideoSink::SetVolume(double aVolume) {
+ AssertOwnerThread();
+
+ mAudioSink->SetVolume(aVolume);
+}
+
+void VideoSink::SetStreamName(const nsAString& aStreamName) {
+ AssertOwnerThread();
+
+ mAudioSink->SetStreamName(aStreamName);
+}
+
+void VideoSink::SetPreservesPitch(bool aPreservesPitch) {
+ AssertOwnerThread();
+
+ mAudioSink->SetPreservesPitch(aPreservesPitch);
+}
+
+double VideoSink::PlaybackRate() const {
+ AssertOwnerThread();
+
+ return mAudioSink->PlaybackRate();
+}
+
+void VideoSink::EnsureHighResTimersOnOnlyIfPlaying() {
+#ifdef XP_WIN
+ const bool needed = IsPlaying();
+ if (needed == mHiResTimersRequested) {
+ return;
+ }
+ if (needed) {
+ // Ensure high precision timers are enabled on Windows, otherwise the
+ // VideoSink isn't woken up at reliable intervals to set the next frame, and
+ // we drop frames while painting. Note that each call must be matched by a
+ // corresponding timeEndPeriod() call. Enabling high precision timers causes
+ // the CPU to wake up more frequently on Windows 7 and earlier, which causes
+ // more CPU load and battery use. So we only enable high precision timers
+ // when we're actually playing.
+ timeBeginPeriod(1);
+ } else {
+ timeEndPeriod(1);
+ }
+ mHiResTimersRequested = needed;
+#endif
+}
+
+void VideoSink::SetPlaying(bool aPlaying) {
+ AssertOwnerThread();
+ VSINK_LOG_V(" playing (%d) -> (%d)", mAudioSink->IsPlaying(), aPlaying);
+
+ if (!aPlaying) {
+ // Reset any update timer if paused.
+ mUpdateScheduler.Reset();
+ // Since playback is paused, tell compositor to render only current frame.
+ TimeStamp nowTime;
+ const auto clockTime = mAudioSink->GetPosition(&nowTime);
+ RenderVideoFrames(1, clockTime.ToMicroseconds(), nowTime);
+ if (mContainer) {
+ mContainer->ClearCachedResources();
+ }
+ if (mSecondaryContainer) {
+ mSecondaryContainer->ClearCachedResources();
+ }
+ }
+
+ mAudioSink->SetPlaying(aPlaying);
+
+ if (mHasVideo && aPlaying) {
+ // There's no thread in VideoSink for pulling video frames, need to trigger
+ // rendering while becoming playing status. because the VideoQueue may be
+ // full already.
+ TryUpdateRenderedVideoFrames();
+ }
+
+ EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+nsresult VideoSink::Start(const media::TimeUnit& aStartTime,
+ const MediaInfo& aInfo) {
+ AssertOwnerThread();
+ VSINK_LOG("[%s]", __func__);
+
+ nsresult rv = mAudioSink->Start(aStartTime, aInfo);
+
+ mHasVideo = aInfo.HasVideo();
+
+ if (mHasVideo) {
+ mEndPromise = mEndPromiseHolder.Ensure(__func__);
+
+ // If the underlying MediaSink has an end promise for the video track (which
+ // happens when mAudioSink refers to a DecodedStream), we must wait for it
+ // to complete before resolving our own end promise. Otherwise, MDSM might
+ // stop playback before DecodedStream plays to the end and cause
+ // test_streams_element_capture.html to time out.
+ RefPtr<EndedPromise> p = mAudioSink->OnEnded(TrackInfo::kVideoTrack);
+ if (p) {
+ RefPtr<VideoSink> self = this;
+ p->Then(
+ mOwnerThread, __func__,
+ [self]() {
+ self->mVideoSinkEndRequest.Complete();
+ self->TryUpdateRenderedVideoFrames();
+ // It is possible the video queue size is 0 and we have no
+ // frames to render. However, we need to call
+ // MaybeResolveEndPromise() to ensure mEndPromiseHolder is
+ // resolved.
+ self->MaybeResolveEndPromise();
+ },
+ [self]() {
+ self->mVideoSinkEndRequest.Complete();
+ self->TryUpdateRenderedVideoFrames();
+ self->MaybeResolveEndPromise();
+ })
+ ->Track(mVideoSinkEndRequest);
+ }
+
+ ConnectListener();
+ // Run the render loop at least once so we can resolve the end promise
+ // when video duration is 0.
+ UpdateRenderedVideoFrames();
+ }
+ return rv;
+}
+
+void VideoSink::Stop() {
+ AssertOwnerThread();
+ MOZ_ASSERT(mAudioSink->IsStarted(), "playback not started.");
+ VSINK_LOG("[%s]", __func__);
+
+ mAudioSink->Stop();
+
+ mUpdateScheduler.Reset();
+ if (mHasVideo) {
+ DisconnectListener();
+ mVideoSinkEndRequest.DisconnectIfExists();
+ mEndPromiseHolder.ResolveIfExists(true, __func__);
+ mEndPromise = nullptr;
+ }
+ mVideoFrameEndTime = media::TimeUnit::Zero();
+
+ EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+bool VideoSink::IsStarted() const {
+ AssertOwnerThread();
+
+ return mAudioSink->IsStarted();
+}
+
+bool VideoSink::IsPlaying() const {
+ AssertOwnerThread();
+
+ return mAudioSink->IsPlaying();
+}
+
+const AudioDeviceInfo* VideoSink::AudioDevice() const {
+ return mAudioSink->AudioDevice();
+}
+
+void VideoSink::Shutdown() {
+ AssertOwnerThread();
+ MOZ_ASSERT(!mAudioSink->IsStarted(), "must be called after playback stops.");
+ VSINK_LOG("[%s]", __func__);
+
+ mAudioSink->Shutdown();
+}
+
+void VideoSink::OnVideoQueuePushed(RefPtr<VideoData>&& aSample) {
+ AssertOwnerThread();
+ // Listen to push event, VideoSink should try rendering ASAP if first frame
+ // arrives but update scheduler is not triggered yet.
+ if (!aSample->IsSentToCompositor()) {
+ // Since we push rendered frames back to the queue, we will receive
+ // push events for them. We only need to trigger render loop
+ // when this frame is not rendered yet.
+ TryUpdateRenderedVideoFrames();
+ }
+}
+
+void VideoSink::OnVideoQueueFinished() {
+ AssertOwnerThread();
+ // Run render loop if the end promise is not resolved yet.
+ if (!mUpdateScheduler.IsScheduled() && mAudioSink->IsPlaying() &&
+ !mEndPromiseHolder.IsEmpty()) {
+ UpdateRenderedVideoFrames();
+ }
+}
+
+void VideoSink::Redraw(const VideoInfo& aInfo) {
+ AUTO_PROFILER_LABEL("VideoSink::Redraw", MEDIA_PLAYBACK);
+ AssertOwnerThread();
+
+ // No video track, nothing to draw.
+ if (!aInfo.IsValid() || !mContainer) {
+ return;
+ }
+
+ auto now = TimeStamp::Now();
+
+ RefPtr<VideoData> video = VideoQueue().PeekFront();
+ if (video) {
+ if (mBlankImage) {
+ video->mImage = mBlankImage;
+ }
+ video->MarkSentToCompositor();
+ mContainer->SetCurrentFrame(video->mDisplay, video->mImage, now);
+ if (mSecondaryContainer) {
+ mSecondaryContainer->SetCurrentFrame(video->mDisplay, video->mImage, now);
+ }
+ return;
+ }
+
+ // When we reach here, it means there are no frames in this video track.
+ // Draw a blank frame to ensure there is something in the image container
+ // to fire 'loadeddata'.
+
+ RefPtr<Image> blank =
+ mContainer->GetImageContainer()->CreatePlanarYCbCrImage();
+ mContainer->SetCurrentFrame(aInfo.mDisplay, blank, now);
+
+ if (mSecondaryContainer) {
+ mSecondaryContainer->SetCurrentFrame(aInfo.mDisplay, blank, now);
+ }
+}
+
+void VideoSink::TryUpdateRenderedVideoFrames() {
+ AUTO_PROFILER_LABEL("VideoSink::TryUpdateRenderedVideoFrames",
+ MEDIA_PLAYBACK);
+ AssertOwnerThread();
+ if (mUpdateScheduler.IsScheduled() || !mAudioSink->IsPlaying()) {
+ return;
+ }
+ RefPtr<VideoData> v = VideoQueue().PeekFront();
+ if (!v) {
+ // No frames to render.
+ return;
+ }
+
+ TimeStamp nowTime;
+ const media::TimeUnit clockTime = mAudioSink->GetPosition(&nowTime);
+ if (clockTime >= v->mTime) {
+ // Time to render this frame.
+ UpdateRenderedVideoFrames();
+ return;
+ }
+
+ // If we send this future frame to the compositor now, it will be rendered
+ // immediately and break A/V sync. Instead, we schedule a timer to send it
+ // later.
+ int64_t delta =
+ (v->mTime - clockTime).ToMicroseconds() / mAudioSink->PlaybackRate();
+ TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta);
+ RefPtr<VideoSink> self = this;
+ mUpdateScheduler.Ensure(
+ target, [self]() { self->UpdateRenderedVideoFramesByTimer(); },
+ [self]() { self->UpdateRenderedVideoFramesByTimer(); });
+}
+
+void VideoSink::UpdateRenderedVideoFramesByTimer() {
+ AssertOwnerThread();
+ mUpdateScheduler.CompleteRequest();
+ UpdateRenderedVideoFrames();
+}
+
+void VideoSink::ConnectListener() {
+ AssertOwnerThread();
+ mPushListener = VideoQueue().PushEvent().Connect(
+ mOwnerThread, this, &VideoSink::OnVideoQueuePushed);
+ mFinishListener = VideoQueue().FinishEvent().Connect(
+ mOwnerThread, this, &VideoSink::OnVideoQueueFinished);
+}
+
+void VideoSink::DisconnectListener() {
+ AssertOwnerThread();
+ mPushListener.Disconnect();
+ mFinishListener.Disconnect();
+}
+
+void VideoSink::RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime,
+ const TimeStamp& aClockTimeStamp) {
+ AUTO_PROFILER_LABEL("VideoSink::RenderVideoFrames", MEDIA_PLAYBACK);
+ AssertOwnerThread();
+
+ AutoTArray<RefPtr<VideoData>, 16> frames;
+ VideoQueue().GetFirstElements(aMaxFrames, &frames);
+ if (frames.IsEmpty() || !mContainer) {
+ return;
+ }
+
+ AutoTArray<ImageContainer::NonOwningImage, 16> images;
+ TimeStamp lastFrameTime;
+ double playbackRate = mAudioSink->PlaybackRate();
+ for (uint32_t i = 0; i < frames.Length(); ++i) {
+ VideoData* frame = frames[i];
+ bool wasSent = frame->IsSentToCompositor();
+ frame->MarkSentToCompositor();
+
+ if (!frame->mImage || !frame->mImage->IsValid() ||
+ !frame->mImage->GetSize().width || !frame->mImage->GetSize().height) {
+ continue;
+ }
+
+ if (frame->mTime.IsNegative()) {
+ // Frame times before the start time are invalid; drop such frames
+ continue;
+ }
+
+ MOZ_ASSERT(!aClockTimeStamp.IsNull());
+ int64_t delta = frame->mTime.ToMicroseconds() - aClockTime;
+ TimeStamp t =
+ aClockTimeStamp + TimeDuration::FromMicroseconds(delta / playbackRate);
+ if (!lastFrameTime.IsNull() && t <= lastFrameTime) {
+ // Timestamps out of order; drop the new frame. In theory we should
+ // probably replace the previous frame with the new frame if the
+ // timestamps are equal, but this is a corrupt video file already so
+ // never mind.
+ continue;
+ }
+ MOZ_ASSERT(!t.IsNull());
+ lastFrameTime = t;
+
+ ImageContainer::NonOwningImage* img = images.AppendElement();
+ img->mTimeStamp = t;
+ img->mImage = frame->mImage;
+ if (mBlankImage) {
+ img->mImage = mBlankImage;
+ }
+ img->mFrameID = frame->mFrameID;
+ img->mProducerID = mProducerID;
+
+ VSINK_LOG_V("playing video frame %" PRId64
+ " (id=%x, vq-queued=%zu, clock=%" PRId64 ")",
+ frame->mTime.ToMicroseconds(), frame->mFrameID,
+ VideoQueue().GetSize(), aClockTime);
+ if (!wasSent) {
+ PROFILER_MARKER("PlayVideo", MEDIA_PLAYBACK, {}, MediaSampleMarker,
+ frame->mTime.ToMicroseconds(),
+ frame->GetEndTime().ToMicroseconds(),
+ VideoQueue().GetSize());
+ }
+ }
+
+ if (images.Length() > 0) {
+ mContainer->SetCurrentFrames(frames[0]->mDisplay, images);
+
+ if (mSecondaryContainer) {
+ mSecondaryContainer->SetCurrentFrames(frames[0]->mDisplay, images);
+ }
+ }
+}
+
+void VideoSink::UpdateRenderedVideoFrames() {
+ AUTO_PROFILER_LABEL("VideoSink::UpdateRenderedVideoFrames", MEDIA_PLAYBACK);
+ AssertOwnerThread();
+ MOZ_ASSERT(mAudioSink->IsPlaying(), "should be called while playing.");
+
+ // Get the current playback position.
+ TimeStamp nowTime;
+ const auto clockTime = mAudioSink->GetPosition(&nowTime);
+ MOZ_ASSERT(!clockTime.IsNegative(), "Should have positive clock time.");
+
+ uint32_t sentToCompositorCount = 0;
+ uint32_t droppedInSink = 0;
+
+ // Skip frames up to the playback position.
+ media::TimeUnit lastFrameEndTime;
+ while (VideoQueue().GetSize() > mMinVideoQueueSize &&
+ clockTime >= VideoQueue().PeekFront()->GetEndTime()) {
+ RefPtr<VideoData> frame = VideoQueue().PopFront();
+ lastFrameEndTime = frame->GetEndTime();
+ if (frame->IsSentToCompositor()) {
+ sentToCompositorCount++;
+ } else {
+ droppedInSink++;
+ VSINK_LOG_V("discarding video frame mTime=%" PRId64
+ " clock_time=%" PRId64,
+ frame->mTime.ToMicroseconds(), clockTime.ToMicroseconds());
+
+ struct VideoSinkDroppedFrameMarker {
+ static constexpr Span<const char> MarkerTypeName() {
+ return MakeStringSpan("VideoSinkDroppedFrame");
+ }
+ static void StreamJSONMarkerData(
+ baseprofiler::SpliceableJSONWriter& aWriter,
+ int64_t aSampleStartTimeUs, int64_t aSampleEndTimeUs,
+ int64_t aClockTimeUs) {
+ aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs);
+ aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs);
+ aWriter.IntProperty("clockTimeUs", aClockTimeUs);
+ }
+ static MarkerSchema MarkerTypeDisplay() {
+ using MS = MarkerSchema;
+ MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+ schema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time",
+ MS::Format::Microseconds);
+ schema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time",
+ MS::Format::Microseconds);
+ schema.AddKeyLabelFormat("clockTimeUs", "Audio clock time",
+ MS::Format::Microseconds);
+ return schema;
+ }
+ };
+ profiler_add_marker(
+ "VideoSinkDroppedFrame", geckoprofiler::category::MEDIA_PLAYBACK, {},
+ VideoSinkDroppedFrameMarker{}, frame->mTime.ToMicroseconds(),
+ frame->GetEndTime().ToMicroseconds(), clockTime.ToMicroseconds());
+ }
+ }
+
+ if (droppedInSink || sentToCompositorCount) {
+ uint32_t totalCompositorDroppedCount = mContainer->GetDroppedImageCount();
+ uint32_t droppedInCompositor =
+ totalCompositorDroppedCount - mOldCompositorDroppedCount;
+ if (droppedInCompositor > 0) {
+ mOldCompositorDroppedCount = totalCompositorDroppedCount;
+ VSINK_LOG_V("%u video frame previously discarded by compositor",
+ droppedInCompositor);
+ }
+ mPendingDroppedCount += droppedInCompositor;
+ uint32_t droppedReported = mPendingDroppedCount > sentToCompositorCount
+ ? sentToCompositorCount
+ : mPendingDroppedCount;
+ mPendingDroppedCount -= droppedReported;
+
+ mFrameStats.Accumulate({0, 0, sentToCompositorCount - droppedReported, 0,
+ droppedInSink, droppedInCompositor});
+ }
+
+ // The presentation end time of the last video frame displayed is either
+ // the end time of the current frame, or if we dropped all frames in the
+ // queue, the end time of the last frame we removed from the queue.
+ RefPtr<VideoData> currentFrame = VideoQueue().PeekFront();
+ mVideoFrameEndTime =
+ std::max(mVideoFrameEndTime,
+ currentFrame ? currentFrame->GetEndTime() : lastFrameEndTime);
+
+ RenderVideoFrames(mVideoQueueSendToCompositorSize, clockTime.ToMicroseconds(),
+ nowTime);
+
+ MaybeResolveEndPromise();
+
+ // Get the timestamp of the next frame. Schedule the next update at
+ // the start time of the next frame. If we don't have a next frame,
+ // we will run render loops again upon incoming frames.
+ nsTArray<RefPtr<VideoData>> frames;
+ VideoQueue().GetFirstElements(2, &frames);
+ if (frames.Length() < 2) {
+ return;
+ }
+
+ int64_t nextFrameTime = frames[1]->mTime.ToMicroseconds();
+ int64_t delta = std::max(nextFrameTime - clockTime.ToMicroseconds(),
+ MIN_UPDATE_INTERVAL_US);
+ TimeStamp target = nowTime + TimeDuration::FromMicroseconds(
+ delta / mAudioSink->PlaybackRate());
+
+ RefPtr<VideoSink> self = this;
+ mUpdateScheduler.Ensure(
+ target, [self]() { self->UpdateRenderedVideoFramesByTimer(); },
+ [self]() { self->UpdateRenderedVideoFramesByTimer(); });
+}
+
+void VideoSink::MaybeResolveEndPromise() {
+ AssertOwnerThread();
+ // All frames are rendered, Let's resolve the promise.
+ if (VideoQueue().IsFinished() && VideoQueue().GetSize() <= 1 &&
+ !mVideoSinkEndRequest.Exists()) {
+ if (VideoQueue().GetSize() == 1) {
+ // Remove the last frame since we have sent it to compositor.
+ RefPtr<VideoData> frame = VideoQueue().PopFront();
+ if (mPendingDroppedCount > 0) {
+ mFrameStats.Accumulate({0, 0, 0, 0, 0, 1});
+ mPendingDroppedCount--;
+ } else {
+ mFrameStats.NotifyPresentedFrame();
+ }
+ }
+
+ TimeStamp nowTime;
+ const auto clockTime = mAudioSink->GetPosition(&nowTime);
+
+ // Clear future frames from the compositor, in case the playback position
+ // unexpectedly jumped to the end, and all frames between the previous
+ // playback position and the end were discarded. Old frames based on the
+ // previous playback position might still be queued in the compositor. See
+ // bug 1598143 for when this can happen.
+ mContainer->ClearFutureFrames(nowTime);
+ if (mSecondaryContainer) {
+ mSecondaryContainer->ClearFutureFrames(nowTime);
+ }
+
+ if (clockTime < mVideoFrameEndTime) {
+ VSINK_LOG_V(
+ "Not reach video end time yet, reschedule timer to resolve "
+ "end promise. clockTime=%" PRId64 ", endTime=%" PRId64,
+ clockTime.ToMicroseconds(), mVideoFrameEndTime.ToMicroseconds());
+ int64_t delta = (mVideoFrameEndTime - clockTime).ToMicroseconds() /
+ mAudioSink->PlaybackRate();
+ TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta);
+ auto resolveEndPromise = [self = RefPtr<VideoSink>(this)]() {
+ self->mEndPromiseHolder.ResolveIfExists(true, __func__);
+ self->mUpdateScheduler.CompleteRequest();
+ };
+ mUpdateScheduler.Ensure(target, std::move(resolveEndPromise),
+ std::move(resolveEndPromise));
+ } else {
+ mEndPromiseHolder.ResolveIfExists(true, __func__);
+ }
+ }
+}
+
+void VideoSink::SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) {
+ AssertOwnerThread();
+ mSecondaryContainer = aSecondary;
+ if (!IsPlaying() && mSecondaryContainer) {
+ ImageContainer* mainImageContainer = mContainer->GetImageContainer();
+ ImageContainer* secondaryImageContainer =
+ mSecondaryContainer->GetImageContainer();
+ MOZ_DIAGNOSTIC_ASSERT(mainImageContainer);
+ MOZ_DIAGNOSTIC_ASSERT(secondaryImageContainer);
+
+ // If the video isn't currently playing, get the current frame and display
+ // that in the secondary container as well.
+ AutoLockImage lockImage(mainImageContainer);
+ TimeStamp now = TimeStamp::Now();
+ if (RefPtr<Image> image = lockImage.GetImage(now)) {
+ AutoTArray<ImageContainer::NonOwningImage, 1> currentFrame;
+ currentFrame.AppendElement(ImageContainer::NonOwningImage(
+ image, now, /* frameID */ 1,
+ /* producerId */ ImageContainer::AllocateProducerID()));
+ secondaryImageContainer->SetCurrentImages(currentFrame);
+ }
+ }
+}
+
+void VideoSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+ AssertOwnerThread();
+ aInfo.mVideoSink.mIsStarted = IsStarted();
+ aInfo.mVideoSink.mIsPlaying = IsPlaying();
+ aInfo.mVideoSink.mFinished = VideoQueue().IsFinished();
+ aInfo.mVideoSink.mSize = VideoQueue().GetSize();
+ aInfo.mVideoSink.mVideoFrameEndTime = mVideoFrameEndTime.ToMicroseconds();
+ aInfo.mVideoSink.mHasVideo = mHasVideo;
+ aInfo.mVideoSink.mVideoSinkEndRequestExists = mVideoSinkEndRequest.Exists();
+ aInfo.mVideoSink.mEndPromiseHolderIsEmpty = mEndPromiseHolder.IsEmpty();
+ mAudioSink->GetDebugInfo(aInfo);
+}
+
+bool VideoSink::InitializeBlankImage() {
+ mBlankImage = mContainer->GetImageContainer()->CreatePlanarYCbCrImage();
+ if (mBlankImage == nullptr) {
+ return false;
+ }
+ SetImageToGreenPixel(mBlankImage->AsPlanarYCbCrImage());
+ return true;
+}
+
+void VideoSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+ mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled);
+}
+
+} // namespace mozilla
diff --git a/dom/media/mediasink/VideoSink.h b/dom/media/mediasink/VideoSink.h
new file mode 100644
index 0000000000..7f2528d870
--- /dev/null
+++ b/dom/media/mediasink/VideoSink.h
@@ -0,0 +1,177 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VideoSink_h_
+#define VideoSink_h_
+
+#include "FrameStatistics.h"
+#include "ImageContainer.h"
+#include "MediaEventSource.h"
+#include "MediaSink.h"
+#include "MediaTimer.h"
+#include "VideoFrameContainer.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+
+namespace mozilla {
+
+class VideoFrameContainer;
+template <class T>
+class MediaQueue;
+
+class VideoSink : public MediaSink {
+ typedef mozilla::layers::ImageContainer::ProducerID ProducerID;
+
+ public:
+ VideoSink(AbstractThread* aThread, MediaSink* aAudioSink,
+ MediaQueue<VideoData>& aVideoQueue, VideoFrameContainer* aContainer,
+ FrameStatistics& aFrameStats, uint32_t aVQueueSentToCompositerSize);
+
+ RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+
+ media::TimeUnit GetEndTime(TrackType aType) const override;
+
+ media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+
+ bool HasUnplayedFrames(TrackType aType) const override;
+ media::TimeUnit UnplayedDuration(TrackType aType) const override;
+
+ void SetPlaybackRate(double aPlaybackRate) override;
+
+ void SetVolume(double aVolume) override;
+
+ void SetStreamName(const nsAString& aStreamName) override;
+
+ void SetPreservesPitch(bool aPreservesPitch) override;
+
+ void SetPlaying(bool aPlaying) override;
+
+ double PlaybackRate() const override;
+
+ void Redraw(const VideoInfo& aInfo) override;
+
+ nsresult Start(const media::TimeUnit& aStartTime,
+ const MediaInfo& aInfo) override;
+
+ void Stop() override;
+
+ bool IsStarted() const override;
+
+ bool IsPlaying() const override;
+
+ const AudioDeviceInfo* AudioDevice() const override;
+
+ void Shutdown() override;
+
+ void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) override;
+
+ void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+
+ void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override;
+
+ private:
+ virtual ~VideoSink();
+
+ // VideoQueue listener related.
+ void OnVideoQueuePushed(RefPtr<VideoData>&& aSample);
+ void OnVideoQueueFinished();
+ void ConnectListener();
+ void DisconnectListener();
+
+ void EnsureHighResTimersOnOnlyIfPlaying();
+
+ // Sets VideoQueue images into the VideoFrameContainer. Called on the shared
+ // state machine thread. The first aMaxFrames (at most) are set.
+ // aClockTime and aClockTimeStamp are used as the baseline for deriving
+ // timestamps for the frames; when omitted, aMaxFrames must be 1 and
+ // a null timestamp is passed to the VideoFrameContainer.
+ // If the VideoQueue is empty, this does nothing.
+ void RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime = 0,
+ const TimeStamp& aClickTimeStamp = TimeStamp());
+
+ // Triggered while videosink is started, videosink becomes "playing" status,
+ // or VideoQueue event arrived.
+ void TryUpdateRenderedVideoFrames();
+
+ // If we have video, display a video frame if it's time for display has
+ // arrived, otherwise sleep until it's time for the next frame. Update the
+ // current frame time as appropriate, and trigger ready state update.
+ // Called on the shared state machine thread.
+ void UpdateRenderedVideoFrames();
+ void UpdateRenderedVideoFramesByTimer();
+
+ void MaybeResolveEndPromise();
+
+ void AssertOwnerThread() const {
+ MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+ }
+
+ MediaQueue<VideoData>& VideoQueue() const { return mVideoQueue; }
+
+ const RefPtr<AbstractThread> mOwnerThread;
+ const RefPtr<MediaSink> mAudioSink;
+ MediaQueue<VideoData>& mVideoQueue;
+ VideoFrameContainer* mContainer;
+ RefPtr<VideoFrameContainer> mSecondaryContainer;
+
+ // Producer ID to help ImageContainer distinguish different streams of
+ // FrameIDs. A unique and immutable value per VideoSink.
+ const ProducerID mProducerID;
+
+ // Used to notify MediaDecoder's frame statistics
+ FrameStatistics& mFrameStats;
+
+ RefPtr<EndedPromise> mEndPromise;
+ MozPromiseHolder<EndedPromise> mEndPromiseHolder;
+ MozPromiseRequestHolder<EndedPromise> mVideoSinkEndRequest;
+
+ // The presentation end time of the last video frame which has been displayed.
+ media::TimeUnit mVideoFrameEndTime;
+
+ uint32_t mOldCompositorDroppedCount;
+ uint32_t mPendingDroppedCount;
+
+ // Event listeners for VideoQueue
+ MediaEventListener mPushListener;
+ MediaEventListener mFinishListener;
+
+ // True if this sink is going to handle video track.
+ bool mHasVideo;
+
+ // Used to trigger another update of rendered frames in next round.
+ DelayedScheduler mUpdateScheduler;
+
+ // Max frame number sent to compositor at a time.
+ // Based on the pref value obtained in MDSM.
+ const uint32_t mVideoQueueSendToCompositorSize;
+
+ // Talos tests for the compositor require at least one frame in the
+ // video queue so that the compositor has something to composit during
+ // the talos test when the decode is stressed. We have a minimum size
+ // on the video queue in order to facilitate this talos test.
+ // Note: Normal playback should not have a queue size of more than 0,
+ // otherwise A/V sync will be ruined! *Only* make this non-zero for
+ // testing purposes.
+ const uint32_t mMinVideoQueueSize;
+
+#ifdef XP_WIN
+ // Whether we've called timeBeginPeriod(1) to request high resolution
+ // timers. We request high resolution timers when playback starts, and
+ // turn them off when playback is paused. Enabling high resolution
+ // timers can cause higher CPU usage and battery drain on Windows 7,
+ // but reduces our frame drop rate.
+ bool mHiResTimersRequested;
+#endif
+
+ RefPtr<layers::Image> mBlankImage;
+ bool InitializeBlankImage();
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mediasink/moz.build b/dom/media/mediasink/moz.build
new file mode 100644
index 0000000000..6db074538f
--- /dev/null
+++ b/dom/media/mediasink/moz.build
@@ -0,0 +1,25 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+UNIFIED_SOURCES += [
+ "AudioDecoderInputTrack.cpp",
+ "AudioSink.cpp",
+ "AudioSinkWrapper.cpp",
+ "DecodedStream.cpp",
+ "VideoSink.cpp",
+]
+
+EXPORTS += [
+ "MediaSink.h",
+]
+
+LOCAL_INCLUDES += [
+ "/dom/media",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+FINAL_LIBRARY = "xul"