12 files changed, 4807 insertions, 0 deletions
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.cpp b/dom/media/mediasink/AudioDecoderInputTrack.cpp
new file mode 100644
index 0000000000..7f970f0e4f
--- /dev/null
+++ b/dom/media/mediasink/AudioDecoderInputTrack.cpp
@@ -0,0 +1,681 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioDecoderInputTrack.h"
+
+#include "MediaData.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "Tracing.h"
+
+// Use abort() instead of exception in SoundTouch.
+#define ST_NO_EXCEPTION_HANDLING 1
+#include "soundtouch/SoundTouchFactory.h"
+
+namespace mozilla {
+
+extern LazyLogModule gMediaDecoderLog;
+// Debug-level log helper for member functions; prefixes the track address.
+#define LOG(msg, ...)                        \
+  MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
+          ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
+// Like LOG, but the track pointer is passed explicitly as the 2nd argument.
+#define LOG_M(msg, this, ...)                \
+  MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
+          ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
+// Builds a track running at aGraph's rate and registers it with aGraph.
+/* static */
+AudioDecoderInputTrack* AudioDecoderInputTrack::Create(
+    MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread,
+    const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
+    bool aPreservesPitch) {
+  MOZ_ASSERT(aGraph);
+  MOZ_ASSERT(aDecoderThread);
+  auto* created = new AudioDecoderInputTrack(
+      aDecoderThread, aGraph->GraphRate(), aInfo, aPlaybackRate, aVolume,
+      aPreservesPitch);
+  aGraph->AddTrack(created);
+  return created;
+}
+// Stores the initial stream parameters; mInputSampleRate may change later.
+AudioDecoderInputTrack::AudioDecoderInputTrack(
+    nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate,
+    const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
+    bool aPreservesPitch)
+    : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()),
+      mDecoderThread(aDecoderThread),
+      mResamplerChannelCount(0),  // No resampler exists yet.
+      mInitialInputChannels(aInfo.mChannels),
+      mInputSampleRate(aInfo.mRate),
+      mDelayedScheduler(mDecoderThread),  // Uses the decoder thread.
+      mPlaybackRate(aPlaybackRate),
+      mVolume(aVolume),
+      mPreservesPitch(aPreservesPitch) {}
+// Wraps aAudio in aSegment at the graph rate; false if nothing usable results.
+bool AudioDecoderInputTrack::ConvertAudioDataToSegment(
+    AudioData* aAudio, AudioSegment& aSegment,
+    const PrincipalHandle& aPrincipalHandle) {
+  AssertOnDecoderThread();
+  MOZ_ASSERT(aAudio);
+  MOZ_ASSERT(aSegment.IsEmpty());
+  if (!aAudio->Frames()) {
+    LOG("Ignore audio with zero frame");
+    return false;
+  }
+  // Build one pointer per channel; channel i starts at i * Frames() (planar).
+  aAudio->EnsureAudioBuffer();
+  RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
+  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+  AutoTArray<const AudioDataValue*, 2> channels;
+  for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
+    channels.AppendElement(bufferData + i * aAudio->Frames());
+  }
+  aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(),
+                        aPrincipalHandle);
+  const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate);
+  if (newInputRate != mInputSampleRate) {
+    LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate);
+    mInputSampleRate = newInputRate;
+    mResampler.own(nullptr);  // Drop the resampler made for the old rate.
+    mResamplerChannelCount = 0;
+  }
+  if (mInputSampleRate != GraphImpl()->GraphRate()) {  // Resample if needed.
+    aSegment.ResampleChunks(mResampler, &mResamplerChannelCount,
+                            mInputSampleRate, GraphImpl()->GraphRate());
+  }
+  return aSegment.GetDuration() > 0;
+}
+// Convenience overload: forwards a single sample to the array overload.
+void AudioDecoderInputTrack::AppendData(
+    AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
+  AssertOnDecoderThread();
+  MOZ_ASSERT(aAudio);
+  nsTArray<RefPtr<AudioData>> singleSample;
+  singleSample.AppendElement(aAudio);
+  AppendData(singleSample, aPrincipalHandle);
+}
+// Batches every sample, then flushes the batch unless the queue is crowded.
+void AudioDecoderInputTrack::AppendData(
+    nsTArray<RefPtr<AudioData>>& aAudioArray,
+    const PrincipalHandle& aPrincipalHandle) {
+  AssertOnDecoderThread();
+  MOZ_ASSERT(!mShutdownSPSCQueue);
+
+  // Merge all incoming samples into one batch first, so that they occupy a
+  // single SPSC queue entry instead of one entry per sample.
+  for (const auto& sample : aAudioArray) {
+    BatchData(sample, aPrincipalHandle);
+  }
+
+  // Push right away only while the queue still has plenty of free capacity.
+  // Otherwise keep the batch; the delayed task armed by BatchData() retries.
+  const bool queueIsCrowded = ShouldBatchData();
+  if (!queueIsCrowded) {
+    PushBatchedDataIfNeeded();
+  }
+}
+// True when the SPSC queue is too full to accept pushes eagerly.
+bool AudioDecoderInputTrack::ShouldBatchData() const {
+  AssertOnDecoderThread();
+  // Keep batching while less than 3/10 of the queue capacity is writable, so
+  // that the producer never adds pressure to an almost-full queue.
+  static const int kThresholdNumerator = 3;
+  static const int kThresholdDenominator = 10;
+  const auto minWritable =
+      mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator;
+  return mSPSCQueue.AvailableWrite() < minWritable;
+}
+// True while converted audio is held back from the SPSC queue.
+bool AudioDecoderInputTrack::HasBatchedData() const {
+  AssertOnDecoderThread();
+  return !mBatchedData.mSegment.IsEmpty();
+}
+// Converts aAudio and merges it into mBatchedData, then arms the flush timer.
+void AudioDecoderInputTrack::BatchData(
+    AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
+  AssertOnDecoderThread();
+  AudioSegment segment;
+  if (!ConvertAudioDataToSegment(aAudio, segment, aPrincipalHandle)) {
+    return;  // Nothing usable was produced.
+  }
+  mBatchedData.mSegment.AppendFrom(&segment);
+  if (!mBatchedData.mStartTime.IsValid()) {  // First sample of this batch.
+    mBatchedData.mStartTime = aAudio->mTime;
+  }
+  mBatchedData.mEndTime = aAudio->GetEndTime();  // Always the newest sample.
+  LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
+      aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(),
+      mBatchedData.mSegment.GetDuration());
+  DispatchPushBatchedDataIfNeeded();
+}
+// Arms a ~10ms timer that pushes the batch once the SPSC queue has room.
+void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() {
+  AssertOnDecoderThread();
+  MOZ_ASSERT(!mShutdownSPSCQueue);
+  // The graph thread runs an iteration roughly every 2~10ms, so retry on a
+  // similar period to keep feeding it. The timer is needed because MDSM stops
+  // decoding (and therefore stops calling us) once it considers the audio
+  // queue full enough, while data only leaves that queue when we consume it.
+  // Without this task the batched data would never be pushed and playback
+  // would get stuck.
+  // NOTE(review): assumes Ensure() is a no-op while a request is pending.
+  static const uint8_t kTimeoutMS = 10;
+  TimeStamp target =
+      TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS);
+  mDelayedScheduler.Ensure(
+      target,
+      [self = RefPtr<AudioDecoderInputTrack>(this), this]() {
+        LOG("In the task of DispatchPushBatchedDataIfNeeded");
+        mDelayedScheduler.CompleteRequest();
+        MOZ_ASSERT(!mShutdownSPSCQueue);
+        MOZ_ASSERT(HasBatchedData());
+        // The SPSC queue still lacks free capacity, so nothing can be pushed
+        // yet. Re-arm the timer and try again later.
+        if (ShouldBatchData()) {
+          DispatchPushBatchedDataIfNeeded();
+          return;
+        }
+        PushBatchedDataIfNeeded();
+      },
+      []() { MOZ_DIAGNOSTIC_ASSERT(false); });  // Not expected to run.
+}
+// Moves the whole batch into one SPSC entry and cancels the retry timer.
+void AudioDecoderInputTrack::PushBatchedDataIfNeeded() {
+  AssertOnDecoderThread();
+  if (!HasBatchedData()) {
+    return;
+  }
+  LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u",
+      mBatchedData.mStartTime.ToMicroseconds(),
+      mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite());
+  SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))});
+  PushDataToSPSCQueue(data);
+  MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());  // The move ctor cleared it.
+  // No batched data remains, so the pending retry task can be cancelled.
+  mDelayedScheduler.Reset();
+}
+// Flushes pending audio, then queues an EOS marker behind it.
+void AudioDecoderInputTrack::NotifyEndOfStream() {
+  AssertOnDecoderThread();
+  // The batch must be pushed ahead of the EOS marker; otherwise the track
+  // could end before all decoded audio has been sent.
+  PushBatchedDataIfNeeded();
+  LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite());
+  SPSCData eosMarker({SPSCData::EOS()});
+  PushDataToSPSCQueue(eosMarker);
+}
+// Drops the unsent batch and tells the graph thread to drop its buffer too.
+void AudioDecoderInputTrack::ClearFutureData() {
+  AssertOnDecoderThread();
+  // Discard what has not reached the SPSC queue yet, plus its retry timer.
+  mBatchedData.Clear();
+  mDelayedScheduler.Reset();
+  LOG("Set clear future data, available SPSC sz=%u",
+      mSPSCQueue.AvailableWrite());
+  SPSCData clearRequest({SPSCData::ClearFutureData()});
+  PushDataToSPSCQueue(clearRequest);
+}
+// Enqueues `data`; a full queue trips the diagnostic assert, never a retry.
+void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) {
+  AssertOnDecoderThread();
+  const bool rv = mSPSCQueue.Enqueue(data);
+  MOZ_DIAGNOSTIC_ASSERT(rv, "Failed to push data, SPSC queue is full!");
+  Unused << rv;  // Presumably keeps rv used when the assert compiles out.
+}
+// Decoder thread entry point; the value is applied via the main thread.
+void AudioDecoderInputTrack::SetVolume(float aVolume) {
+  AssertOnDecoderThread();
+  LOG("Set volume=%f", aVolume);
+  GetMainThreadSerialEventTarget()->Dispatch(
+      NS_NewRunnableFunction("AudioDecoderInputTrack::SetVolume",
+                             [self = RefPtr<AudioDecoderInputTrack>(this),
+                              aVolume] { self->SetVolumeImpl(aVolume); }));
+}
+// Main thread: queues a control message that stores the new volume.
+void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) {
+  MOZ_ASSERT(NS_IsMainThread());
+  class Message : public ControlMessage {
+   public:
+    Message(AudioDecoderInputTrack* aTrack, float aVolume)
+        : ControlMessage(aTrack), mTrack(aTrack), mVolume(aVolume) {}
+    void Run() override {  // Presumably invoked on the graph thread.
+      TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f",
+                    mVolume);
+      LOG_M("Apply volume=%f", mTrack.get(), mVolume);
+      mTrack->mVolume = mVolume;
+    }
+
+   protected:
+    const RefPtr<AudioDecoderInputTrack> mTrack;  // Keeps the track alive.
+    const float mVolume;
+  };
+  GraphImpl()->AppendMessage(MakeUnique<Message>(this, aVolume));
+}
+// Decoder thread entry point; the value is applied via the main thread.
+void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) {
+  AssertOnDecoderThread();
+  LOG("Set playback rate=%f", aPlaybackRate);
+  GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction(
+      "AudioDecoderInputTrack::SetPlaybackRate",
+      [self = RefPtr<AudioDecoderInputTrack>(this), aPlaybackRate] {
+        self->SetPlaybackRateImpl(aPlaybackRate);
+      }));
+}
+// Main thread: queues a control message that applies the new playback rate.
+void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) {
+  MOZ_ASSERT(NS_IsMainThread());
+  class Message : public ControlMessage {
+   public:
+    Message(AudioDecoderInputTrack* aTrack, float aPlaybackRate)
+        : ControlMessage(aTrack),
+          mTrack(aTrack),
+          mPlaybackRate(aPlaybackRate) {}
+    void Run() override {  // Presumably invoked on the graph thread.
+      TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage",
+                    "%f", mPlaybackRate);
+      LOG_M("Apply playback rate=%f", mTrack.get(), mPlaybackRate);
+      mTrack->mPlaybackRate = mPlaybackRate;
+      mTrack->SetTempoAndRateForTimeStretcher();  // No-op without stretcher.
+    }
+
+   protected:
+    const RefPtr<AudioDecoderInputTrack> mTrack;  // Keeps the track alive.
+    const float mPlaybackRate;
+  };
+  GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPlaybackRate));
+}
+// Decoder thread entry point; the value is applied via the main thread.
+void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) {
+  AssertOnDecoderThread();
+  LOG("Set preserves pitch=%d", aPreservesPitch);
+  GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction(
+      "AudioDecoderInputTrack::SetPreservesPitch",
+      [self = RefPtr<AudioDecoderInputTrack>(this), aPreservesPitch] {
+        self->SetPreservesPitchImpl(aPreservesPitch);
+      }));
+}
+// Main thread: queues a control message that applies the preserves-pitch flag.
+void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) {
+  MOZ_ASSERT(NS_IsMainThread());
+  class Message : public ControlMessage {
+   public:
+    Message(AudioDecoderInputTrack* aTrack, bool aPreservesPitch)
+        : ControlMessage(aTrack),
+          mTrack(aTrack),
+          mPreservesPitch(aPreservesPitch) {}
+    void Run() override {  // Presumably invoked on the graph thread.
+      TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s",
+                    mPreservesPitch ? "true" : "false");
+      LOG_M("Apply preserves pitch=%d", mTrack.get(), mPreservesPitch);
+      mTrack->mPreservesPitch = mPreservesPitch;
+      mTrack->SetTempoAndRateForTimeStretcher();  // No-op without stretcher.
+    }
+
+   protected:
+    const RefPtr<AudioDecoderInputTrack> mTrack;  // Keeps the track alive.
+    const bool mPreservesPitch;
+  };
+  GraphImpl()->AppendMessage(MakeUnique<Message>(this, aPreservesPitch));
+}
+// Decoder thread: marks the producer side shut down and drops pending data.
+void AudioDecoderInputTrack::Close() {
+  AssertOnDecoderThread();
+  LOG("Close");
+  mShutdownSPSCQueue = true;
+  mBatchedData.Clear();
+  mDelayedScheduler.Reset();  // Cancel the pending retry task, if any.
+}
+// Graph-side teardown: drops buffered audio and frees the time stretcher.
+void AudioDecoderInputTrack::DestroyImpl() {
+  LOG("DestroyImpl");
+  AssertOnGraphThreadOrNotRunning();
+  mBufferedData.Clear();
+  // Reset the pointer as well, so later null checks never see a freed object.
+  if (mTimeStretcher) { soundtouch::destroySoundTouchObj(mTimeStretcher); }
+  mTimeStretcher = nullptr;
+  ProcessedMediaTrack::DestroyImpl();
+}
+// Close() must have run first: the batch is empty and the shutdown flag set.
+AudioDecoderInputTrack::~AudioDecoderInputTrack() {
+  MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());
+  MOZ_ASSERT(mShutdownSPSCQueue);
+  mResampler.own(nullptr);  // Release the resampler, if any.
+}
+// Graph iteration: drains the SPSC queue, then outputs aTo - aFrom frames.
+void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
+                                          uint32_t aFlags) {
+  AssertOnGraphThread();
+  if (Ended()) {
+    return;
+  }
+
+  TrackTime consumedDuration = 0;
+  auto notify = MakeScopeExit([this, &consumedDuration] {
+    NotifyInTheEndOfProcessInput(consumedDuration);
+  });
+  // From here on every return path, including "End track", runs `notify`.
+  if (mSentAllData && (aFlags & ALLOW_END)) {
+    LOG("End track");
+    mEnded = true;
+    return;
+  }
+
+  const TrackTime expectedDuration = aTo - aFrom;
+  LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom,
+      aTo, expectedDuration);
+
+  // Drain the whole SPSC queue first, so that it always has capacity to
+  // accept data from the producer. Draining also picks up control entries
+  // that have to be applied before any output is produced, e.g.
+  // `ClearFutureData`.
+  SPSCData data;
+  while (mSPSCQueue.Dequeue(&data, 1) > 0) {
+    HandleSPSCData(data);
+  }
+
+  consumedDuration += AppendBufferedDataToOutput(expectedDuration);
+  if (HasSentAllData()) {
+    LOG("Sent all data, should end track in next iteration");
+    mSentAllData = true;
+  }
+}
+// Applies one dequeued entry (audio, EOS or clear) to the graph-side state.
+void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) {
+  AssertOnGraphThread();
+  if (aData.IsDecodedData()) {
+    MOZ_ASSERT(!mReceivedEOS);
+    AudioSegment& segment = aData.AsDecodedData()->mSegment;
+    LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
+        aData.AsDecodedData()->mStartTime.ToMicroseconds(),
+        aData.AsDecodedData()->mEndTime.ToMicroseconds(),
+        segment.GetDuration());
+    mBufferedData.AppendFrom(&segment);  // Queue it up for the output step.
+    return;
+  }
+  if (aData.IsEOS()) {
+    MOZ_ASSERT(!Ended());
+    LOG("Received EOS");
+    mReceivedEOS = true;
+    return;
+  }
+  if (aData.IsClearFutureData()) {
+    LOG("Clear future data");
+    mBufferedData.Clear();  // Drop audio that has not been output yet.
+    if (!Ended()) {  // Revoke a pending EOS unless the track already ended.
+      LOG("Clear EOS");
+      mReceivedEOS = false;
+    }
+    return;
+  }
+  MOZ_ASSERT_UNREACHABLE("unsupported SPSC data");
+}
+// Outputs up to aExpectedDuration frames, padding any shortfall with silence.
+TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput(
+    TrackTime aExpectedDuration) {
+  AssertOnGraphThread();
+
+  // Move the required part of `mBufferedData` into a separate segment, so
+  // that the operations below do not affect data that is still buffered.
+  AudioSegment outputSegment;
+  TrackTime consumedDuration = 0;
+  if (mPlaybackRate != 1.0) {
+    consumedDuration =
+        AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment);
+  } else {
+    consumedDuration =
+        AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment);
+  }
+
+  // Apply volume and track disabling to the segment that is about to be
+  // output to the graph.
+  const TrackTime appendedDuration = outputSegment.GetDuration();
+  outputSegment.ApplyVolume(mVolume);
+  ApplyTrackDisabling(&outputSegment);
+  mSegment->AppendFrom(&outputSegment);
+
+  LOG("Appended %" PRId64 ", consumed %" PRId64
+      ", remaining raw buffered %" PRId64 ", remaining time-stretched %u",
+      appendedDuration, consumedDuration, mBufferedData.GetDuration(),
+      mTimeStretcher ? mTimeStretcher->numSamples() : 0);
+  if (auto gap = aExpectedDuration - appendedDuration; gap > 0) {
+    LOG("Audio underrun, fill silence %" PRId64, gap);
+    MOZ_ASSERT(mBufferedData.IsEmpty());
+    mSegment->AppendNullData(gap);  // Keep the output at full length.
+  }
+  return consumedDuration;  // Frames taken from mBufferedData, not output.
+}
+// Rate != 1.0 path: feeds the time stretcher and outputs what it produced.
+TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment(
+    TrackTime aExpectedDuration, AudioSegment& aOutput) {
+  AssertOnGraphThread();
+  EnsureTimeStretcher();
+
+  MOZ_ASSERT(mPlaybackRate != 1.0f);
+  MOZ_ASSERT(aExpectedDuration >= 0);
+  MOZ_ASSERT(mTimeStretcher);
+  MOZ_ASSERT(aOutput.IsEmpty());
+
+  // If the time stretcher does not hold enough processed samples yet, feed it
+  // raw data until its processed sample count reaches or exceeds the expected
+  // duration.
+  TrackTime consumedDuration = 0;
+  if (mTimeStretcher->numSamples() < aExpectedDuration) {
+    consumedDuration = FillDataToTimeStretcher(aExpectedDuration);
+  }
+  MOZ_ASSERT(consumedDuration >= 0);
+  Unused << GetDataFromTimeStretcher(aExpectedDuration, aOutput);
+  return consumedDuration;  // Raw frames consumed, not stretched frames.
+}
+// Feeds interleaved chunks to the time stretcher; returns frames consumed.
+TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher(
+    TrackTime aExpectedDuration) {
+  AssertOnGraphThread();
+  MOZ_ASSERT(mPlaybackRate != 1.0f);
+  MOZ_ASSERT(aExpectedDuration >= 0);
+  MOZ_ASSERT(mTimeStretcher);
+
+  TrackTime consumedDuration = 0;
+  const uint32_t channels = GetChannelCountForTimeStretcher();
+  mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) {
+    MOZ_ASSERT(aChunk);
+    if (aChunk->IsNull() && aChunk->GetDuration() == 0) {
+      // Nothing to feed; returning false moves on to the next chunk.
+      return false;
+    }
+    const uint32_t bufferLength = channels * aChunk->GetDuration();
+    if (bufferLength > mInterleavedBuffer.Capacity()) {
+      mInterleavedBuffer.SetCapacity(bufferLength);
+    }
+    mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
+    if (aChunk->IsNull()) {
+      MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence");
+      memset(mInterleavedBuffer.Elements(), 0,
+             mInterleavedBuffer.Length() * sizeof(AudioDataValue));  // Bytes.
+    } else {
+      // Up-mix/down-mix to the time stretcher's channel count if necessary,
+      // then convert from planar to interleaved.
+      switch (aChunk->mBufferFormat) {
+        case AUDIO_FORMAT_S16:
+          WriteChunk<int16_t>(*aChunk, channels, 1.0f,
+                              mInterleavedBuffer.Elements());
+          break;
+        case AUDIO_FORMAT_FLOAT32:
+          WriteChunk<float>(*aChunk, channels, 1.0f,
+                            mInterleavedBuffer.Elements());
+          break;
+        default:
+          MOZ_ASSERT_UNREACHABLE("Not expected format");
+      }
+    }
+    mTimeStretcher->putSamples(mInterleavedBuffer.Elements(),
+                               aChunk->GetDuration());
+    consumedDuration += aChunk->GetDuration();
+    return mTimeStretcher->numSamples() >= aExpectedDuration;
+  });
+  mBufferedData.RemoveLeading(consumedDuration);
+  return consumedDuration;
+}
+// Rate == 1.0 path: outputs stretcher leftovers first, then raw buffered data.
+TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment(
+    TrackTime aExpectedDuration, AudioSegment& aOutput) {
+  AssertOnGraphThread();
+  MOZ_ASSERT(mPlaybackRate == 1.0f);
+  MOZ_ASSERT(aExpectedDuration >= 0);
+  MOZ_ASSERT(aOutput.IsEmpty());
+  // Stretched leftovers come first; raw data only fills what remains.
+  const TrackTime drained =
+      DrainStretchedDataIfNeeded(aExpectedDuration, aOutput);
+  const TrackTime available =
+      std::min(aExpectedDuration - drained, mBufferedData.GetDuration());
+  aOutput.AppendSlice(mBufferedData, 0, available);
+  MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration);
+  mBufferedData.RemoveLeading(available);
+  return available;  // Drained stretcher frames are not counted here.
+}
+// Outputs samples still held by the time stretcher while the rate is 1.0.
+TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded(
+    TrackTime aExpectedDuration, AudioSegment& aOutput) {
+  AssertOnGraphThread();
+  MOZ_ASSERT(mPlaybackRate == 1.0f);
+  MOZ_ASSERT(aExpectedDuration >= 0);
+
+  // Nothing to drain without a time stretcher or without processed samples.
+  const bool hasStretchedLeftovers =
+      mTimeStretcher && mTimeStretcher->numSamples() != 0;
+  if (!hasStretchedLeftovers) {
+    return 0;
+  }
+  return GetDataFromTimeStretcher(aExpectedDuration, aOutput);
+}
+// Appends up to aExpectedDuration stretched frames to aOutput; returns count.
+TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher(
+    TrackTime aExpectedDuration, AudioSegment& aOutput) {
+  AssertOnGraphThread();
+  MOZ_ASSERT(mTimeStretcher);
+  MOZ_ASSERT(aExpectedDuration >= 0);
+  // With no more input coming, force out what the stretcher still holds.
+  if (HasSentAllData() && mTimeStretcher->numUnprocessedSamples()) {
+    mTimeStretcher->flush();
+    LOG("Flush %u frames from the time stretcher",
+        mTimeStretcher->numSamples());
+  }
+
+  const TrackTime available =
+      std::min((TrackTime)mTimeStretcher->numSamples(), aExpectedDuration);
+  if (available == 0) {
+    // Either running out of stretched data, or the raw data we filled into
+    // the time stretcher were not enough for producing stretched data.
+    return 0;
+  }
+
+  // Retrieve interleaved data from the time stretcher.
+  const uint32_t channelCount = GetChannelCountForTimeStretcher();
+  const uint32_t bufferLength = channelCount * available;
+  if (bufferLength > mInterleavedBuffer.Capacity()) {
+    mInterleavedBuffer.SetCapacity(bufferLength);
+  }
+  mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
+  mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available);
+
+  // Convert from interleaved to planar; channel idx starts at idx * available.
+  CheckedInt<size_t> bufferSize(sizeof(AudioDataValue));
+  bufferSize *= bufferLength;
+  RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
+  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+  AutoTArray<AudioDataValue*, 2> planarBuffer;
+  planarBuffer.SetLength(channelCount);
+  for (size_t idx = 0; idx < channelCount; idx++) {
+    planarBuffer[idx] = bufferData + idx * available;
+  }
+  DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available,
+                               channelCount, planarBuffer.Elements());
+  AutoTArray<const AudioDataValue*, 2> outputChannels;
+  outputChannels.AppendElements(planarBuffer);
+  aOutput.AppendFrames(buffer.forget(), outputChannels,
+                       static_cast<int32_t>(available),
+                       mBufferedData.GetOldestPrinciple());
+  return available;
+}
+// Accumulates written frames and fires the output / end notifications.
+void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput(
+    TrackTime aFillDuration) {
+  AssertOnGraphThread();
+  mWrittenFrames += aFillDuration;
+  LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d",
+      aFillDuration, mWrittenFrames, Ended());
+  if (aFillDuration > 0) {  // Skip the event when nothing was consumed.
+    mOnOutput.Notify(mWrittenFrames);
+  }
+  if (Ended()) {
+    mOnEnd.Notify();
+  }
+}
+// True once EOS arrived and neither the queue nor the buffer holds data.
+bool AudioDecoderInputTrack::HasSentAllData() const {
+  AssertOnGraphThread();
+  return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 &&
+         mBufferedData.IsEmpty();
+}
+// Max channel count of the track's segment, or the initial count if that is 0.
+uint32_t AudioDecoderInputTrack::NumberOfChannels() const {
+  AssertOnGraphThread();
+  const uint32_t maxChannelCount = GetData<AudioSegment>()->MaxChannelCount();
+  return maxChannelCount ? maxChannelCount : mInitialInputChannels;
+}
+// Lazily creates and configures the SoundTouch instance on first use.
+void AudioDecoderInputTrack::EnsureTimeStretcher() {
+  AssertOnGraphThread();
+  if (mTimeStretcher) {
+    return;
+  }
+  mTimeStretcher = soundtouch::createSoundTouchObj();
+  mTimeStretcher->setSampleRate(GraphImpl()->GraphRate());
+  mTimeStretcher->setChannels(GetChannelCountForTimeStretcher());
+  mTimeStretcher->setPitch(1.0);
+
+  // SoundTouch v2.1.2 picks these automatic time-stretch settings:
+  //   Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap
+  //   Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap
+  // Override them with pref-controlled values. The intent is a smaller (10ms)
+  // sequence for clearer speech (more resolution at high tempo, less reverb
+  // at low tempo) while keeping a 15ms seekwindow and 8ms overlap.
+  mTimeStretcher->setSetting(
+      SETTING_SEQUENCE_MS,
+      StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms());
+  mTimeStretcher->setSetting(
+      SETTING_SEEKWINDOW_MS,
+      StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms());
+  mTimeStretcher->setSetting(
+      SETTING_OVERLAP_MS,
+      StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms());
+  SetTempoAndRateForTimeStretcher();
+  LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)",
+      GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch);
+}
+// Applies the playback rate as SoundTouch tempo or rate, per mPreservesPitch.
+void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() {
+  AssertOnGraphThread();
+  if (!mTimeStretcher) {
+    return;
+  }
+
+  // Exactly one of the two knobs carries the playback rate; the other one
+  // stays neutral (1.0). Tempo is used when the pitch has to be preserved.
+  const float tempo = mPreservesPitch ? mPlaybackRate : 1.0f;
+  const float rate = mPreservesPitch ? 1.0f : mPlaybackRate;
+  mTimeStretcher->setTempo(tempo);
+  mTimeStretcher->setRate(rate);
+}
+// Channel count used for all time stretcher input and output buffers.
+uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const {
+  // The time stretcher MUST be initialized with a fixed channel count, but the
+  // channel count of audio chunks might vary. Therefore we always initialize
+  // the time stretcher with the initial input channel count, and up-mix or
+  // down-mix in real time any chunk whose channel count differs from it.
+  // (No thread assertion here: mInitialInputChannels is const.)
+  return mInitialInputChannels;
+}
+#undef LOG
+#undef LOG_M  // Defined next to LOG at the top of this file.
+}  // namespace mozilla
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h
new file mode 100644
index 0000000000..8c82d7bed6
--- /dev/null
+++ b/dom/media/mediasink/AudioDecoderInputTrack.h
@@ -0,0 +1,242 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AudioDecoderInputTrack_h
+#define AudioDecoderInputTrack_h
+
+#include "AudioSegment.h"
+#include "MediaEventSource.h"
+#include "MediaTimer.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackGraphImpl.h"
+#include "MediaSegment.h"
+#include "mozilla/SPSCQueue.h"
+#include "mozilla/StateMirroring.h"
+#include "nsISerialEventTarget.h"
+// Forward declaration; this header only stores a pointer to the type.
+namespace soundtouch {
+class MOZ_EXPORT SoundTouch;
+}
+
+namespace mozilla {
+
+class AudioData;
+
+/**
+ * AudioDecoderInputTrack is used as a source for decoded audio data, and
+ * supports adjusting the playback rate with or without preserving pitch.
+ * The owner of this track is responsible for pushing audio data via
+ * `AppendData()` into an SPSC queue, which is a thread-safe queue between the
+ * decoder thread (producer) and the graph thread (consumer). MediaTrackGraph
+ * requests data via `ProcessInput()`; AudioDecoderInputTrack then converts the
+ * data (based on sample rate and playback rate) and appends the needed amount
+ * of audio frames to the output segment that is used by MediaTrackGraph.
+ */
+class AudioDecoderInputTrack final : public ProcessedMediaTrack {
+ public:
+  static AudioDecoderInputTrack* Create(MediaTrackGraph* aGraph,
+                                        nsISerialEventTarget* aDecoderThread,
+                                        const AudioInfo& aInfo,
+                                        float aPlaybackRate, float aVolume,
+                                        bool aPreservesPitch);
+
+  // SPSCData can hold any one of the supported variant types, and is used to
+  // exchange information in a thread-safe way between the decoder thread and
+  // the graph thread.
+  struct SPSCData final {
+    struct Empty {};
+    struct ClearFutureData {};
+    struct DecodedData {
+      DecodedData()
+          : mStartTime(media::TimeUnit::Invalid()),
+            mEndTime(media::TimeUnit::Invalid()) {}
+      DecodedData(DecodedData&& aDecodedData)
+          : mSegment(std::move(aDecodedData.mSegment)) {
+        mStartTime = aDecodedData.mStartTime;
+        mEndTime = aDecodedData.mEndTime;
+        aDecodedData.Clear();  // Leave the source empty with invalid times.
+      }
+      DecodedData(media::TimeUnit aStartTime, media::TimeUnit aEndTime)
+          : mStartTime(aStartTime), mEndTime(aEndTime) {}
+      DecodedData(const DecodedData&) = delete;
+      DecodedData& operator=(const DecodedData&) = delete;
+      void Clear() {
+        mSegment.Clear();
+        mStartTime = media::TimeUnit::Invalid();
+        mEndTime = media::TimeUnit::Invalid();
+      }
+      AudioSegment mSegment;
+      media::TimeUnit mStartTime;
+      media::TimeUnit mEndTime;
+    };
+    struct EOS {};
+
+    SPSCData() : mData(Empty()){};
+    explicit SPSCData(ClearFutureData&& aArg) : mData(std::move(aArg)){};
+    explicit SPSCData(DecodedData&& aArg) : mData(std::move(aArg)){};
+    explicit SPSCData(EOS&& aArg) : mData(std::move(aArg)){};
+
+    bool HasData() const { return !mData.is<Empty>(); }
+    bool IsClearFutureData() const { return mData.is<ClearFutureData>(); }
+    bool IsDecodedData() const { return mData.is<DecodedData>(); }
+    bool IsEOS() const { return mData.is<EOS>(); }
+
+    DecodedData* AsDecodedData() {  // nullptr unless IsDecodedData().
+      return IsDecodedData() ? &mData.as<DecodedData>() : nullptr;
+    }
+
+    Variant<Empty, ClearFutureData, DecodedData, EOS> mData;
+  };
+
+  // Decoder thread API
+  void AppendData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+  void AppendData(nsTArray<RefPtr<AudioData>>& aAudioArray,
+                  const PrincipalHandle& aPrincipalHandle);
+  void NotifyEndOfStream();
+  void ClearFutureData();
+  void SetVolume(float aVolume);
+  void SetPlaybackRate(float aPlaybackRate);
+  void SetPreservesPitch(bool aPreservesPitch);
+  // After calling this, the track is not expected to receive any new data.
+  void Close();
+  bool HasBatchedData() const;
+
+  MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
+  MediaEventSource<void>& OnEnd() { return mOnEnd; }
+
+  // Graph Thread API
+  void DestroyImpl() override;
+  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
+  uint32_t NumberOfChannels() const override;
+
+  // The functions below are only used for testing.
+  TrackTime WrittenFrames() const {
+    AssertOnGraphThread();
+    return mWrittenFrames;
+  }
+  float Volume() const {
+    AssertOnGraphThread();
+    return mVolume;
+  }
+  float PlaybackRate() const {
+    AssertOnGraphThread();
+    return mPlaybackRate;
+  }
+
+ protected:
+  ~AudioDecoderInputTrack();
+
+ private:
+  AudioDecoderInputTrack(nsISerialEventTarget* aDecoderThread,
+                         TrackRate aGraphRate, const AudioInfo& aInfo,
+                         float aPlaybackRate, float aVolume,
+                         bool aPreservesPitch);
+
+  // Return false if the converted segment contains zero duration.
+  bool ConvertAudioDataToSegment(AudioData* aAudio, AudioSegment& aSegment,
+                                 const PrincipalHandle& aPrincipalHandle);
+
+  void HandleSPSCData(SPSCData& aData);
+
+  // These methods return the total number of frames that were consumed from
+  // `mBufferedData`.
+  TrackTime AppendBufferedDataToOutput(TrackTime aExpectedDuration);
+  TrackTime FillDataToTimeStretcher(TrackTime aExpectedDuration);
+  TrackTime AppendTimeStretchedDataToSegment(TrackTime aExpectedDuration,
+                                             AudioSegment& aOutput);
+  TrackTime AppendUnstretchedDataToSegment(TrackTime aExpectedDuration,
+                                           AudioSegment& aOutput);
+
+  // Return the total frames that we retrieve from the time stretcher.
+  TrackTime DrainStretchedDataIfNeeded(TrackTime aExpectedDuration,
+                                       AudioSegment& aOutput);
+  TrackTime GetDataFromTimeStretcher(TrackTime aExpectedDuration,
+                                     AudioSegment& aOutput);
+  void NotifyInTheEndOfProcessInput(TrackTime aFillDuration);
+
+  bool HasSentAllData() const;
+
+  bool ShouldBatchData() const;
+  void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
+  void DispatchPushBatchedDataIfNeeded();
+  void PushBatchedDataIfNeeded();
+  void PushDataToSPSCQueue(SPSCData& data);
+
+  void SetVolumeImpl(float aVolume);
+  void SetPlaybackRateImpl(float aPlaybackRate);
+  void SetPreservesPitchImpl(bool aPreservesPitch);
+
+  void EnsureTimeStretcher();
+  void SetTempoAndRateForTimeStretcher();
+  uint32_t GetChannelCountForTimeStretcher() const;
+
+  inline void AssertOnDecoderThread() const {
+    MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
+  }
+  inline void AssertOnGraphThread() const {
+    MOZ_ASSERT(GraphImpl()->OnGraphThread());
+  }
+  inline void AssertOnGraphThreadOrNotRunning() const {
+    MOZ_ASSERT(GraphImpl()->OnGraphThreadOrNotRunning());
+  }
+
+  const RefPtr<nsISerialEventTarget> mDecoderThread;
+
+  // Notifies the total number of audio frames sent to the track so far.
+  MediaEventProducer<int64_t> mOnOutput;
+  // Notify when the track is ended.
+  MediaEventProducer<void> mOnEnd;
+
+  // These variables are ONLY used in the decoder thread.
+  nsAutoRef<SpeexResamplerState> mResampler;
+  uint32_t mResamplerChannelCount;
+  const uint32_t mInitialInputChannels;  // Const; also read on graph thread.
+  TrackRate mInputSampleRate;
+  DelayedScheduler mDelayedScheduler;
+  bool mShutdownSPSCQueue = false;
+
+  // These attributes are ONLY used in the graph thread.
+  bool mReceivedEOS = false;
+  TrackTime mWrittenFrames = 0;
+  float mPlaybackRate;
+  float mVolume;
+  bool mPreservesPitch;
+
+  // A thread-safe queue shared by the decoder thread and the graph thread.
+  // The decoder thread is the producer side, and the graph thread is the
+  // consumer side. This queue should NEVER get full. In order to achieve that,
+  // we batch input samples whenever the SPSC queue doesn't have much available
+  // capacity.
+  // In addition, the media track isn't guaranteed to be destroyed on the graph
+  // thread (it could be destroyed on the main thread as well), so we might not
+  // clear all data in the SPSC queue when the track's `DestroyImpl()` gets
+  // called. The queue is left to be destroyed together with the track.
+  SPSCQueue<SPSCData> mSPSCQueue{40};
+
+  // When the graph requires fewer audio frames than an audio data contains,
+  // the remaining frames are stored here and used in the next iteration.
+  // It also receives every decoded segment popped from the SPSC queue.
+  // This is ONLY used in the graph thread.
+  AudioSegment mBufferedData;
+
+  // In order to prevent the SPSC queue from getting full, multiple data are
+  // batched into one entry to control the density of the SPSC queue; how much
+  // gets batched depends on the queue's available capacity.
+  // This is ONLY used in the decoder thread.
+  SPSCData::DecodedData mBatchedData;
+
+  // True if we've sent all data to the graph, then the track will be marked as
+  // ended in the next iteration.
+  bool mSentAllData = false;
+
+  // This is used to adjust the playback rate and pitch.
+  soundtouch::SoundTouch* mTimeStretcher = nullptr;
+
+  // Scratch buffer of interleaved samples used for the time stretching.
+  AutoTArray<AudioDataValue, 2> mInterleavedBuffer;
+};
+
+}  // namespace mozilla
+
+#endif  // AudioDecoderInputTrack_h
diff --git a/dom/media/mediasink/AudioSink.cpp b/dom/media/mediasink/AudioSink.cpp
new file mode 100644
index 0000000000..536a2a4f8a
--- /dev/null
+++ b/dom/media/mediasink/AudioSink.cpp
@@ -0,0 +1,664 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSink.h"
+#include "AudioConverter.h"
+#include "AudioDeviceInfo.h"
+#include "MediaQueue.h"
+#include "VideoUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/StaticPrefs_dom.h"
+#include "nsPrintfCString.h"
+#include "Tracing.h"
+
+namespace mozilla {
+
+// Log module used by all AudioSink diagnostics below.
+mozilla::LazyLogModule gAudioSinkLog("AudioSink");
+// Debug-level log line, prefixed with the address of the current AudioSink.
+// Requires `this` to be in scope at the call site.
+#define SINK_LOG(msg, ...)                \
+  MOZ_LOG(gAudioSinkLog, LogLevel::Debug, \
+          ("AudioSink=%p " msg, this, ##__VA_ARGS__))
+// Verbose-level variant of SINK_LOG.
+#define SINK_LOG_V(msg, ...)                \
+  MOZ_LOG(gAudioSinkLog, LogLevel::Verbose, \
+          ("AudioSink=%p " msg, this, ##__VA_ARGS__))
+
+// The number of audio frames of tolerance used to absorb rounding errors when
+// looking for gaps between packets.
+static const int64_t AUDIO_FUZZ_FRAMES = 1;
+
+using media::TimeUnit;
+
+// Builds a sink reading from aAudioQueue. Picks the output rate and channel
+// count from aInfo, allocates the ring buffer that PopFrames() later drains,
+// and makes a first guess at audibility from the packet at the front of the
+// queue (if any).
+AudioSink::AudioSink(AbstractThread* aThread,
+                     MediaQueue<AudioData>& aAudioQueue, const AudioInfo& aInfo,
+                     bool aShouldResistFingerprinting)
+    : mPlaying(true),
+      mWritten(0),
+      mErrored(false),
+      mOwnerThread(aThread),
+      mFramesParsed(0),
+      mOutputRate(
+          DecideAudioPlaybackSampleRate(aInfo, aShouldResistFingerprinting)),
+      mOutputChannels(DecideAudioPlaybackChannels(aInfo)),
+      mAudibilityMonitor(
+          mOutputRate,
+          StaticPrefs::dom_media_silence_duration_for_audibility()),
+      mIsAudioDataAudible(false),
+      mProcessedQueueFinished(false),
+      mAudioQueue(aAudioQueue),
+      mProcessedQueueThresholdMS(
+          StaticPrefs::media_audio_audiosink_threshold_ms()) {
+  // Not much to initialize here if there's no audio: create an empty ring
+  // buffer so later accesses through mProcessedSPSCQueue stay valid.
+  if (!aInfo.IsValid()) {
+    mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(0);
+    return;
+  }
+  // Twice the limit that triggers a refill.
+  double capacitySeconds = mProcessedQueueThresholdMS / 1000.f * 2;
+  // Clamp to correct boundaries, and align on the channel count
+  int elementCount = static_cast<int>(
+      std::clamp(capacitySeconds * mOutputChannels * mOutputRate, 0.,
+                 std::numeric_limits<int>::max() - 1.));
+  // Round down so the buffer holds a whole number of frames.
+  elementCount -= elementCount % mOutputChannels;
+  mProcessedSPSCQueue = MakeUnique<SPSCQueue<AudioDataValue>>(elementCount);
+  SINK_LOG("Ringbuffer has space for %u elements (%lf seconds)",
+           mProcessedSPSCQueue->Capacity(),
+           static_cast<float>(elementCount) / mOutputChannels / mOutputRate);
+  // Determine if the data is likely to be audible when the stream will be
+  // ready, if possible.
+  RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront();
+  if (frontPacket) {
+    mAudibilityMonitor.ProcessInterleaved(frontPacket->Data(),
+                                          frontPacket->mChannels);
+    mIsAudioDataAudible = mAudibilityMonitor.RecentlyAudible();
+    SINK_LOG("New AudioSink -- audio is likely to be %s",
+             mIsAudioDataAudible ? "audible" : "inaudible");
+  } else {
+    // If no packets are available, consider the audio audible.
+    mIsAudioDataAudible = true;
+    SINK_LOG(
+        "New AudioSink -- no audio packet avaialble, considering the stream "
+        "audible");
+  }
+}
+
+// Shuts down the AudioStream if one is still attached at destruction time.
+AudioSink::~AudioSink() {
+  // Generally instances of AudioSink should be properly Shutdown manually.
+  // The only way deleting an AudioSink without shutdown can happen is if the
+  // dispatch back to the MDSM thread after initializing it asynchronously
+  // fails. When that's the case, the stream has been initialized but not
+  // started. Manually shutdown the AudioStream in this case.
+  if (mAudioStream) {
+    mAudioStream->Shutdown();
+  }
+}
+
+// Creates and initializes mAudioStream on aAudioDevice, then applies the
+// volume, playback rate and pitch settings from aParams. Does not start
+// playback. Returns the failure code from AudioStream::Init() on error, in
+// which case mAudioStream is shut down and reset to null; NS_OK otherwise.
+nsresult AudioSink::InitializeAudioStream(
+    const PlaybackParams& aParams, const RefPtr<AudioDeviceInfo>& aAudioDevice,
+    AudioSink::InitializationType aInitializationType) {
+  if (aInitializationType == AudioSink::InitializationType::UNMUTING) {
+    // Consider the stream to be audible immediately, before initialization
+    // finishes when unmuting, in case initialization takes some time and it
+    // looked audible when the AudioSink was created.
+    mAudibleEvent.Notify(mIsAudioDataAudible);
+    SINK_LOG("InitializeAudioStream (Unmuting) notifying that audio is %s",
+             mIsAudioDataAudible ? "audible" : "inaudible");
+  } else {
+    // If not unmuting, the audibility event will be dispatched as usual,
+    // inspecting the audio content as it's being played and signaling the
+    // audibility event when a difference in state is detected.
+    SINK_LOG("InitializeAudioStream (initial)");
+    mIsAudioDataAudible = false;
+  }
+
+  // When AudioQueue is empty, there is no way to know the channel layout of
+  // the coming audio data, so we use the predefined channel map instead.
+  AudioConfig::ChannelLayout::ChannelMap channelMap =
+      AudioConfig::ChannelLayout(mOutputChannels).Map();
+  // The layout map used here is already processed by mConverter with
+  // mOutputChannels into SMPTE format, so there is no need to worry if
+  // StaticPrefs::accessibility_monoaudio_enable() or
+  // StaticPrefs::media_forcestereo_enabled() is applied.
+  MOZ_ASSERT(!mAudioStream);
+  mAudioStream =
+      new AudioStream(*this, mOutputRate, mOutputChannels, channelMap);
+  nsresult rv = mAudioStream->Init(aAudioDevice);
+  if (NS_FAILED(rv)) {
+    // Leave the sink without a stream so later calls see a consistent state.
+    mAudioStream->Shutdown();
+    mAudioStream = nullptr;
+    return rv;
+  }
+
+  // Set playback params before calling Start() so they can take effect
+  // as soon as the 1st DataCallback of the AudioStream fires.
+  mAudioStream->SetVolume(aParams.mVolume);
+  mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
+  mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
+
+  return NS_OK;
+}
+
+// Starts playback at aStartTime. Hooks up the audio queue and ring buffer
+// listeners on the owner thread, primes the ring buffer, then starts the
+// AudioStream, handing it aEndedPromise. Returns the result of
+// AudioStream::Start().
+// NOTE(review): dereferences mAudioStream unconditionally, so this presumably
+// requires a prior successful InitializeAudioStream() — confirm with callers.
+nsresult AudioSink::Start(
+    const media::TimeUnit& aStartTime,
+    MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise) {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  // Every push to (or finish of) the audio queue, and every pop from the ring
+  // buffer, is a chance to refill the ring buffer.
+  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
+      mOwnerThread, this, &AudioSink::OnAudioPushed);
+  mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
+      mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
+  mProcessedQueueListener =
+      mAudioPopped.Connect(mOwnerThread, this, &AudioSink::OnAudioPopped);
+
+  mStartTime = aStartTime;
+
+  // To ensure at least one audio packet will be popped from AudioQueue and
+  // ready to be played.
+  NotifyAudioNeeded();
+
+  return mAudioStream->Start(aEndedPromise);
+}
+
+// Returns the current playback position: mStartTime plus the most recent
+// valid position reported by the AudioStream. The stored position only ever
+// moves forward; a negative stream position or a missing stream leaves it
+// unchanged. Sets mErrored if the sum is not a valid TimeUnit.
+TimeUnit AudioSink::GetPosition() {
+  if (mAudioStream) {
+    const int64_t streamPositionUs = mAudioStream->GetPosition();
+    if (streamPositionUs >= 0) {
+      const TimeUnit streamPosition =
+          TimeUnit::FromMicroseconds(streamPositionUs);
+      NS_ASSERTION(streamPosition >= mLastGoodPosition,
+                   "AudioStream position shouldn't go backward");
+      const TimeUnit absolutePosition = mStartTime + streamPosition;
+      if (!absolutePosition.IsValid()) {
+        // Report the error and fall back to the last known good position.
+        mErrored = true;
+        return mStartTime + mLastGoodPosition;
+      }
+      // Only remember positions that do not move backwards.
+      if (streamPosition >= mLastGoodPosition) {
+        mLastGoodPosition = streamPosition;
+      }
+    }
+  }
+
+  return mStartTime + mLastGoodPosition;
+}
+
+// Returns true if audio is still waiting in the ring buffer, or if the
+// AudioStream has not yet played everything that was handed to it.
+bool AudioSink::HasUnplayedFrames() {
+  // Anything still readable from the ring buffer has not been played.
+  if (mProcessedSPSCQueue->AvailableRead()) {
+    return true;
+  }
+  // Experimentation suggests that GetPositionInFrames() is zero-indexed,
+  // so we need to add 1 here before comparing it to mWritten.
+  return mAudioStream && mAudioStream->GetPositionInFrames() + 1 < mWritten;
+}
+
+// Returns the duration of the audio currently held in the ring buffer.
+TimeUnit AudioSink::UnplayedDuration() const {
+  // AudioQueuedInRingBufferMS() reports milliseconds, while the TimeUnit
+  // factory used here expects microseconds: scale accordingly so the returned
+  // duration is not 1000x too short.
+  return TimeUnit::FromMicroseconds(
+      static_cast<int64_t>(AudioQueuedInRingBufferMS()) * 1000);
+}
+
+// Drains whatever is left in the ring buffer and pushes it back to the front
+// of mAudioQueue as AudioData packets, so that it is not lost. Timestamps are
+// extrapolated backwards from the packet currently at the front of the queue
+// (or from GetPosition() when the queue is empty). Sets mErrored and stops if
+// a packet duration cannot be represented.
+void AudioSink::ReenqueueUnplayedAudioDataIfNeeded() {
+  // This is OK: the AudioStream has been shut down. Shutdown guarantees that
+  // the audio callback thread won't call back again.
+  mProcessedSPSCQueue->ResetThreadIds();
+
+  // construct an AudioData
+  int sampleInRingbuffer = mProcessedSPSCQueue->AvailableRead();
+
+  if (!sampleInRingbuffer) {
+    return;
+  }
+
+  // The ring buffer holds converted audio when a converter exists, so use the
+  // converter's output format in that case.
+  uint32_t channelCount;
+  uint32_t rate;
+  if (mConverter) {
+    channelCount = mConverter->OutputConfig().Channels();
+    rate = mConverter->OutputConfig().Rate();
+  } else {
+    channelCount = mOutputChannels;
+    rate = mOutputRate;
+  }
+
+  uint32_t framesRemaining = sampleInRingbuffer / channelCount;
+
+  nsTArray<AlignedAudioBuffer> packetsToReenqueue;
+  RefPtr<AudioData> frontPacket = mAudioQueue.PeekFront();
+  uint32_t offset;
+  TimeUnit time;
+  uint32_t typicalPacketFrameCount;
+  // Extrapolate mOffset, mTime from the front of the queue
+  // We can't really find a good value for `mOffset`, so we take what we have
+  // at the front of the queue.
+  // For `mTime`, assume there hasn't been a discontinuity recently.
+  if (!frontPacket) {
+    // We do our best here, but it's not going to be perfect.
+    typicalPacketFrameCount = 1024;  // typical for e.g. AAC
+    offset = 0;
+    time = GetPosition();
+  } else {
+    typicalPacketFrameCount = frontPacket->Frames();
+    offset = frontPacket->mOffset;
+    time = frontPacket->mTime;
+  }
+
+  // Extract all audio data from the ring buffer. The data can only be read in
+  // the order the ring buffer hands it out, so it is first packetized into a
+  // temporary array, in dequeue order.
+  while (framesRemaining) {
+    uint32_t packetFrameCount =
+        std::min(framesRemaining, typicalPacketFrameCount);
+    framesRemaining -= packetFrameCount;
+
+    int packetSampleCount = packetFrameCount * channelCount;
+    AlignedAudioBuffer packetData(packetSampleCount);
+    DebugOnly<int> samplesRead =
+        mProcessedSPSCQueue->Dequeue(packetData.Data(), packetSampleCount);
+    MOZ_ASSERT(samplesRead == packetSampleCount);
+
+    packetsToReenqueue.AppendElement(packetData);
+  }
+  // Reenqueue in the audio queue in correct order, starting with the end of
+  // the temporary array: each packet is pushed to the front, so the packet
+  // dequeued first ends up at the very front of the audio queue.
+  while (!packetsToReenqueue.IsEmpty()) {
+    auto packetData = packetsToReenqueue.PopLastElement();
+    uint32_t packetFrameCount = packetData.Length() / channelCount;
+    auto duration = TimeUnit(packetFrameCount, rate);
+    if (!duration.IsValid()) {
+      NS_WARNING("Int overflow in AudioSink");
+      mErrored = true;
+      return;
+    }
+    // Walk the timestamp backwards: this packet ends where the previously
+    // pushed one (or the original front packet) starts.
+    time -= duration;
+    RefPtr<AudioData> packet =
+        new AudioData(offset, time, std::move(packetData), channelCount, rate);
+    MOZ_DIAGNOSTIC_ASSERT(duration == packet->mDuration, "must be equal");
+
+    SINK_LOG(
+        "Muting: Pushing back %u frames (%lfms) from the ring buffer back into "
+        "the audio queue at pts %lf",
+        packetFrameCount, 1000 * static_cast<float>(packetFrameCount) / rate,
+        time.ToSeconds());
+    // The audio data's timestamp would be adjusted already if we're in looping,
+    // so we don't want to adjust them again.
+    mAudioQueue.PushFront(packet,
+                          MediaQueue<AudioData>::TimestampAdjustment::Disable);
+  }
+}
+
+// Disconnects all listeners and shuts down the AudioStream, if any. When the
+// cause is ShutdownCause::Muting, audio still sitting in the ring buffer is
+// pushed back into the audio queue. Returns whatever AudioStream::Shutdown()
+// returned, or Nothing if there was no stream. Must run on the owner thread.
+Maybe<MozPromiseHolder<MediaSink::EndedPromise>> AudioSink::Shutdown(
+    ShutdownCause aShutdownCause) {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  mAudioQueueListener.DisconnectIfExists();
+  mAudioQueueFinishListener.DisconnectIfExists();
+  mProcessedQueueListener.DisconnectIfExists();
+
+  Maybe<MozPromiseHolder<MediaSink::EndedPromise>> rv;
+
+  if (mAudioStream) {
+    rv = mAudioStream->Shutdown(aShutdownCause);
+    mAudioStream = nullptr;
+    // The stream must be shut down before touching the ring buffer from this
+    // thread; see ReenqueueUnplayedAudioDataIfNeeded().
+    if (aShutdownCause == ShutdownCause::Muting) {
+      ReenqueueUnplayedAudioDataIfNeeded();
+    }
+  }
+  // Makes Ended() return true from now on.
+  mProcessedQueueFinished = true;
+
+  return rv;
+}
+
+// Forwards the volume to the AudioStream; a no-op when there is no stream.
+void AudioSink::SetVolume(double aVolume) {
+  if (!mAudioStream) {
+    return;
+  }
+  mAudioStream->SetVolume(aVolume);
+}
+
+// Forwards the stream name to the AudioStream; a no-op when there is no
+// stream.
+void AudioSink::SetStreamName(const nsAString& aStreamName) {
+  if (!mAudioStream) {
+    return;
+  }
+  mAudioStream->SetStreamName(aStreamName);
+}
+
+// Forwards the playback rate to the AudioStream; a no-op when there is no
+// stream. The rate must not be zero.
+void AudioSink::SetPlaybackRate(double aPlaybackRate) {
+  MOZ_ASSERT(aPlaybackRate != 0,
+             "Don't set the playbackRate to 0 on AudioStream");
+  if (!mAudioStream) {
+    return;
+  }
+  mAudioStream->SetPlaybackRate(aPlaybackRate);
+}
+
+// Forwards the pitch-preservation flag to the AudioStream; a no-op when there
+// is no stream.
+void AudioSink::SetPreservesPitch(bool aPreservesPitch) {
+  if (!mAudioStream) {
+    return;
+  }
+  mAudioStream->SetPreservesPitch(aPreservesPitch);
+}
+
+// Pauses or resumes the AudioStream so that it matches aPlaying. Does nothing
+// when there is no stream, when playback has already completed, or when the
+// requested state is already the current one.
+void AudioSink::SetPlaying(bool aPlaying) {
+  const bool nothingToDo = !mAudioStream ||
+                           mAudioStream->IsPlaybackCompleted() ||
+                           mPlaying == aPlaying;
+  if (nothingToDo) {
+    return;
+  }
+  // pause/resume AudioStream as necessary.
+  if (aPlaying) {
+    mAudioStream->Resume();
+  } else {
+    mAudioStream->Pause();
+  }
+  mPlaying = aPlaying;
+}
+
+// Returns the media time up to which audio has been handed to the audio
+// callback: mStartTime plus the frames counted in mWritten, capped at the end
+// time of the last packet processed. Returns zero if that computation
+// overflows.
+TimeUnit AudioSink::GetEndTime() const {
+  // Take a single snapshot of the atomic counter.
+  uint64_t written = mWritten;
+  TimeUnit played = media::TimeUnit(written, mOutputRate) + mStartTime;
+  if (!played.IsValid()) {
+    NS_WARNING("Int overflow calculating audio end time");
+    return TimeUnit::Zero();
+  }
+  // As we may be resampling, rounding errors may occur. Ensure we never get
+  // past the original end time.
+  return std::min(mLastEndTime, played);
+}
+
+// AudioStream::DataSource implementation. Fills aBuffer with up to aFrames
+// frames taken from the ring buffer and zeroes whatever could not be filled.
+// Returns the number of frames to be considered as provided: the frames
+// actually read, or aFrames when an underrun is treated as silence. Also
+// updates mWritten, signals mAudioPopped and refreshes the audibility state.
+uint32_t AudioSink::PopFrames(AudioDataValue* aBuffer, uint32_t aFrames,
+                              bool aAudioThreadChanged) {
+  // This is safe, because we have the guarantee, by the OS, that audio
+  // callbacks are never called concurrently. Audio thread changes can only
+  // happen when not using cubeb remoting, and often when changing audio device
+  // at the system level.
+  if (aAudioThreadChanged) {
+    mProcessedSPSCQueue->ResetThreadIds();
+  }
+
+  TRACE_COMMENT("AudioSink::PopFrames", "%u frames (ringbuffer: %u/%u)",
+                aFrames, SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+                SampleToFrame(mProcessedSPSCQueue->Capacity()));
+
+  const int samplesToPop = static_cast<int>(aFrames * mOutputChannels);
+  const int samplesRead = mProcessedSPSCQueue->Dequeue(aBuffer, samplesToPop);
+  // Number of samples reported back to the caller; may be raised below.
+  auto sampleOut = samplesRead;
+  MOZ_ASSERT(samplesRead % mOutputChannels == 0);
+  // Only frames that really came out of the ring buffer count as written.
+  mWritten += SampleToFrame(samplesRead);
+  if (samplesRead != samplesToPop) {
+    if (Ended()) {
+      SINK_LOG("Last PopFrames -- Source ended.");
+    } else if (mTreatUnderrunAsSilence) {
+      SINK_LOG("Treat underrun frames (%u) as silence frames",
+               SampleToFrame(samplesToPop - samplesRead));
+      // Report the zero-filled tail as if it were real audio.
+      sampleOut = samplesToPop;
+    } else {
+      NS_WARNING("Underrun when popping samples from audiosink ring buffer.");
+      TRACE_COMMENT("AudioSink::PopFrames", "Underrun %u frames missing",
+                    SampleToFrame(samplesToPop - samplesRead));
+    }
+    // silence the rest
+    PodZero(aBuffer + samplesRead, samplesToPop - samplesRead);
+  }
+
+  // Lets the owner thread know there is room to refill (see OnAudioPopped).
+  mAudioPopped.Notify();
+
+  SINK_LOG_V("Popping %u frames. Remaining in ringbuffer %u / %u\n", aFrames,
+             SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+             SampleToFrame(mProcessedSPSCQueue->Capacity()));
+  CheckIsAudible(Span(aBuffer, sampleOut), mOutputChannels);
+
+  return SampleToFrame(sampleOut);
+}
+
+// AudioStream::DataSource implementation. Returns true once all input has
+// been processed (or the sink was shut down), or when an error occurred.
+bool AudioSink::Ended() const {
+  // Return true when error encountered so AudioStream can start draining.
+  // Both atomic so we don't need locking
+  return mProcessedQueueFinished || mErrored;
+}
+
+// Feeds the interleaved samples to the audibility monitor and, if the
+// audible/inaudible state flipped, records the new state and notifies
+// listeners of mAudibleEvent.
+void AudioSink::CheckIsAudible(const Span<AudioDataValue>& aInterleaved,
+                               size_t aChannel) {
+  mAudibilityMonitor.ProcessInterleaved(aInterleaved, aChannel);
+  const bool audibleNow = mAudibilityMonitor.RecentlyAudible();
+
+  // No state change, nothing to report.
+  if (audibleNow == mIsAudioDataAudible) {
+    return;
+  }
+
+  mIsAudioDataAudible = audibleNow;
+  SINK_LOG("Notifying that audio is now %s",
+           mIsAudioDataAudible ? "audible" : "inaudible");
+  mAudibleEvent.Notify(mIsAudioDataAudible);
+}
+
+// Listener for mAudioPopped (connected in Start()): data left the ring
+// buffer, so try to refill it.
+void AudioSink::OnAudioPopped() {
+  SINK_LOG_V("AudioStream has used an audio packet.");
+  NotifyAudioNeeded();
+}
+
+// Listener for the audio queue's push event (connected in Start()): new input
+// is available, so try to move it into the ring buffer. aSample itself is not
+// used; the data is taken from the queue.
+void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) {
+  SINK_LOG_V("One new audio packet available.");
+  NotifyAudioNeeded();
+}
+
+// Returns the amount of audio currently readable from the ring buffer, in
+// milliseconds at the output rate (rounded down).
+uint32_t AudioSink::AudioQueuedInRingBufferMS() const {
+  // Widen before multiplying: `1000 * frames` can exceed 32 bits when the ring
+  // buffer is large, which would silently wrap the result.
+  const uint64_t queuedFrames =
+      SampleToFrame(mProcessedSPSCQueue->AvailableRead());
+  return static_cast<uint32_t>(1000 * queuedFrames / mOutputRate);
+}
+
+// Converts a count of interleaved samples into a count of frames at the
+// output channel count (integer division, rounded down).
+uint32_t AudioSink::SampleToFrame(uint32_t aSamples) const {
+  return aSamples / mOutputChannels;
+}
+
+// Moves decoded packets from mAudioQueue into the ring buffer until the queue
+// is empty, the ring buffer holds at least mProcessedQueueThresholdMS of
+// audio, or the front packet does not fit. Along the way it (re)creates the
+// converter when the input format changes and inserts silence to cover gaps
+// between packet timestamps. Sets mErrored and bails out on overflow, OOM or
+// an unsupported conversion. Once the queue is finished and empty, drains the
+// converter and marks processing as finished. Must run on the owner thread.
+void AudioSink::NotifyAudioNeeded() {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
+             "Not called from the owner's thread");
+
+  while (mAudioQueue.GetSize() &&
+         AudioQueuedInRingBufferMS() <
+             static_cast<uint32_t>(mProcessedQueueThresholdMS)) {
+    // Check if there's room in our ring buffer.
+    if (mAudioQueue.PeekFront()->Frames() >
+        SampleToFrame(mProcessedSPSCQueue->AvailableWrite())) {
+      SINK_LOG_V("Can't push %u frames. In ringbuffer %u / %u\n",
+                 mAudioQueue.PeekFront()->Frames(),
+                 SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+                 SampleToFrame(mProcessedSPSCQueue->Capacity()));
+      return;
+    }
+    SINK_LOG_V("Pushing %u frames. In ringbuffer %u / %u\n",
+               mAudioQueue.PeekFront()->Frames(),
+               SampleToFrame(mProcessedSPSCQueue->AvailableRead()),
+               SampleToFrame(mProcessedSPSCQueue->Capacity()));
+    RefPtr<AudioData> data = mAudioQueue.PopFront();
+
+    // Ignore the element with 0 frames and try next.
+    if (!data->Frames()) {
+      continue;
+    }
+
+    // (Re)create the converter on the first packet and whenever the input
+    // rate or channel count changes.
+    if (!mConverter ||
+        (data->mRate != mConverter->InputConfig().Rate() ||
+         data->mChannels != mConverter->InputConfig().Channels())) {
+      SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
+                 mConverter ? mConverter->InputConfig().Channels() : 0,
+                 mConverter ? mConverter->InputConfig().Rate() : 0,
+                 data->mChannels, data->mRate);
+
+      // Flush what the previous converter still holds before replacing it.
+      DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+
+      // mFramesParsed indicates the current playtime in frames at the current
+      // input sampling rate. Recalculate it per the new sampling rate.
+      // NOTE(review): this dereferences mConverter, so it relies on
+      // mFramesParsed being zero whenever mConverter is still null — confirm.
+      if (mFramesParsed) {
+        // We minimize overflow.
+        uint32_t oldRate = mConverter->InputConfig().Rate();
+        uint32_t newRate = data->mRate;
+        CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
+        if (!result.isValid()) {
+          NS_WARNING("Int overflow in AudioSink");
+          mErrored = true;
+          return;
+        }
+        mFramesParsed = result.value();
+      }
+
+      const AudioConfig::ChannelLayout inputLayout =
+          data->mChannelMap
+              ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap)
+              : AudioConfig::ChannelLayout(data->mChannels);
+      // Keep the input layout when the channel count is unchanged; otherwise
+      // use the default layout for the output channel count.
+      const AudioConfig::ChannelLayout outputLayout =
+          mOutputChannels == data->mChannels
+              ? inputLayout
+              : AudioConfig::ChannelLayout(mOutputChannels);
+      AudioConfig inConfig =
+          AudioConfig(inputLayout, data->mChannels, data->mRate);
+      AudioConfig outConfig =
+          AudioConfig(outputLayout, mOutputChannels, mOutputRate);
+      if (!AudioConverter::CanConvert(inConfig, outConfig)) {
+        mErrored = true;
+        return;
+      }
+      mConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
+    }
+
+    // See if there's a gap in the audio. If there is, push silence into the
+    // audio hardware, so we can play across the gap.
+    // Calculate the timestamp of the next chunk of audio in numbers of
+    // samples.
+    CheckedInt64 sampleTime =
+        TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
+    // Calculate the number of frames that have been pushed onto the audio
+    // hardware.
+    CheckedInt64 missingFrames = sampleTime - mFramesParsed;
+
+    if (!missingFrames.isValid() || !sampleTime.isValid()) {
+      NS_WARNING("Int overflow in AudioSink");
+      mErrored = true;
+      return;
+    }
+
+    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
+      // The next audio packet begins some time after the end of the last packet
+      // we pushed to the audio hardware. We must push silence into the audio
+      // hardware so that the next audio packet begins playback at the correct
+      // time. But don't push more than the ring buffer can receive.
+      missingFrames = std::min<int64_t>(
+          std::min<int64_t>(INT32_MAX, missingFrames.value()),
+          SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+      mFramesParsed += missingFrames.value();
+
+      SINK_LOG("Gap in the audio input, push %" PRId64 " frames of silence",
+               missingFrames.value());
+
+      RefPtr<AudioData> silenceData;
+      // Silence is created in the input format so that it goes through the
+      // same conversion as real data.
+      AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels);
+      if (!silenceBuffer) {
+        NS_WARNING("OOM in AudioSink");
+        mErrored = true;
+        return;
+      }
+      if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+        AlignedAudioBuffer convertedData =
+            mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer)))
+                .Forget();
+        silenceData = CreateAudioFromBuffer(std::move(convertedData), data);
+      } else {
+        silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data);
+      }
+      TRACE("Pushing silence");
+      PushProcessedAudio(silenceData);
+    }
+
+    mLastEndTime = data->GetEndTime();
+    mFramesParsed += data->Frames();
+
+    // Convert only when input and output formats differ.
+    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+      AlignedAudioBuffer buffer(data->MoveableData());
+      AlignedAudioBuffer convertedData =
+          mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget();
+      data = CreateAudioFromBuffer(std::move(convertedData), data);
+    }
+    // Remember the last packet that produced output; DrainConverter() uses it
+    // as the reference packet.
+    if (PushProcessedAudio(data)) {
+      mLastProcessedPacket = Some(data);
+    }
+  }
+
+  if (mAudioQueue.IsFinished() && mAudioQueue.GetSize() == 0) {
+    // We have reached the end of the data, drain the resampler.
+    DrainConverter(SampleToFrame(mProcessedSPSCQueue->AvailableWrite()));
+    mProcessedQueueFinished = true;
+  }
+}
+
+// Copies the samples of aData into the ring buffer. Returns the number of
+// frames in aData, or 0 when aData is null or empty. A short write (ring
+// buffer over-run) only produces a warning; the return value is unaffected.
+uint32_t AudioSink::PushProcessedAudio(AudioData* aData) {
+  if (!aData || !aData->Frames()) {
+    return 0;
+  }
+  // The ring buffer works in interleaved samples, not frames.
+  const int samplesToEnqueue =
+      static_cast<int>(aData->Frames() * aData->mChannels);
+  // Report frames here, like the ring buffer figures next to it.
+  TRACE_COMMENT("AudioSink::PushProcessedAudio", "%u frames (%u/%u)",
+                aData->Frames(),
+                SampleToFrame(mProcessedSPSCQueue->AvailableWrite()),
+                SampleToFrame(mProcessedSPSCQueue->Capacity()));
+  DebugOnly<int> rv =
+      mProcessedSPSCQueue->Enqueue(aData->Data().Elements(), samplesToEnqueue);
+  NS_WARNING_ASSERTION(
+      rv == samplesToEnqueue,
+      "AudioSink ring buffer over-run, can't push new data");
+  return aData->Frames();
+}
+
+// Wraps aBuffer (interleaved samples in the output format) into a new
+// AudioData that takes its offset and timestamp from aReference. Returns null
+// when the buffer holds no complete frame, or when the duration cannot be
+// represented (in which case mErrored is also set).
+already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer(
+    AlignedAudioBuffer&& aBuffer, AudioData* aReference) {
+  uint32_t frames = SampleToFrame(aBuffer.Length());
+  if (!frames) {
+    return nullptr;
+  }
+  // Computed up front to detect overflow before building the AudioData.
+  auto duration = media::TimeUnit(frames, mOutputRate);
+  if (!duration.IsValid()) {
+    NS_WARNING("Int overflow in AudioSink");
+    mErrored = true;
+    return nullptr;
+  }
+  RefPtr<AudioData> data =
+      new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer),
+                    mOutputChannels, mOutputRate);
+  MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
+  return data.forget();
+}
+
+// Flushes the audio still held by the converter into the ring buffer, capped
+// at aMaxFrames frames. Returns the number of frames pushed, or 0 when there
+// is nothing to drain. Consumes mLastProcessedPacket, so a second call without
+// new data in between is a no-op. Must run on the owner thread.
+uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
+    // nothing to drain.
+    return 0;
+  }
+
+  // The last processed packet provides the offset/timestamp for the drained
+  // data (see CreateAudioFromBuffer).
+  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
+  mLastProcessedPacket.reset();
+
+  // To drain we simply provide an empty packet to the audio converter.
+  AlignedAudioBuffer convertedData =
+      mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+  uint32_t frames = SampleToFrame(convertedData.Length());
+  if (!convertedData.SetLength(std::min(frames, aMaxFrames) *
+                               mOutputChannels)) {
+    // This can never happen as we were reducing the length of convertData.
+    mErrored = true;
+    return 0;
+  }
+
+  RefPtr<AudioData> data =
+      CreateAudioFromBuffer(std::move(convertedData), lastPacket);
+  return PushProcessedAudio(data);
+}
+
+// Copies a snapshot of this sink's state into the AudioSink section of aInfo.
+// Must run on the owner thread.
+void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+  auto& sinkInfo = aInfo.mAudioSinkWrapper.mAudioSink;
+  sinkInfo.mStartTime = mStartTime.ToMicroseconds();
+  sinkInfo.mLastGoodPosition = mLastGoodPosition.ToMicroseconds();
+  sinkInfo.mIsPlaying = mPlaying;
+  sinkInfo.mOutputRate = mOutputRate;
+  sinkInfo.mWritten = mWritten;
+  sinkInfo.mHasErrored = bool(mErrored);
+  // Without a stream, playback is reported as not complete.
+  sinkInfo.mPlaybackComplete =
+      mAudioStream ? mAudioStream->IsPlaybackCompleted() : false;
+}
+
+// Sets whether PopFrames() reports an underrun as if the missing frames were
+// silence that was actually provided.
+void AudioSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+  SINK_LOG("set mTreatUnderrunAsSilence=%d", aEnabled);
+  mTreatUnderrunAsSilence = aEnabled;
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mediasink/AudioSink.h b/dom/media/mediasink/AudioSink.h
new file mode 100644
index 0000000000..856227ee4c
--- /dev/null
+++ b/dom/media/mediasink/AudioSink.h
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef AudioSink_h__
+#define AudioSink_h__
+
+#include "AudioStream.h"
+#include "AudibilityMonitor.h"
+#include "MediaEventSource.h"
+#include "MediaInfo.h"
+#include "MediaQueue.h"
+#include "MediaSink.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Monitor.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Result.h"
+#include "nsISupportsImpl.h"
+
+namespace mozilla {
+
+class AudioConverter;
+
+// Pulls decoded audio from a MediaQueue, converts it to the output format and
+// feeds it, through a ring buffer, to an AudioStream. The AudioStream pulls
+// the data from its callback thread via the private DataSource interface.
+class AudioSink : private AudioStream::DataSource {
+ public:
+  enum class InitializationType {
+    // This AudioSink is being initialized for the first time
+    INITIAL,
+    // This AudioSink is being initialized because audio is being unmuted
+    UNMUTING
+  };
+  // Settings applied to the AudioStream when it is initialized.
+  struct PlaybackParams {
+    PlaybackParams(double aVolume, double aPlaybackRate, bool aPreservesPitch)
+        : mVolume(aVolume),
+          mPlaybackRate(aPlaybackRate),
+          mPreservesPitch(aPreservesPitch) {}
+    double mVolume;
+    double mPlaybackRate;
+    bool mPreservesPitch;
+  };
+
+  AudioSink(AbstractThread* aThread, MediaQueue<AudioData>& aAudioQueue,
+            const AudioInfo& aInfo, bool aShouldResistFingerprinting);
+
+  ~AudioSink();
+
+  // Allocate and initialize mAudioStream. Returns NS_OK on success.
+  nsresult InitializeAudioStream(const PlaybackParams& aParams,
+                                 const RefPtr<AudioDeviceInfo>& aAudioDevice,
+                                 InitializationType aInitializationType);
+
+  // Start audio playback.
+  nsresult Start(const media::TimeUnit& aStartTime,
+                 MozPromiseHolder<MediaSink::EndedPromise>& aEndedPromise);
+
+  /*
+   * All public functions are not thread-safe.
+   * Called on the task queue of MDSM only.
+   */
+  media::TimeUnit GetPosition();
+  media::TimeUnit GetEndTime() const;
+
+  // Check whether we've pushed more frames to the audio stream than it
+  // has played.
+  bool HasUnplayedFrames();
+
+  // The duration of the buffered frames.
+  media::TimeUnit UnplayedDuration() const;
+
+  // Shut down the AudioSink's resources. If an AudioStream existed, return the
+  // ended promise it had, if it's shutting down mid-stream because it's muting.
+  Maybe<MozPromiseHolder<MediaSink::EndedPromise>> Shutdown(
+      ShutdownCause aShutdownCause = ShutdownCause::Regular);
+
+  void SetVolume(double aVolume);
+  void SetStreamName(const nsAString& aStreamName);
+  void SetPlaybackRate(double aPlaybackRate);
+  void SetPreservesPitch(bool aPreservesPitch);
+  void SetPlaying(bool aPlaying);
+
+  MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; }
+
+  void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo);
+
+  // This returns true if the audio callbacks are being called, and so the
+  // audio stream-based clock is moving forward.
+  bool AudioStreamCallbackStarted() {
+    return mAudioStream && mAudioStream->CallbackStarted();
+  }
+
+  void UpdateStartTime(const media::TimeUnit& aStartTime) {
+    mStartTime = aStartTime;
+  }
+
+  void EnableTreatAudioUnderrunAsSilence(bool aEnabled);
+
+ private:
+  // Interface of AudioStream::DataSource.
+  // Called on the callback thread of cubeb. Returns the number of frames that
+  // were available.
+  uint32_t PopFrames(AudioDataValue* aBuffer, uint32_t aFrames,
+                     bool aAudioThreadChanged) override;
+  bool Ended() const override;
+
+  // When shutting down, it's important to not lose any audio data, it might be
+  // still of use, in two scenarios:
+  // - If the audio is now captured to a MediaStream, whatever is enqueued in
+  // the ring buffer needs to be played out now ;
+  // - If the AudioSink is shutting down because the audio is muted, it's
+  // important to keep the audio around in case it's quickly unmuted,
+  // and in general to keep A/V sync correct when unmuted.
+  void ReenqueueUnplayedAudioDataIfNeeded();
+
+  void CheckIsAudible(const Span<AudioDataValue>& aInterleaved,
+                      size_t aChannel);
+
+  // The audio stream resource. Used on the task queue of MDSM only.
+  RefPtr<AudioStream> mAudioStream;
+
+  // The presentation time of the first audio frame that was played.
+  // We can add this to the audio stream position to determine
+  // the current audio time.
+  media::TimeUnit mStartTime;
+
+  // Keep the last good position returned from the audio stream. Used to ensure
+  // position returned by GetPosition() is mono-increasing in spite of audio
+  // stream error. Used on the task queue of MDSM only.
+  media::TimeUnit mLastGoodPosition;
+
+  // Used on the task queue of MDSM only.
+  bool mPlaying;
+
+  // PCM frames written to the stream so far. Written on the callback thread,
+  // read on the MDSM thread.
+  Atomic<int64_t> mWritten;
+
+  // True if there is any error in processing audio data like overflow.
+  Atomic<bool> mErrored;
+
+  const RefPtr<AbstractThread> mOwnerThread;
+
+  // Audio Processing objects and methods
+  void OnAudioPopped();
+  void OnAudioPushed(const RefPtr<AudioData>& aSample);
+  void NotifyAudioNeeded();
+  // Drain the converter and add the output to the processed audio queue.
+  // A maximum of aMaxFrames will be added.
+  uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
+  already_AddRefed<AudioData> CreateAudioFromBuffer(
+      AlignedAudioBuffer&& aBuffer, AudioData* aReference);
+  // Add data to the processed queue, return the number of frames added.
+  uint32_t PushProcessedAudio(AudioData* aData);
+  uint32_t AudioQueuedInRingBufferMS() const;
+  uint32_t SampleToFrame(uint32_t aSamples) const;
+  UniquePtr<AudioConverter> mConverter;
+  UniquePtr<SPSCQueue<AudioDataValue>> mProcessedSPSCQueue;
+  MediaEventListener mAudioQueueListener;
+  MediaEventListener mAudioQueueFinishListener;
+  MediaEventListener mProcessedQueueListener;
+  // Number of frames processed from mAudioQueue. Used to determine gaps in
+  // the input stream. It indicates the time in frames since playback started
+  // at the current input framerate.
+  int64_t mFramesParsed;
+  Maybe<RefPtr<AudioData>> mLastProcessedPacket;
+  media::TimeUnit mLastEndTime;
+  // Never modified after construction.
+  uint32_t mOutputRate;
+  uint32_t mOutputChannels;
+  AudibilityMonitor mAudibilityMonitor;
+  bool mIsAudioDataAudible;
+  MediaEventProducer<bool> mAudibleEvent;
+  // Only signaled on the real-time audio thread.
+  MediaEventProducer<void> mAudioPopped;
+
+  Atomic<bool> mProcessedQueueFinished;
+  MediaQueue<AudioData>& mAudioQueue;
+  const float mProcessedQueueThresholdMS;
+
+  // True if we'd like to treat underrun as silent frames. But that can only be
+  // applied in the special situation for seamless looping.
+  // Atomic because it is set through the public setter (MDSM task queue) and
+  // read in PopFrames() (audio callback thread).
+  Atomic<bool> mTreatUnderrunAsSilence{false};
+};
+
+}  // namespace mozilla
+
+#endif  // AudioSink_h__
diff --git a/dom/media/mediasink/AudioSinkWrapper.cpp b/dom/media/mediasink/AudioSinkWrapper.cpp
new file mode 100644
index 0000000000..5a006479e1
--- /dev/null
+++ b/dom/media/mediasink/AudioSinkWrapper.cpp
@@ -0,0 +1,496 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSinkWrapper.h"
+#include "AudioDeviceInfo.h"
+#include "AudioSink.h"
+#include "VideoUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Result.h"
+#include "nsPrintfCString.h"
+
+// Log module for this file. LOG emits at Debug level, LOGV at Verbose level.
+// The macros intentionally do not end in a semicolon: every call site
+// supplies its own, so no stray empty statement follows each use and the
+// macros remain usable in an unbraced if/else.
+mozilla::LazyLogModule gAudioSinkWrapperLog("AudioSinkWrapper");
+#define LOG(...) \
+  MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
+#define LOGV(...) \
+  MOZ_LOG(gAudioSinkWrapperLog, mozilla::LogLevel::Verbose, (__VA_ARGS__))
+
+namespace mozilla {
+
+using media::TimeUnit;
+
+// Defaulted destructor, defined out of line; no manual cleanup is performed
+// here.
+AudioSinkWrapper::~AudioSinkWrapper() = default;
+
+// Final teardown of the wrapper. Playback must already be stopped (asserted).
+// Drops the AudioSink factory and resolves the ended promise with `true` if
+// its holder still has a pending promise.
+void AudioSinkWrapper::Shutdown() {
+  AssertOwnerThread();
+  MOZ_ASSERT(!mIsStarted, "Must be called after playback stopped.");
+  // No AudioSink can be created past this point.
+  mCreator = nullptr;
+  mEndedPromiseHolder.ResolveIfExists(true, __func__);
+}
+
+// Returns the promise tracking the end of audio playback when asked about the
+// audio track, and nullptr for any other track type. Only valid once playback
+// has started (asserted).
+RefPtr<MediaSink::EndedPromise> AudioSinkWrapper::OnEnded(TrackType aType) {
+  AssertOwnerThread();
+  MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+  if (aType != TrackInfo::kAudioTrack) {
+    return nullptr;
+  }
+  return mEndedPromise;
+}
+
+// Returns the end time for the given track type.
+//  - Non-audio track types: zero.
+//  - Audio with a sink: the sink's end time once its audio stream callback
+//    has started, zero before that.
+//  - Audio without a sink while muted: the system-clock position when
+//    playing, otherwise the stored play duration.
+//  - Anything else: zero.
+TimeUnit AudioSinkWrapper::GetEndTime(TrackType aType) const {
+  AssertOwnerThread();
+  MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+
+  if (aType != TrackInfo::kAudioTrack) {
+    return TimeUnit::Zero();
+  }
+
+  if (mAudioSink) {
+    if (mAudioSink->AudioStreamCallbackStarted()) {
+      return mAudioSink->GetEndTime();
+    }
+    return TimeUnit::Zero();
+  }
+
+  if (!IsMuted()) {
+    return TimeUnit::Zero();
+  }
+
+  // Muted and sink-less: the wrapper itself keeps track of time.
+  if (IsPlaying()) {
+    return GetSystemClockPosition(TimeStamp::Now());
+  }
+  return mPlayDuration;
+}
+
+// Computes the media position at aNow from the system clock: the stored play
+// duration plus the wall-clock time elapsed since mPlayStartTime, scaled by
+// the current playback rate. mPlayStartTime must be set (asserted).
+TimeUnit AudioSinkWrapper::GetSystemClockPosition(TimeStamp aNow) const {
+  AssertOwnerThread();
+  MOZ_ASSERT(!mPlayStartTime.IsNull());
+  // Wall-clock seconds since playback (re)started.
+  const double elapsedSeconds = (aNow - mPlayStartTime).ToSeconds();
+  // Convert to media time by applying the playback rate.
+  const TimeUnit mediaElapsed =
+      TimeUnit::FromSeconds(elapsedSeconds * mParams.mPlaybackRate);
+  return mPlayDuration + mediaElapsed;
+}
+
+// Muted means the configured volume is exactly zero; there is no separate
+// mute flag in this class.
+bool AudioSinkWrapper::IsMuted() const {
+  AssertOwnerThread();
+  return mParams.mVolume == 0.0;
+}
+
+// Returns the current media position. If aTimeStamp is non-null it receives
+// the TimeStamp sampled at the top of this call.
+//
+// The position comes from one of three sources, recorded in mLastClockSource:
+//  - the AudioSink, while audio has not ended, the wrapper is not muted and a
+//    sink exists;
+//  - the system clock, while mPlayStartTime is set but the sink is not used;
+//  - the stored mPlayDuration otherwise.
+//
+// This is not a pure getter: on the system-clock path while muted it may drop
+// stale packets from the audio queue and resolve the ended promise.
+TimeUnit AudioSinkWrapper::GetPosition(TimeStamp* aTimeStamp) {
+  AssertOwnerThread();
+  MOZ_ASSERT(mIsStarted, "Must be called after playback starts.");
+
+  TimeUnit pos;
+  TimeStamp t = TimeStamp::Now();
+
+  if (!mAudioEnded && !IsMuted() && mAudioSink) {
+    // The previous query was answered by the system clock, so this is the
+    // first query served by the sink since the switch.
+    if (mLastClockSource == ClockSource::SystemClock) {
+      TimeUnit switchTime = GetSystemClockPosition(t);
+      // Update the _actual_ start time of the audio stream now that it has
+      // started, preventing any clock discontinuity.
+      mAudioSink->UpdateStartTime(switchTime);
+      LOGV("%p: switching to audio clock at media time %lf", this,
+           switchTime.ToSeconds());
+    }
+    // Rely on the audio sink to report playback position when it is not ended.
+    pos = mAudioSink->GetPosition();
+    LOGV("%p: Getting position from the Audio Sink %lf", this, pos.ToSeconds());
+    mLastClockSource = ClockSource::AudioStream;
+  } else if (!mPlayStartTime.IsNull()) {
+    // Calculate playback position using system clock if we are still playing,
+    // but not rendering the audio, because this audio sink is muted.
+    pos = GetSystemClockPosition(t);
+    LOGV("%p: Getting position from the system clock %lf", this,
+         pos.ToSeconds());
+    if (IsMuted()) {
+      if (mAudioQueue.GetSize() > 0) {
+        // audio track, but it's muted and won't be dequeued, discard packets
+        // that are behind the current media time, to keep the queue size under
+        // control.
+        DropAudioPacketsIfNeeded(pos);
+      }
+      // If muted, it's necessary to manually check if the audio has "ended",
+      // meaning that all the audio packets have been consumed, to resolve the
+      // ended promise.
+      if (CheckIfEnded()) {
+        MOZ_ASSERT(!mAudioSink);
+        mEndedPromiseHolder.ResolveIfExists(true, __func__);
+      }
+    }
+    mLastClockSource = ClockSource::SystemClock;
+  } else {
+    // Return how long we've played if we are not playing.
+    pos = mPlayDuration;
+    LOGV("%p: Getting static position, not playing %lf", this, pos.ToSeconds());
+    mLastClockSource = ClockSource::Paused;
+  }
+
+  // Report the timestamp the position was computed against, not a fresh one.
+  if (aTimeStamp) {
+    *aTimeStamp = t;
+  }
+
+  return pos;
+}
+
+// True when the audio queue is finished and no packet is left in it.
+bool AudioSinkWrapper::CheckIfEnded() const {
+  if (!mAudioQueue.IsFinished()) {
+    return false;
+  }
+  return mAudioQueue.GetSize() == 0u;
+}
+
+// Forwards to the AudioSink when there is one; without a sink there are no
+// unplayed frames. aType is not consulted.
+bool AudioSinkWrapper::HasUnplayedFrames(TrackType aType) const {
+  AssertOwnerThread();
+  return mAudioSink && mAudioSink->HasUnplayedFrames();
+}
+
+// Forwards to the AudioSink when there is one; without a sink the unplayed
+// duration is zero. aType is not consulted.
+media::TimeUnit AudioSinkWrapper::UnplayedDuration(TrackType aType) const {
+  AssertOwnerThread();
+  if (!mAudioSink) {
+    return media::TimeUnit::Zero();
+  }
+  return mAudioSink->UnplayedDuration();
+}
+
+// Pops packets off the front of the audio queue for as long as the front
+// packet ends strictly before aMediaPosition, logging each dropped packet.
+void AudioSinkWrapper::DropAudioPacketsIfNeeded(
+    const TimeUnit& aMediaPosition) {
+  uint32_t droppedCount = 0;
+  for (RefPtr<AudioData> head = mAudioQueue.PeekFront();
+       head && head->mTime + head->mDuration < aMediaPosition;
+       head = mAudioQueue.PeekFront()) {
+    // The front packet is entirely in the past: remove it.
+    RefPtr<AudioData> popped = mAudioQueue.PopFront();
+    droppedCount++;
+    if (popped) {
+      LOGV(
+          "Dropping audio packets: media position: %lf, "
+          "packet dropped: [%lf, %lf] (%u so far).\n",
+          aMediaPosition.ToSeconds(), popped->mTime.ToSeconds(),
+          (popped->GetEndTime()).ToSeconds(), droppedCount);
+    }
+  }
+}
+
+// Reacts to a mute state change.
+//  - Audio already ended: nothing to do.
+//  - Unmuting: if playing, asynchronously start a new AudioSink at the
+//    current system-clock position; if not playing, do nothing.
+//  - Muting: if a sink exists, switch timekeeping to the system clock (when
+//    playing), shut the sink down and take over its ended-promise holder if
+//    the sink hands one back.
+void AudioSinkWrapper::OnMuted(bool aMuted) {
+  AssertOwnerThread();
+  LOG("%p: AudioSinkWrapper::OnMuted(%s)", this, aMuted ? "true" : "false");
+  // Nothing to do
+  if (mAudioEnded) {
+    LOG("%p: AudioSinkWrapper::OnMuted, but no audio track", this);
+    return;
+  }
+
+  if (!aMuted) {
+    if (!IsPlaying()) {
+      LOG("%p: AudioSinkWrapper::OnMuted: not playing, not re-creating an "
+          "AudioSink",
+          this);
+      return;
+    }
+    LOG("%p: AudioSinkWrapper unmuted, re-creating an AudioStream.", this);
+    TimeUnit resumePosition = GetSystemClockPosition(TimeStamp::Now());
+    nsresult startResult =
+        StartAudioSink(resumePosition, AudioSinkStartPolicy::ASYNC);
+    if (NS_FAILED(startResult)) {
+      NS_WARNING(
+          "Could not start AudioSink from AudioSinkWrapper when unmuting");
+    }
+    return;
+  }
+
+  // Muting without a live sink requires no work.
+  if (!mAudioSink) {
+    return;
+  }
+
+  LOG("AudioSinkWrapper muted, shutting down AudioStream.");
+  mAudioSinkEndedPromise.DisconnectIfExists();
+  if (IsPlaying()) {
+    // Re-base the system clock on the sink's last reported position so that
+    // time keeps advancing from there once the sink is gone.
+    mPlayDuration = mAudioSink->GetPosition();
+    mPlayStartTime = TimeStamp::Now();
+  }
+  Maybe<MozPromiseHolder<MediaSink::EndedPromise>> returnedHolder =
+      mAudioSink->Shutdown(ShutdownCause::Muting);
+  // There will generally be a promise here, except if the stream has
+  // errored out, or if it has just finished. In both cases, the promise has
+  // been handled appropriately, there is nothing to do.
+  if (returnedHolder.isSome()) {
+    mEndedPromiseHolder = std::move(returnedHolder.ref());
+  }
+  mAudioSink = nullptr;
+}
+
+// Stores the new volume, notifies OnMuted() when the volume crosses to or
+// from zero, then forwards the volume to the AudioSink if one exists.
+void AudioSinkWrapper::SetVolume(double aVolume) {
+  AssertOwnerThread();
+
+  // Sample the old mute state before overwriting the stored volume.
+  const bool wasMuted = mParams.mVolume == 0;
+  const bool nowMuted = aVolume == 0.;
+  mParams.mVolume = aVolume;
+
+  // Only a transition between muted and unmuted is of interest.
+  if (wasMuted != nowMuted) {
+    OnMuted(nowMuted);
+  }
+
+  if (mAudioSink) {
+    mAudioSink->SetVolume(aVolume);
+  }
+}
+
+// Forwards the stream name to the AudioSink; a no-op when there is no sink.
+void AudioSinkWrapper::SetStreamName(const nsAString& aStreamName) {
+  AssertOwnerThread();
+  if (!mAudioSink) {
+    return;
+  }
+  mAudioSink->SetStreamName(aStreamName);
+}
+
+// Applies a new playback rate. When a sink is in use for live audio the rate
+// is handed to it; otherwise, if playing, the system clock is re-based at the
+// current position so the old rate applies up to now and the new one after.
+void AudioSinkWrapper::SetPlaybackRate(double aPlaybackRate) {
+  AssertOwnerThread();
+  if (mAudioEnded || !mAudioSink) {
+    if (!mPlayStartTime.IsNull()) {
+      // Adjust playback duration and start time when we are still playing.
+      TimeStamp rebaseTime = TimeStamp::Now();
+      mPlayDuration = GetSystemClockPosition(rebaseTime);
+      mPlayStartTime = rebaseTime;
+    }
+    // Do nothing when not playing. Changes in playback rate will be taken
+    // into account by GetSystemClockPosition().
+  } else {
+    // Pass the playback rate to the audio sink. The underlying AudioStream
+    // will handle playback rate changes and report correct audio position.
+    mAudioSink->SetPlaybackRate(aPlaybackRate);
+  }
+  // mParams.mPlaybackRate affects GetSystemClockPosition(). It should be
+  // updated after the calls to GetSystemClockPosition();
+  mParams.mPlaybackRate = aPlaybackRate;
+}
+
+// Remembers the pitch-preservation setting and forwards it to the AudioSink
+// when one exists.
+void AudioSinkWrapper::SetPreservesPitch(bool aPreservesPitch) {
+  AssertOwnerThread();
+  mParams.mPreservesPitch = aPreservesPitch;
+  if (!mAudioSink) {
+    return;
+  }
+  mAudioSink->SetPreservesPitch(aPreservesPitch);
+}
+
+// Resumes (aPlaying == true) or pauses (aPlaying == false) playback. Has no
+// effect before playback has been started.
+//
+// With a live AudioSink the request is forwarded to it. Without one, resuming
+// synchronously starts a new AudioSink at the current position, after
+// dropping queued packets that are already behind that position.
+//
+// Afterwards the system-clock bookkeeping is updated: resuming stamps
+// mPlayStartTime, pausing stores the position reached in mPlayDuration and
+// clears mPlayStartTime.
+void AudioSinkWrapper::SetPlaying(bool aPlaying) {
+  AssertOwnerThread();
+  LOG("%p: AudioSinkWrapper::SetPlaying %s", this, aPlaying ? "true" : "false");
+
+  // Resume/pause matters only when playback started.
+  if (!mIsStarted) {
+    return;
+  }
+
+  if (mAudioSink) {
+    mAudioSink->SetPlaying(aPlaying);
+  } else if (aPlaying) {
+    LOG("%p: AudioSinkWrapper::SetPlaying : starting an AudioSink", this);
+    TimeUnit switchTime = GetPosition();
+    DropAudioPacketsIfNeeded(switchTime);
+    nsresult rv = StartAudioSink(switchTime, AudioSinkStartPolicy::SYNC);
+    if (NS_FAILED(rv)) {
+      // StartAudioSink rejects the ended promise itself on failure; warn here
+      // so the failure is not dropped silently, as OnMuted() does.
+      NS_WARNING(
+          "Could not start AudioSink from AudioSinkWrapper when resuming");
+    }
+  }
+
+  if (aPlaying) {
+    MOZ_ASSERT(mPlayStartTime.IsNull());
+    mPlayStartTime = TimeStamp::Now();
+  } else {
+    // Remember how long we've played.
+    mPlayDuration = GetPosition();
+    // mPlayStartTime must be updated later since GetPosition()
+    // depends on the value of mPlayStartTime.
+    mPlayStartTime = TimeStamp();
+  }
+}
+
+// Returns the playback rate last stored in mParams (see SetPlaybackRate()).
+double AudioSinkWrapper::PlaybackRate() const {
+  AssertOwnerThread();
+  return mParams.mPlaybackRate;
+}
+
+// Starts playback at aStartTime. Must not already be started (asserted).
+//
+// If the audio source has not ended, synchronously starts an AudioSink and
+// returns its result. Otherwise no sink is created: the ended promise is
+// created already resolved when the media has an audio track, or cleared
+// when it has none, and NS_OK is returned.
+nsresult AudioSinkWrapper::Start(const TimeUnit& aStartTime,
+                                 const MediaInfo& aInfo) {
+  LOG("%p AudioSinkWrapper::Start", this);
+  AssertOwnerThread();
+  MOZ_ASSERT(!mIsStarted, "playback already started.");
+
+  mIsStarted = true;
+  mPlayDuration = aStartTime;
+  mPlayStartTime = TimeStamp::Now();
+  mAudioEnded = IsAudioSourceEnded(aInfo);
+
+  if (!mAudioEnded) {
+    return StartAudioSink(aStartTime, AudioSinkStartPolicy::SYNC);
+  }
+
+  // Resolve promise if we start playback at the end position of the audio.
+  if (aInfo.HasAudio()) {
+    mEndedPromise = MediaSink::EndedPromise::CreateAndResolve(true, __func__);
+  } else {
+    mEndedPromise = nullptr;
+  }
+  return NS_OK;
+}
+
+// Creates and starts an AudioSink at aStartTime. No AudioSink may exist when
+// this is called (release-asserted).
+//
+// A fresh ended promise is always obtained from mEndedPromiseHolder and
+// OnAudioEnded() is registered on it, even when muted. When muted, no sink is
+// created and NS_OK is returned.
+//
+// aPolicy selects how the sink is brought up:
+//  - SYNC: create, initialize and start on the calling thread. Returns the
+//    failure code of whichever step failed, or NS_OK.
+//  - ASYNC: create here, initialize on a background task, then hop back to
+//    the owner thread to start. Always returns NS_OK; failures are reported
+//    by rejecting the ended promise.
+nsresult AudioSinkWrapper::StartAudioSink(const TimeUnit& aStartTime,
+                                          AudioSinkStartPolicy aPolicy) {
+  MOZ_RELEASE_ASSERT(!mAudioSink);
+
+  nsresult rv = NS_OK;
+
+  mAudioSinkEndedPromise.DisconnectIfExists();
+  mEndedPromise = mEndedPromiseHolder.Ensure(__func__);
+  mEndedPromise
+      ->Then(mOwnerThread.get(), __func__, this,
+             &AudioSinkWrapper::OnAudioEnded, &AudioSinkWrapper::OnAudioEnded)
+      ->Track(mAudioSinkEndedPromise);
+
+  LOG("%p: AudioSinkWrapper::StartAudioSink (%s)", this,
+      aPolicy == AudioSinkStartPolicy::ASYNC ? "Async" : "Sync");
+
+  if (IsMuted()) {
+    LOG("%p: Muted: not starting an audio sink", this);
+    return NS_OK;
+  }
+  LOG("%p: Not muted: starting a new audio sink", this);
+  if (aPolicy == AudioSinkStartPolicy::ASYNC) {
+    UniquePtr<AudioSink> audioSink;
+    audioSink.reset(mCreator->Create());
+    // `self` keeps the wrapper alive for the lifetime of each runnable; `this`
+    // is captured as well so members can be named directly.
+    NS_DispatchBackgroundTask(NS_NewRunnableFunction(
+        "StartAudioSink (Async part: initialization)",
+        [self = RefPtr<AudioSinkWrapper>(this), audioSink{std::move(audioSink)},
+         this]() mutable {
+          LOG("AudioSink initialization on background thread");
+          // This can take about 200ms, e.g. on Windows, we don't want to do
+          // it on the MDSM thread, because it would make the clock not update
+          // for that amount of time, and the video would therefore not
+          // update. The Start() call is very cheap on the other hand, we can
+          // do it from the MDSM thread.
+          nsresult initRv = audioSink->InitializeAudioStream(
+              mParams, mAudioDevice, AudioSink::InitializationType::UNMUTING);
+          mOwnerThread->Dispatch(NS_NewRunnableFunction(
+              "StartAudioSink (Async part: start from MDSM thread)",
+              [self = RefPtr<AudioSinkWrapper>(this),
+               audioSink{std::move(audioSink)}, this, initRv]() mutable {
+                LOG("AudioSink async init done, back on MDSM thread");
+                if (NS_FAILED(initRv)) {
+                  LOG("Async AudioSink initialization failed");
+                  mEndedPromiseHolder.RejectIfExists(initRv, __func__);
+                  return;
+                }
+
+                // It's possible that the newly created isn't needed at this
+                // point, in some cases:
+                // 1. An AudioSink was created synchronously while this
+                // AudioSink was initialized asynchronously, bail out here. This
+                // happens when seeking (which does a synchronous
+                // initialization) right after unmuting.
+                // 2. The media element was muted while the async initialization
+                // was happening.
+                // 3. The AudioSinkWrapper was stopped during asynchronous
+                // creation.
+                // 4. The AudioSinkWrapper was paused during asynchronous
+                // creation.
+                if (mAudioSink || IsMuted() || !mIsStarted ||
+                    mPlayStartTime.IsNull()) {
+                  LOG("AudioSink initialized async isn't needed, shutting "
+                      "it down.");
+                  DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> holder =
+                      audioSink->Shutdown();
+                  MOZ_ASSERT(holder.inspect().isNothing());
+                  return;
+                }
+
+                MOZ_ASSERT(!mAudioSink);
+                TimeUnit switchTime = GetPosition();
+                DropAudioPacketsIfNeeded(switchTime);
+                mAudioSink.swap(audioSink);
+                if (mTreatUnderrunAsSilence) {
+                  mAudioSink->EnableTreatAudioUnderrunAsSilence(
+                      mTreatUnderrunAsSilence);
+                }
+                LOG("AudioSink async, start");
+                nsresult startRv =
+                    mAudioSink->Start(switchTime, mEndedPromiseHolder);
+                if (NS_FAILED(startRv)) {
+                  LOG("Async AudioSinkWrapper start failed");
+                  mEndedPromiseHolder.RejectIfExists(startRv, __func__);
+                }
+              }));
+        }));
+  } else {
+    mAudioSink.reset(mCreator->Create());
+    // Assign to the function-level rv rather than declaring a new one, so
+    // that a failure of Start() below is what the final `return rv` reports.
+    rv = mAudioSink->InitializeAudioStream(
+        mParams, mAudioDevice, AudioSink::InitializationType::INITIAL);
+    if (NS_FAILED(rv)) {
+      mEndedPromiseHolder.RejectIfExists(rv, __func__);
+      LOG("Sync AudioSinkWrapper initialization failed");
+      return rv;
+    }
+    if (mTreatUnderrunAsSilence) {
+      mAudioSink->EnableTreatAudioUnderrunAsSilence(mTreatUnderrunAsSilence);
+    }
+    rv = mAudioSink->Start(aStartTime, mEndedPromiseHolder);
+    if (NS_FAILED(rv)) {
+      LOG("Sync AudioSinkWrapper start failed");
+      mEndedPromiseHolder.RejectIfExists(rv, __func__);
+    }
+  }
+
+  return rv;
+}
+
+// No audio track, or an audio queue that is finished and empty (and so will
+// never deliver more data), both count as "audio ended". The queue test is
+// shared with CheckIfEnded() instead of being spelled out a second time.
+bool AudioSinkWrapper::IsAudioSourceEnded(const MediaInfo& aInfo) const {
+  return !aInfo.HasAudio() || CheckIfEnded();
+}
+
+// Stops playback. Must currently be started (asserted). Marks the wrapper as
+// stopped with audio ended, stops tracking the ended promise and, if an
+// AudioSink exists, shuts it down and clears mEndedPromise.
+void AudioSinkWrapper::Stop() {
+  AssertOwnerThread();
+  MOZ_ASSERT(mIsStarted, "playback not started.");
+
+  LOG("%p: AudioSinkWrapper::Stop", this);
+
+  mIsStarted = false;
+  mAudioEnded = true;
+
+  mAudioSinkEndedPromise.DisconnectIfExists();
+
+  if (!mAudioSink) {
+    return;
+  }
+
+  // A plain Shutdown() is expected to hand back no promise holder.
+  DebugOnly<Maybe<MozPromiseHolder<EndedPromise>>> leftover =
+      mAudioSink->Shutdown();
+  MOZ_ASSERT(leftover.inspect().isNothing());
+  mAudioSink = nullptr;
+  mEndedPromise = nullptr;
+}
+
+// True between Start() and Stop(), which set and clear mIsStarted.
+bool AudioSinkWrapper::IsStarted() const {
+  AssertOwnerThread();
+  return mIsStarted;
+}
+
+// Playing means started and not paused; a null mPlayStartTime denotes the
+// paused state.
+bool AudioSinkWrapper::IsPlaying() const {
+  AssertOwnerThread();
+  if (!IsStarted()) {
+    return false;
+  }
+  return !mPlayStartTime.IsNull();
+}
+
+// Registered in StartAudioSink() as both the resolve and the reject callback
+// of the ended promise. Hands timekeeping over from the sink to the system
+// clock and marks audio as ended.
+void AudioSinkWrapper::OnAudioEnded() {
+  AssertOwnerThread();
+  LOG("%p: AudioSinkWrapper::OnAudioEnded", this);
+  mAudioSinkEndedPromise.Complete();
+  // Capture the position while mAudioEnded is still false, i.e. before
+  // GetPosition() stops consulting the sink.
+  mPlayDuration = GetPosition();
+  // If playing, restart the system clock from the position just captured.
+  if (!mPlayStartTime.IsNull()) {
+    mPlayStartTime = TimeStamp::Now();
+  }
+  mAudioEnded = true;
+}
+
+// Fills in the wrapper's section of aInfo, then lets the AudioSink (if any)
+// add its own details.
+void AudioSinkWrapper::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+  AssertOwnerThread();
+  auto& wrapperInfo = aInfo.mAudioSinkWrapper;
+  wrapperInfo.mIsPlaying = IsPlaying();
+  wrapperInfo.mIsStarted = IsStarted();
+  wrapperInfo.mAudioEnded = mAudioEnded;
+  if (!mAudioSink) {
+    return;
+  }
+  mAudioSink->GetDebugInfo(aInfo);
+}
+
+// Remembers the setting so that sinks created later pick it up, and applies
+// it right away to the current AudioSink when there is one.
+void AudioSinkWrapper::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+  mTreatUnderrunAsSilence = aEnabled;
+  if (!mAudioSink) {
+    return;
+  }
+  mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled);
+}
+
+}  // namespace mozilla
+
+#undef LOG
+#undef LOGV
diff --git a/dom/media/mediasink/AudioSinkWrapper.h b/dom/media/mediasink/AudioSinkWrapper.h
new file mode 100644
index 0000000000..411983c526
--- /dev/null
+++ b/dom/media/mediasink/AudioSinkWrapper.h
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AudioSinkWrapper_h_
+#define AudioSinkWrapper_h_
+
+#include "mozilla/AbstractThread.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include "AudioSink.h"
+#include "MediaSink.h"
+
+namespace mozilla {
+class MediaData;
+template <class T>
+class MediaQueue;
+
+/**
+ * A wrapper around AudioSink to provide the interface of MediaSink.
+ *
+ * The wrapper owns at most one AudioSink at a time and creates new ones on
+ * demand through a caller-supplied factory function. While no AudioSink is
+ * in use (for instance while muted), the media position is derived from the
+ * system clock instead of the audio clock.
+ *
+ * Most methods assert that they run on the owner thread passed to the
+ * constructor.
+ */
+class AudioSinkWrapper : public MediaSink {
+  using PlaybackParams = AudioSink::PlaybackParams;
+
+  // An AudioSink factory.
+  class Creator {
+   public:
+    virtual ~Creator() = default;
+    // Returns a newly created AudioSink as a raw pointer; callers in this
+    // class immediately store it in a UniquePtr.
+    virtual AudioSink* Create() = 0;
+  };
+
+  // Wrap around a function object which creates AudioSinks.
+  template <typename Function>
+  class CreatorImpl : public Creator {
+   public:
+    // Stores a copy of aFunc.
+    explicit CreatorImpl(const Function& aFunc) : mFunction(aFunc) {}
+    AudioSink* Create() override { return mFunction(); }
+
+   private:
+    Function mFunction;
+  };
+
+ public:
+  // aOwnerThread: the thread the wrapper's methods assert they run on.
+  // aAudioQueue: queue of audio packets; held by reference, not owned.
+  // aFunc: callable returning a new AudioSink*, used every time a sink is
+  //        needed.
+  // aVolume, aPlaybackRate, aPreservesPitch: initial playback parameters.
+  // aAudioDevice: output device, or null for the system default.
+  template <typename Function>
+  AudioSinkWrapper(AbstractThread* aOwnerThread,
+                   MediaQueue<AudioData>& aAudioQueue, const Function& aFunc,
+                   double aVolume, double aPlaybackRate, bool aPreservesPitch,
+                   RefPtr<AudioDeviceInfo> aAudioDevice)
+      : mOwnerThread(aOwnerThread),
+        mCreator(new CreatorImpl<Function>(aFunc)),
+        mAudioDevice(std::move(aAudioDevice)),
+        mIsStarted(false),
+        mParams(aVolume, aPlaybackRate, aPreservesPitch),
+        // Give an invalid value to facilitate debug if used before playback
+        // starts.
+        mPlayDuration(media::TimeUnit::Invalid()),
+        mAudioEnded(true),
+        mAudioQueue(aAudioQueue) {}
+
+  // MediaSink interface: position and end-of-playback queries.
+  RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+  media::TimeUnit GetEndTime(TrackType aType) const override;
+  media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+  bool HasUnplayedFrames(TrackType aType) const override;
+  media::TimeUnit UnplayedDuration(TrackType aType) const override;
+  // Pops packets from the front of the audio queue while they end before
+  // aMediaPosition.
+  void DropAudioPacketsIfNeeded(const media::TimeUnit& aMediaPosition);
+
+  // MediaSink interface: playback parameters.
+  void SetVolume(double aVolume) override;
+  void SetStreamName(const nsAString& aStreamName) override;
+  void SetPlaybackRate(double aPlaybackRate) override;
+  void SetPreservesPitch(bool aPreservesPitch) override;
+  void SetPlaying(bool aPlaying) override;
+
+  double PlaybackRate() const override;
+
+  // MediaSink interface: lifecycle.
+  nsresult Start(const media::TimeUnit& aStartTime,
+                 const MediaInfo& aInfo) override;
+  void Stop() override;
+  bool IsStarted() const override;
+  bool IsPlaying() const override;
+
+  const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; }
+
+  void Shutdown() override;
+
+  void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+
+  void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override;
+
+ private:
+  // The clock that was in use for the previous position query, allowing to
+  // detect clock switches.
+  enum class ClockSource {
+    // The clock comes from an underlying system-level audio stream.
+    AudioStream,
+    // The clock comes from the system clock.
+    SystemClock,
+    // The stream is paused, a constant time is reported.
+    Paused
+  } mLastClockSource = ClockSource::Paused;
+  // True when the stored volume is zero.
+  bool IsMuted() const;
+  // Called by SetVolume() when the volume crosses to or from zero.
+  void OnMuted(bool aMuted);
+  virtual ~AudioSinkWrapper();
+
+  void AssertOwnerThread() const {
+    MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+  }
+
+  // An AudioSink can be started synchronously from the MDSM thread, or
+  // asynchronously.
+  // In synchronous mode, the clock doesn't advance until the sink has been
+  // created, initialized and started. This is useful for the initial startup,
+  // and when seeking.
+  // In asynchronous mode, the clock will keep going forward (using the system
+  // clock) until the AudioSink is started, at which point the clock will use
+  // the AudioSink clock. This is used when unmuting a media element.
+  enum class AudioSinkStartPolicy { SYNC, ASYNC };
+  nsresult StartAudioSink(const media::TimeUnit& aStartTime,
+                          AudioSinkStartPolicy aPolicy);
+
+  // Get the current media position using the system clock. This is used when
+  // the audio is muted, or when the media has no audio track. Otherwise, the
+  // media's position is based on the clock of the AudioStream.
+  media::TimeUnit GetSystemClockPosition(TimeStamp aNow) const;
+  // True when the audio queue is finished and empty.
+  bool CheckIfEnded() const;
+
+  // Resolve/reject handler attached to the ended promise.
+  void OnAudioEnded();
+
+  // True when aInfo has no audio, or the audio queue is finished and empty.
+  bool IsAudioSourceEnded(const MediaInfo& aInfo) const;
+
+  const RefPtr<AbstractThread> mOwnerThread;
+  // Factory for AudioSinks; cleared by Shutdown().
+  UniquePtr<Creator> mCreator;
+  // The sink currently in use, or null when there is none.
+  UniquePtr<AudioSink> mAudioSink;
+  // The output device this AudioSink is playing data to. The system's default
+  // device is used if this is null.
+  const RefPtr<AudioDeviceInfo> mAudioDevice;
+  // Will only exist when media has an audio track.
+  RefPtr<EndedPromise> mEndedPromise;
+  // Holder used to resolve or reject the ended promise; it is passed to the
+  // AudioSink on Start() and may be handed back by the sink on shutdown.
+  MozPromiseHolder<EndedPromise> mEndedPromiseHolder;
+
+  bool mIsStarted;
+  PlaybackParams mParams;
+
+  // Wall-clock time at which the system clock was last (re)started; null
+  // while paused.
+  TimeStamp mPlayStartTime;
+  // Media position reached as of mPlayStartTime.
+  media::TimeUnit mPlayDuration;
+
+  bool mAudioEnded;
+  // Tracks the OnAudioEnded() request attached to mEndedPromise.
+  MozPromiseRequestHolder<EndedPromise> mAudioSinkEndedPromise;
+  MediaQueue<AudioData>& mAudioQueue;
+
+  // True if we'd like to treat underrun as silent frames. But that can only be
+  // applied in the special situation for seamless looping.
+  bool mTreatUnderrunAsSilence = false;
+};
+
+}  // namespace mozilla
+
+#endif  // AudioSinkWrapper_h_
diff --git a/dom/media/mediasink/DecodedStream.cpp b/dom/media/mediasink/DecodedStream.cpp
new file mode 100644
index 0000000000..0a488dcfdf
--- /dev/null
+++ b/dom/media/mediasink/DecodedStream.cpp
@@ -0,0 +1,1171 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "DecodedStream.h"
+
+#include "AudioDecoderInputTrack.h"
+#include "AudioSegment.h"
+#include "MediaData.h"
+#include "MediaDecoderStateMachine.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "SharedBuffer.h"
+#include "Tracing.h"
+#include "VideoSegment.h"
+#include "VideoUtils.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/SyncRunnable.h"
+#include "mozilla/gfx/Point.h"
+#include "mozilla/StaticPrefs_dom.h"
+#include "nsProxyRelease.h"
+
+namespace mozilla {
+
+using media::NullableTimeUnit;
+using media::TimeUnit;
+
+extern LazyLogModule gMediaDecoderLog;
+
+// Logs to gMediaDecoderLog at level `type`, prefixing the message with
+// "DecodedStream=<address of this>". Needs `this` in scope at the call site.
+#define LOG_DS(type, fmt, ...)    \
+  MOZ_LOG(gMediaDecoderLog, type, \
+          ("DecodedStream=%p " fmt, this, ##__VA_ARGS__))
+
+// Emits a profiler text marker in the MEDIA_PLAYBACK category, named after
+// the enclosing function's signature and carrying markerString as its text.
+#define PLAYBACK_PROFILER_MARKER(markerString) \
+  PROFILER_MARKER_TEXT(FUNCTION_SIGNATURE, MEDIA_PLAYBACK, {}, markerString)
+
+/*
+ * A container class to make it easier to pass the playback info all the
+ * way to DecodedStreamGraphListener from DecodedStream.
+ */
+struct PlaybackInfoInit {
+  // NOTE(review): presumably the media time at which playback starts;
+  // confirm against the code that fills this in.
+  TimeUnit mStartTime;
+  // Description of the media being played.
+  MediaInfo mInfo;
+};
+
+class DecodedStreamGraphListener;
+
+// MediaTrackListener attached to the video SourceMediaTrack. It receives
+// output and end notifications for the video track on behalf of a
+// DecodedStreamGraphListener. Output notifications are ignored while an
+// audio track exists and has not ended, because in that case audio playout
+// drives the clock.
+class SourceVideoTrackListener : public MediaTrackListener {
+ public:
+  // aGraphListener: the listener notifications are reported to.
+  // aVideoTrack: the video track being listened to.
+  // aAudioTrack: the matching audio track; may be null.
+  // aDecoderThread: event target for the decoder thread.
+  SourceVideoTrackListener(DecodedStreamGraphListener* aGraphListener,
+                           SourceMediaTrack* aVideoTrack,
+                           MediaTrack* aAudioTrack,
+                           nsISerialEventTarget* aDecoderThread);
+
+  void NotifyOutput(MediaTrackGraph* aGraph,
+                    TrackTime aCurrentTrackTime) override;
+  void NotifyEnded(MediaTrackGraph* aGraph) override;
+
+ private:
+  const RefPtr<DecodedStreamGraphListener> mGraphListener;
+  const RefPtr<SourceMediaTrack> mVideoTrack;
+  const RefPtr<const MediaTrack> mAudioTrack;
+  const RefPtr<nsISerialEventTarget> mDecoderThread;
+  // Last video track time seen by NotifyOutput(); used to skip graph
+  // iterations in which time did not advance.
+  TrackTime mLastVideoOutputTime = 0;
+};
+
+class DecodedStreamGraphListener {
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(DecodedStreamGraphListener)
+ public:
+  DecodedStreamGraphListener(
+      nsISerialEventTarget* aDecoderThread, AudioDecoderInputTrack* aAudioTrack,
+      MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedHolder,
+      SourceMediaTrack* aVideoTrack,
+      MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedHolder)
+      : mDecoderThread(aDecoderThread),
+        mVideoTrackListener(
+            aVideoTrack ? MakeRefPtr<SourceVideoTrackListener>(
+                              this, aVideoTrack, aAudioTrack, aDecoderThread)
+                        : nullptr),
+        mAudioEndedHolder(std::move(aAudioEndedHolder)),
+        mVideoEndedHolder(std::move(aVideoEndedHolder)),
+        mAudioTrack(aAudioTrack),
+        mVideoTrack(aVideoTrack) {
+    MOZ_ASSERT(NS_IsMainThread());
+    MOZ_ASSERT(mDecoderThread);
+
+    if (mAudioTrack) {
+      mOnAudioOutput = mAudioTrack->OnOutput().Connect(
+          mDecoderThread,
+          [self = RefPtr<DecodedStreamGraphListener>(this)](TrackTime aTime) {
+            self->NotifyOutput(MediaSegment::AUDIO, aTime);
+          });
+      mOnAudioEnd = mAudioTrack->OnEnd().Connect(
+          mDecoderThread, [self = RefPtr<DecodedStreamGraphListener>(this)]() {
+            self->NotifyEnded(MediaSegment::AUDIO);
+          });
+    } else {
+      mAudioEnded = true;
+      mAudioEndedHolder.ResolveIfExists(true, __func__);
+    }
+
+    if (mVideoTrackListener) {
+      mVideoTrack->AddListener(mVideoTrackListener);
+    } else {
+      mVideoEnded = true;
+      mVideoEndedHolder.ResolveIfExists(true, __func__);
+    }
+  }
+
+  void Close() {
+    AssertOnDecoderThread();
+    if (mAudioTrack) {
+      mAudioTrack->Close();
+    }
+    if (mVideoTrack) {
+      mVideoTrack->End();
+    }
+    mAudioEndedHolder.ResolveIfExists(false, __func__);
+    mVideoEndedHolder.ResolveIfExists(false, __func__);
+    mOnAudioOutput.DisconnectIfExists();
+    mOnAudioEnd.DisconnectIfExists();
+  }
+
+  void NotifyOutput(MediaSegment::Type aType, TrackTime aCurrentTrackTime) {
+    AssertOnDecoderThread();
+    if (aType == MediaSegment::AUDIO) {
+      mAudioOutputFrames = aCurrentTrackTime;
+    } else if (aType == MediaSegment::VIDEO) {
+      if (aCurrentTrackTime >= mVideoEndTime) {
+        mVideoTrack->End();
+      }
+    } else {
+      MOZ_CRASH("Unexpected track type");
+    }
+
+    MOZ_ASSERT_IF(aType == MediaSegment::AUDIO, !mAudioEnded);
+    MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, !mVideoEnded);
+    // This situation would happen when playing audio in >1x playback rate,
+    // because the audio output clock isn't align the graph time and would go
+    // forward faster. Eg. playback rate=2, when the graph time passes 10s, the
+    // audio clock time actually already goes forward 20s. After audio track
+    // ended, video track would tirgger the clock, but the video time still
+    // follows the graph time, which is smaller than the preivous audio clock
+    // time and should be ignored.
+    if (aCurrentTrackTime <= mLastOutputTime) {
+      MOZ_ASSERT(aType == MediaSegment::VIDEO);
+      return;
+    }
+    MOZ_ASSERT(aCurrentTrackTime > mLastOutputTime);
+    mLastOutputTime = aCurrentTrackTime;
+
+    // Only when audio track doesn't exists or has reached the end, video
+    // track should drive the clock.
+    MOZ_ASSERT_IF(aType == MediaSegment::VIDEO, mAudioEnded);
+    const MediaTrack* track = aType == MediaSegment::VIDEO
+                                  ? static_cast<MediaTrack*>(mVideoTrack)
+                                  : static_cast<MediaTrack*>(mAudioTrack);
+    mOnOutput.Notify(track->TrackTimeToMicroseconds(aCurrentTrackTime));
+  }
+
+  void NotifyEnded(MediaSegment::Type aType) {
+    AssertOnDecoderThread();
+    if (aType == MediaSegment::AUDIO) {
+      MOZ_ASSERT(!mAudioEnded);
+      mAudioEnded = true;
+      mAudioEndedHolder.ResolveIfExists(true, __func__);
+    } else if (aType == MediaSegment::VIDEO) {
+      MOZ_ASSERT(!mVideoEnded);
+      mVideoEnded = true;
+      mVideoEndedHolder.ResolveIfExists(true, __func__);
+    } else {
+      MOZ_CRASH("Unexpected track type");
+    }
+  }
+
+  /**
+   * Tell the graph listener to end the track sourced by the given track after
+   * it has seen at least aEnd worth of output reported as processed by the
+   * graph.
+   *
+   * A TrackTime of TRACK_TIME_MAX indicates that the track has no end and is
+   * the default.
+   *
+   * This method of ending tracks is needed because the MediaTrackGraph
+   * processes ended tracks (through SourceMediaTrack::EndTrack) at the
+   * beginning of an iteration, but waits until the end of the iteration to
+   * process any ControlMessages. When such a ControlMessage is a listener that
+   * is to be added to a track that has ended in its very first iteration, the
+   * track ends before the listener tracking this ending is added. This can lead
+   * to a MediaStreamTrack ending on main thread (it uses another listener)
+   * before the listeners to render the track get added, potentially meaning a
+   * media element doesn't progress before reaching the end although data was
+   * available.
+   */
+  void EndVideoTrackAt(MediaTrack* aTrack, TrackTime aEnd) {
+    AssertOnDecoderThread();
+    MOZ_DIAGNOSTIC_ASSERT(aTrack == mVideoTrack);
+    mVideoEndTime = aEnd;
+  }
+
+  void Forget() {
+    MOZ_ASSERT(NS_IsMainThread());
+    if (mVideoTrackListener && !mVideoTrack->IsDestroyed()) {
+      mVideoTrack->RemoveListener(mVideoTrackListener);
+    }
+    mVideoTrackListener = nullptr;
+  }
+
+  TrackTime GetAudioFramesPlayed() {
+    AssertOnDecoderThread();
+    return mAudioOutputFrames;
+  }
+
+  MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
+
+ private:
+  ~DecodedStreamGraphListener() {  // Both promise holders must be empty by now.
+    MOZ_ASSERT(mAudioEndedHolder.IsEmpty());
+    MOZ_ASSERT(mVideoEndedHolder.IsEmpty());
+  }
+
+  inline void AssertOnDecoderThread() const {  // Caller must be on mDecoderThread.
+    MOZ_ASSERT(mDecoderThread->IsOnCurrentThread());
+  }
+
+  const RefPtr<nsISerialEventTarget> mDecoderThread;  // See AssertOnDecoderThread().
+
+  // Accessible on any thread, but only notifies on the decoder thread.
+  MediaEventProducer<int64_t> mOnOutput;
+
+  RefPtr<SourceVideoTrackListener> mVideoTrackListener;  // Cleared by Forget().
+
+  // These can be resolved on the main thread on creation if there is no
+  // corresponding track, otherwise they are resolved on the decoder thread.
+  MozPromiseHolder<DecodedStream::EndedPromise> mAudioEndedHolder;
+  MozPromiseHolder<DecodedStream::EndedPromise> mVideoEndedHolder;
+
+  // Decoder thread only.
+  TrackTime mAudioOutputFrames = 0;  // Returned by GetAudioFramesPlayed().
+  TrackTime mLastOutputTime = 0;
+  bool mAudioEnded = false;
+  bool mVideoEnded = false;  // Set when the end of the video track is notified.
+
+  // Any thread.
+  const RefPtr<AudioDecoderInputTrack> mAudioTrack;
+  const RefPtr<SourceMediaTrack> mVideoTrack;
+  MediaEventListener mOnAudioOutput;
+  MediaEventListener mOnAudioEnd;
+  Atomic<TrackTime> mVideoEndTime{TRACK_TIME_MAX};  // Set by EndVideoTrackAt().
+};
+
+SourceVideoTrackListener::SourceVideoTrackListener(
+    DecodedStreamGraphListener* aGraphListener, SourceMediaTrack* aVideoTrack,
+    MediaTrack* aAudioTrack, nsISerialEventTarget* aDecoderThread)
+    : mGraphListener(aGraphListener),  // Receives the forwarded notifications.
+      mVideoTrack(aVideoTrack),
+      mAudioTrack(aAudioTrack),  // May be null; see the check in NotifyOutput().
+      mDecoderThread(aDecoderThread) {}  // Target thread for the forwarding.
+
+// Graph-side notification of video output progress. The new track time is
+// forwarded to the graph listener on the decoder thread, unless a live audio
+// track is present or the time has not advanced since the last report.
+void SourceVideoTrackListener::NotifyOutput(MediaTrackGraph* aGraph,
+                                            TrackTime aCurrentTrackTime) {
+  aGraph->AssertOnGraphThreadOrNotRunning();
+  const bool audioDrivesClock = mAudioTrack && !mAudioTrack->Ended();
+  if (audioDrivesClock) {
+    // Only audio playout drives the clock forward, if present and live.
+    return;
+  }
+  // Time is never allowed to go backwards, but the graph may iterate without
+  // advancing it; in that case there is nothing new to report.
+  const bool timeAdvanced = aCurrentTrackTime > mLastVideoOutputTime;
+  if (!timeAdvanced) {
+    MOZ_ASSERT(aCurrentTrackTime == mLastVideoOutputTime);
+    return;
+  }
+  mLastVideoOutputTime = aCurrentTrackTime;
+  auto forwardOutput = [self = RefPtr<SourceVideoTrackListener>(this),
+                        aCurrentTrackTime]() {
+    self->mGraphListener->NotifyOutput(MediaSegment::VIDEO, aCurrentTrackTime);
+  };
+  mDecoderThread->Dispatch(NS_NewRunnableFunction(
+      "SourceVideoTrackListener::NotifyOutput", std::move(forwardOutput)));
+}
+
+// Graph-side notification that the video track ended; forwarded to the graph
+// listener on the decoder thread.
+void SourceVideoTrackListener::NotifyEnded(MediaTrackGraph* aGraph) {
+  aGraph->AssertOnGraphThreadOrNotRunning();
+  auto forwardEnded = [self = RefPtr<SourceVideoTrackListener>(this)]() {
+    self->mGraphListener->NotifyEnded(MediaSegment::VIDEO);
+  };
+  mDecoderThread->Dispatch(NS_NewRunnableFunction(
+      "SourceVideoTrackListener::NotifyEnded", std::move(forwardEnded)));
+}
+
+/**
+ * All MediaStream-related data is protected by the decoder's monitor. We have
+ * at most one DecodedStreamData per MediaDecoder. XXX Its tracks are used as
+ * inputs for all output tracks created by OutputStreamManager after calls to
+ * captureStream/UntilEnded. Seeking creates new source tracks, as does
+ * replaying after the input has ended. In the latter case, the new sources are
+ * not connected to tracks created by captureStreamUntilEnded.
+ */
+class DecodedStreamData final {
+ public:
+  DecodedStreamData(
+      PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph,
+      RefPtr<ProcessedMediaTrack> aAudioOutputTrack,
+      RefPtr<ProcessedMediaTrack> aVideoOutputTrack,
+      MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise,
+      MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise,
+      float aPlaybackRate, float aVolume, bool aPreservesPitch,
+      nsISerialEventTarget* aDecoderThread);
+  ~DecodedStreamData();
+  MediaEventSource<int64_t>& OnOutput();  // Forwards to mListener->OnOutput().
+  // This is used to mark track as closed and should be called before Forget().
+  // Decoder thread only.
+  void Close();
+  // After calling this function, the DecodedStreamData would be destroyed.
+  // Main thread only.
+  void Forget();
+  void GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo);
+
+  void WriteVideoToSegment(layers::Image* aImage, const TimeUnit& aStart,
+                           const TimeUnit& aEnd,
+                           const gfx::IntSize& aIntrinsicSize,
+                           const TimeStamp& aTimeStamp, VideoSegment* aOutput,
+                           const PrincipalHandle& aPrincipalHandle,
+                           double aPlaybackRate);
+
+  /* The following group of fields are protected by the decoder's monitor
+   * and can be read or written on any thread.
+   */
+  // Count of audio frames written to the track
+  int64_t mAudioFramesWritten;
+  // Amount of video appended to the track, as a TrackTime in the track's rate
+  TrackTime mVideoTrackWritten;
+  // mNextAudioTime is the end timestamp for the last packet sent to the track.
+  // Therefore audio packets starting at or after this time need to be copied
+  // to the output track.
+  TimeUnit mNextAudioTime;
+  // mLastVideoStartTime is the start timestamp for the last packet sent to the
+  // track. Therefore video packets starting after this time need to be copied
+  // to the output track.
+  NullableTimeUnit mLastVideoStartTime;
+  // mLastVideoEndTime is the end timestamp for the last packet sent to the
+  // track. It is used to adjust durations of chunks sent to the output track
+  // when there are overlaps in VideoData.
+  NullableTimeUnit mLastVideoEndTime;
+  // The timestamp of the last frame, so we can ensure time never goes
+  // backwards.
+  TimeStamp mLastVideoTimeStamp;
+  // The last video image sent to the track. Useful if we need to replicate
+  // the image.
+  RefPtr<layers::Image> mLastVideoImage;
+  gfx::IntSize mLastVideoImageDisplaySize;  // Display size used with mLastVideoImage.
+  bool mHaveSentFinishAudio;  // Set once end of stream was sent to mAudioTrack.
+  bool mHaveSentFinishVideo;  // Set once the video track's end was scheduled.
+
+  const RefPtr<AudioDecoderInputTrack> mAudioTrack;  // Null without audio.
+  const RefPtr<SourceMediaTrack> mVideoTrack;  // Null without video.
+  const RefPtr<ProcessedMediaTrack> mAudioOutputTrack;
+  const RefPtr<ProcessedMediaTrack> mVideoOutputTrack;
+  const RefPtr<MediaInputPort> mAudioPort;  // Connects mAudioTrack to its output.
+  const RefPtr<MediaInputPort> mVideoPort;  // Connects mVideoTrack to its output.
+  const RefPtr<DecodedStream::EndedPromise> mAudioEndedPromise;
+  const RefPtr<DecodedStream::EndedPromise> mVideoEndedPromise;
+  const RefPtr<DecodedStreamGraphListener> mListener;
+};
+
+DecodedStreamData::DecodedStreamData(
+    PlaybackInfoInit&& aInit, MediaTrackGraph* aGraph,
+    RefPtr<ProcessedMediaTrack> aAudioOutputTrack,
+    RefPtr<ProcessedMediaTrack> aVideoOutputTrack,
+    MozPromiseHolder<DecodedStream::EndedPromise>&& aAudioEndedPromise,
+    MozPromiseHolder<DecodedStream::EndedPromise>&& aVideoEndedPromise,
+    float aPlaybackRate, float aVolume, bool aPreservesPitch,
+    nsISerialEventTarget* aDecoderThread)
+    : mAudioFramesWritten(0),
+      mVideoTrackWritten(0),
+      mNextAudioTime(aInit.mStartTime),  // Audio is sent from the start time on.
+      mHaveSentFinishAudio(false),
+      mHaveSentFinishVideo(false),
+      mAudioTrack(aInit.mInfo.HasAudio()
+                      ? AudioDecoderInputTrack::Create(
+                            aGraph, aDecoderThread, aInit.mInfo.mAudio,
+                            aPlaybackRate, aVolume, aPreservesPitch)
+                      : nullptr),  // No input track when there is no audio.
+      mVideoTrack(aInit.mInfo.HasVideo()
+                      ? aGraph->CreateSourceTrack(MediaSegment::VIDEO)
+                      : nullptr),  // No input track when there is no video.
+      mAudioOutputTrack(std::move(aAudioOutputTrack)),
+      mVideoOutputTrack(std::move(aVideoOutputTrack)),
+      mAudioPort((mAudioOutputTrack && mAudioTrack)
+                     ? mAudioOutputTrack->AllocateInputPort(mAudioTrack)
+                     : nullptr),  // A port needs both the input and the output.
+      mVideoPort((mVideoOutputTrack && mVideoTrack)
+                     ? mVideoOutputTrack->AllocateInputPort(mVideoTrack)
+                     : nullptr),
+      mAudioEndedPromise(aAudioEndedPromise.Ensure(__func__)),
+      mVideoEndedPromise(aVideoEndedPromise.Ensure(__func__)),
+      // DecodedStreamGraphListener takes the holders and resolves the promises.
+      mListener(MakeRefPtr<DecodedStreamGraphListener>(
+          aDecoderThread, mAudioTrack, std::move(aAudioEndedPromise),
+          mVideoTrack, std::move(aVideoEndedPromise))) {
+  MOZ_ASSERT(NS_IsMainThread());  // Construction happens on the main thread.
+}
+
+// Destroys the input tracks and then the input ports, skipping any that were
+// never created. Main thread only.
+DecodedStreamData::~DecodedStreamData() {
+  MOZ_ASSERT(NS_IsMainThread());
+  const auto destroyIfPresent = [](const auto& aGraphObject) {
+    if (aGraphObject) {
+      aGraphObject->Destroy();
+    }
+  };
+  destroyIfPresent(mAudioTrack);
+  destroyIfPresent(mVideoTrack);
+  destroyIfPresent(mAudioPort);
+  destroyIfPresent(mVideoPort);
+}
+
+// Exposes the output event source owned by the graph listener.
+MediaEventSource<int64_t>& DecodedStreamData::OnOutput() {
+  MediaEventSource<int64_t>& source = mListener->OnOutput();
+  return source;
+}
+
+// Forwards to the graph listener's Close().
+void DecodedStreamData::Close() {
+  mListener->Close();
+}
+
+// Forwards to the graph listener's Forget().
+void DecodedStreamData::Forget() {
+  mListener->Forget();
+}
+
+// Copies the current bookkeeping values into aInfo. Video times that have not
+// been set yet are reported as -1 microseconds.
+void DecodedStreamData::GetDebugInfo(dom::DecodedStreamDataDebugInfo& aInfo) {
+  CopyUTF8toUTF16(nsPrintfCString("%p", this), aInfo.mInstance);
+  aInfo.mAudioFramesWritten = mAudioFramesWritten;
+  aInfo.mStreamAudioWritten = mListener->GetAudioFramesPlayed();
+  aInfo.mNextAudioTime = mNextAudioTime.ToMicroseconds();
+
+  const TimeUnit lastStart =
+      mLastVideoStartTime.valueOr(TimeUnit::FromMicroseconds(-1));
+  aInfo.mLastVideoStartTime = lastStart.ToMicroseconds();
+
+  const TimeUnit lastEnd =
+      mLastVideoEndTime.valueOr(TimeUnit::FromMicroseconds(-1));
+  aInfo.mLastVideoEndTime = lastEnd.ToMicroseconds();
+
+  aInfo.mHaveSentFinishAudio = mHaveSentFinishAudio;
+  aInfo.mHaveSentFinishVideo = mHaveSentFinishVideo;
+}
+
+DecodedStream::DecodedStream(
+    MediaDecoderStateMachine* aStateMachine,
+    nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+    CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks, double aVolume,
+    double aPlaybackRate, bool aPreservesPitch,
+    MediaQueue<AudioData>& aAudioQueue, MediaQueue<VideoData>& aVideoQueue,
+    RefPtr<AudioDeviceInfo> aAudioDevice)
+    : mOwnerThread(aStateMachine->OwnerThread()),
+      mDummyTrack(std::move(aDummyTrack)),
+      mWatchManager(this, mOwnerThread),
+      mPlaying(false, "DecodedStream::mPlaying"),  // Start() sets this to true.
+      mPrincipalHandle(aStateMachine->OwnerThread(), PRINCIPAL_HANDLE_NONE,
+                       "DecodedStream::mPrincipalHandle (Mirror)"),
+      mCanonicalOutputPrincipal(aStateMachine->CanonicalOutputPrincipal()),
+      mOutputTracks(std::move(aOutputTracks)),
+      mVolume(aVolume),
+      mPlaybackRate(aPlaybackRate),
+      mPreservesPitch(aPreservesPitch),
+      mAudioQueue(aAudioQueue),
+      mVideoQueue(aVideoQueue),
+      mAudioDevice(std::move(aAudioDevice)) {}  // Members only; see Start().
+
+DecodedStream::~DecodedStream() {  // mStartTime is reset by Stop().
+  MOZ_ASSERT(mStartTime.isNothing(), "playback should've ended.");
+}
+
+// Returns the ended promise for aType when the media has a track of that
+// type, and nullptr otherwise. Playback must have started.
+RefPtr<DecodedStream::EndedPromise> DecodedStream::OnEnded(TrackType aType) {
+  AssertOwnerThread();
+  MOZ_ASSERT(mStartTime.isSome());
+
+  const bool audioRequested =
+      aType == TrackInfo::kAudioTrack && mInfo.HasAudio();
+  if (audioRequested) {
+    return mAudioEndedPromise;
+  }
+
+  const bool videoRequested =
+      aType == TrackInfo::kVideoTrack && mInfo.HasVideo();
+  if (videoRequested) {
+    return mVideoEndedPromise;
+  }
+
+  return nullptr;
+}
+
+nsresult DecodedStream::Start(const TimeUnit& aStartTime,
+                              const MediaInfo& aInfo) {
+  AssertOwnerThread();
+  MOZ_ASSERT(mStartTime.isNothing(), "playback already started.");
+
+  AUTO_PROFILER_LABEL(FUNCTION_SIGNATURE, MEDIA_PLAYBACK);
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("StartTime=%" PRId64,
+                                 aStartTime.ToMicroseconds());
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  LOG_DS(LogLevel::Debug, "Start() mStartTime=%" PRId64,
+         aStartTime.ToMicroseconds());
+
+  mStartTime.emplace(aStartTime);  // From here on IsStarted() returns true.
+  mLastOutputTime = TimeUnit::Zero();
+  mInfo = aInfo;
+  mPlaying = true;
+  mPrincipalHandle.Connect(mCanonicalOutputPrincipal);
+  mWatchManager.Watch(mPlaying, &DecodedStream::PlayingChanged);
+  mAudibilityMonitor.emplace(
+      mInfo.mAudio.mRate,
+      StaticPrefs::dom_media_silence_duration_for_audibility());
+  ConnectListener();
+
+  class R : public Runnable {  // Builds the DecodedStreamData on the main thread.
+   public:
+    R(PlaybackInfoInit&& aInit,
+      nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+      nsTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks,
+      MozPromiseHolder<MediaSink::EndedPromise>&& aAudioEndedPromise,
+      MozPromiseHolder<MediaSink::EndedPromise>&& aVideoEndedPromise,
+      float aPlaybackRate, float aVolume, bool aPreservesPitch,
+      nsISerialEventTarget* aDecoderThread)
+        : Runnable("CreateDecodedStreamData"),
+          mInit(std::move(aInit)),
+          mDummyTrack(std::move(aDummyTrack)),
+          mOutputTracks(std::move(aOutputTracks)),
+          mAudioEndedPromise(std::move(aAudioEndedPromise)),
+          mVideoEndedPromise(std::move(aVideoEndedPromise)),
+          mPlaybackRate(aPlaybackRate),
+          mVolume(aVolume),
+          mPreservesPitch(aPreservesPitch),
+          mDecoderThread(aDecoderThread) {}
+    NS_IMETHOD Run() override {
+      MOZ_ASSERT(NS_IsMainThread());
+      RefPtr<ProcessedMediaTrack> audioOutputTrack;
+      RefPtr<ProcessedMediaTrack> videoOutputTrack;
+      for (const auto& track : mOutputTracks) {  // Pick one output per kind.
+        if (track->mType == MediaSegment::AUDIO) {
+          MOZ_DIAGNOSTIC_ASSERT(
+              !audioOutputTrack,
+              "We only support capturing to one output track per kind");
+          audioOutputTrack = track;
+        } else if (track->mType == MediaSegment::VIDEO) {
+          MOZ_DIAGNOSTIC_ASSERT(
+              !videoOutputTrack,
+              "We only support capturing to one output track per kind");
+          videoOutputTrack = track;
+        } else {
+          MOZ_CRASH("Unknown media type");
+        }
+      }
+      if (!mDummyTrack) {
+        // No dummy track - no graph. This could be intentional as the owning
+        // media element needs access to the tracks on main thread to set up
+        // forwarding of them before playback starts. MDSM will re-create
+        // DecodedStream once a dummy track is available. This effectively halts
+        // playback for this DecodedStream, since mData is left unset.
+        return NS_OK;
+      }
+      if ((audioOutputTrack && audioOutputTrack->IsDestroyed()) ||
+          (videoOutputTrack && videoOutputTrack->IsDestroyed())) {
+        // A track has been destroyed and we'll soon get re-created with a
+        // proper one. This effectively halts playback for this DecodedStream.
+        return NS_OK;
+      }
+      mData = MakeUnique<DecodedStreamData>(
+          std::move(mInit), mDummyTrack->mTrack->Graph(),
+          std::move(audioOutputTrack), std::move(videoOutputTrack),
+          std::move(mAudioEndedPromise), std::move(mVideoEndedPromise),
+          mPlaybackRate, mVolume, mPreservesPitch, mDecoderThread);
+      return NS_OK;
+    }
+    UniquePtr<DecodedStreamData> ReleaseData() { return std::move(mData); }
+
+   private:
+    PlaybackInfoInit mInit;
+    nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack;
+    const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks;
+    MozPromiseHolder<MediaSink::EndedPromise> mAudioEndedPromise;
+    MozPromiseHolder<MediaSink::EndedPromise> mVideoEndedPromise;
+    UniquePtr<DecodedStreamData> mData;  // Stays null when Run() bails out early.
+    const float mPlaybackRate;
+    const float mVolume;
+    const bool mPreservesPitch;
+    const RefPtr<nsISerialEventTarget> mDecoderThread;
+  };
+
+  MozPromiseHolder<DecodedStream::EndedPromise> audioEndedHolder;
+  MozPromiseHolder<DecodedStream::EndedPromise> videoEndedHolder;
+  PlaybackInfoInit init{aStartTime, aInfo};
+  nsCOMPtr<nsIRunnable> r =
+      new R(std::move(init), mDummyTrack, mOutputTracks.Clone(),
+            std::move(audioEndedHolder), std::move(videoEndedHolder),
+            static_cast<float>(mPlaybackRate), static_cast<float>(mVolume),
+            mPreservesPitch, mOwnerThread);
+  SyncRunnable::DispatchToThread(GetMainThreadSerialEventTarget(), r);
+  mData = static_cast<R*>(r.get())->ReleaseData();  // Collect what Run() built.
+
+  if (mData) {  // Null when Run() returned without creating the data.
+    mAudioEndedPromise = mData->mAudioEndedPromise;
+    mVideoEndedPromise = mData->mVideoEndedPromise;
+    mOutputListener = mData->OnOutput().Connect(mOwnerThread, this,
+                                                &DecodedStream::NotifyOutput);
+    SendData();
+  }
+  return NS_OK;
+}
+
+void DecodedStream::Stop() {
+  AssertOwnerThread();
+  MOZ_ASSERT(mStartTime.isSome(), "playback not started.");
+
+  TRACE("DecodedStream::Stop");
+  LOG_DS(LogLevel::Debug, "Stop()");
+
+  DisconnectListener();
+  ResetVideo(mPrincipalHandle);  // Calls GetPosition(): keep before the reset.
+  ResetAudio();
+  mStartTime.reset();  // From here on IsStarted() returns false.
+  mAudioEndedPromise = nullptr;
+  mVideoEndedPromise = nullptr;
+
+  // Clear mData immediately when this playback session ends so we won't
+  // send data to the wrong track in SendData() in the next playback session.
+  DestroyData(std::move(mData));
+
+  mPrincipalHandle.DisconnectIfConnected();
+  mWatchManager.Unwatch(mPlaying, &DecodedStream::PlayingChanged);
+  mAudibilityMonitor.reset();
+}
+
+// True while a start time is set, i.e. between Start() and Stop().
+bool DecodedStream::IsStarted() const {
+  AssertOwnerThread();
+  const bool started = mStartTime.isSome();
+  return started;
+}
+
+// True only when playback has started and mPlaying is set.
+bool DecodedStream::IsPlaying() const {
+  AssertOwnerThread();
+  if (!IsStarted()) {
+    return false;
+  }
+  return mPlaying;
+}
+
+void DecodedStream::Shutdown() {  // Tears down the mirror and the watch manager.
+  AssertOwnerThread();
+  mPrincipalHandle.DisconnectIfConnected();
+  mWatchManager.Shutdown();
+}
+
+// Closes aData on the owner thread and then hands it to a main-thread
+// runnable that calls Forget() on it. Does nothing when aData is null.
+void DecodedStream::DestroyData(UniquePtr<DecodedStreamData>&& aData) {
+  AssertOwnerThread();
+
+  if (!aData) {
+    return;
+  }
+
+  TRACE("DecodedStream::DestroyData");
+  mOutputListener.Disconnect();
+
+  aData->Close();
+  // The runnable takes over ownership of the data from here on.
+  auto forgetOnMainThread = [data = std::move(aData)]() { data->Forget(); };
+  NS_DispatchToMainThread(NS_NewRunnableFunction(
+      "DecodedStream::DestroyData", std::move(forgetOnMainThread)));
+}
+
+// Updates mPlaying. Ignored until playback has started.
+void DecodedStream::SetPlaying(bool aPlaying) {
+  AssertOwnerThread();
+
+  const bool playbackStarted = mStartTime.isSome();
+  if (!playbackStarted) {
+    // Resume/pause matters only when playback started.
+    return;
+  }
+
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("Playing=%s", aPlaying ? "true" : "false");
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  LOG_DS(LogLevel::Debug, "playing (%d) -> (%d)", mPlaying.Ref(), aPlaying);
+  mPlaying = aPlaying;
+}
+
+// Remembers the new volume and, when an audio input track exists, pushes it
+// to that track. A value equal to the current one is a no-op.
+void DecodedStream::SetVolume(double aVolume) {
+  AssertOwnerThread();
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("Volume=%f", aVolume);
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  const bool unchanged = mVolume == aVolume;
+  if (unchanged) {
+    return;
+  }
+  mVolume = aVolume;
+  if (!mData || !mData->mAudioTrack) {
+    return;
+  }
+  mData->mAudioTrack->SetVolume(static_cast<float>(aVolume));
+}
+
+// Remembers the new playback rate and, when an audio input track exists,
+// pushes it to that track. A value equal to the current one is a no-op.
+void DecodedStream::SetPlaybackRate(double aPlaybackRate) {
+  AssertOwnerThread();
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("PlaybackRate=%f", aPlaybackRate);
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  const bool unchanged = mPlaybackRate == aPlaybackRate;
+  if (unchanged) {
+    return;
+  }
+  mPlaybackRate = aPlaybackRate;
+  if (!mData || !mData->mAudioTrack) {
+    return;
+  }
+  mData->mAudioTrack->SetPlaybackRate(static_cast<float>(aPlaybackRate));
+}
+
+// Remembers the new pitch-preservation flag and, when an audio input track
+// exists, pushes it to that track. An unchanged value is a no-op.
+void DecodedStream::SetPreservesPitch(bool aPreservesPitch) {
+  AssertOwnerThread();
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("PreservesPitch=%s",
+                                 aPreservesPitch ? "true" : "false");
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  const bool unchanged = mPreservesPitch == aPreservesPitch;
+  if (unchanged) {
+    return;
+  }
+  mPreservesPitch = aPreservesPitch;
+  if (!mData || !mData->mAudioTrack) {
+    return;
+  }
+  mData->mAudioTrack->SetPreservesPitch(aPreservesPitch);
+}
+
+// Returns the playback rate last stored by the constructor or
+// SetPlaybackRate().
+double DecodedStream::PlaybackRate() const {
+  AssertOwnerThread();
+  const double rate = mPlaybackRate;
+  return rate;
+}
+
+// Appends the not-yet-sent audio from mAudioQueue to the audio input track
+// and signals end of stream once the queue has finished.
+void DecodedStream::SendAudio(const PrincipalHandle& aPrincipalHandle) {
+  AssertOwnerThread();
+
+  const bool nothingToSend = !mInfo.HasAudio() || mData->mHaveSentFinishAudio;
+  if (nothingToSend) {
+    return;
+  }
+
+  TRACE("DecodedStream::SendAudio");
+  // Holding references to the AudioData is fine since it is ref-counted.
+  AutoTArray<RefPtr<AudioData>, 10> pending;
+  mAudioQueue.GetElementsAfter(mData->mNextAudioTime, &pending);
+
+  // A gap shows up every time the media sink switches from `AudioSink` to
+  // `DecodedStream`. Unless it is filled with silence, A/V goes out of sync.
+  RefPtr<AudioData> firstPending;
+  if (!pending.IsEmpty()) {
+    firstPending = pending[0];
+  }
+  RefPtr<AudioData> silence = CreateSilenceDataIfGapExists(firstPending);
+  if (silence) {
+    LOG_DS(LogLevel::Verbose, "Detect a gap in audio, insert silence=%u",
+           silence->Frames());
+    pending.InsertElementAt(0, silence);
+  }
+
+  // Hand over the data which hasn't been sent to the audio track before.
+  mData->mAudioTrack->AppendData(pending, aPrincipalHandle);
+  for (const RefPtr<AudioData>& chunk : pending) {
+    CheckIsDataAudible(chunk);
+    mData->mNextAudioTime = chunk->GetEndTime();
+    mData->mAudioFramesWritten += chunk->Frames();
+  }
+
+  if (mAudioQueue.IsFinished() && !mData->mHaveSentFinishAudio) {
+    mData->mAudioTrack->NotifyEndOfStream();
+    mData->mHaveSentFinishAudio = true;
+  }
+}
+
+// Returns silent AudioData covering the gap between the audio written so far
+// (offset by the start time) and the start of aNextAudio, or nullptr when
+// aNextAudio is null, no gap exists, or the required size cannot be computed
+// or allocated.
+//
+// Every CheckedInt64 is validated before value() is read, so an overflow in
+// the frame arithmetic results in "no silence" rather than in reading an
+// invalid checked integer.
+already_AddRefed<AudioData> DecodedStream::CreateSilenceDataIfGapExists(
+    RefPtr<AudioData>& aNextAudio) {
+  AssertOwnerThread();
+  if (!aNextAudio) {
+    return nullptr;
+  }
+  CheckedInt64 audioWrittenOffset =
+      mData->mAudioFramesWritten +
+      TimeUnitToFrames(*mStartTime, aNextAudio->mRate);
+  CheckedInt64 frameOffset =
+      TimeUnitToFrames(aNextAudio->mTime, aNextAudio->mRate);
+  if (!audioWrittenOffset.isValid() || !frameOffset.isValid()) {
+    NS_WARNING("Int overflow in DecodedStream::CreateSilenceDataIfGapExists");
+    return nullptr;
+  }
+  if (audioWrittenOffset.value() >= frameOffset.value()) {
+    return nullptr;
+  }
+  // We've written less audio than our frame offset, return a silence data so we
+  // have enough audio to be at the correct offset for our current frames.
+  CheckedInt64 missingFrames = frameOffset - audioWrittenOffset;
+  // The buffer holds one sample per channel for every missing frame.
+  CheckedInt64 missingSamples = missingFrames * aNextAudio->mChannels;
+  if (!missingFrames.isValid() || !missingSamples.isValid()) {
+    NS_WARNING("Int overflow in DecodedStream::CreateSilenceDataIfGapExists");
+    return nullptr;
+  }
+  AlignedAudioBuffer silenceBuffer(missingSamples.value());
+  if (!silenceBuffer) {
+    NS_WARNING("OOM in DecodedStream::CreateSilenceDataIfGapExists");
+    return nullptr;
+  }
+  auto duration = media::TimeUnit(missingFrames.value(), aNextAudio->mRate);
+  if (!duration.IsValid()) {
+    NS_WARNING("Int overflow in DecodedStream::CreateSilenceDataIfGapExists");
+    return nullptr;
+  }
+  RefPtr<AudioData> silenceData = new AudioData(
+      aNextAudio->mOffset, aNextAudio->mTime, std::move(silenceBuffer),
+      aNextAudio->mChannels, aNextAudio->mRate);
+  MOZ_DIAGNOSTIC_ASSERT(duration == silenceData->mDuration, "must be equal");
+  return silenceData.forget();
+}
+
+// Feeds aData to the audibility monitor and notifies mAudibleEvent whenever
+// the "recently audible" state differs from the last one recorded.
+void DecodedStream::CheckIsDataAudible(const AudioData* aData) {
+  MOZ_ASSERT(aData);
+
+  mAudibilityMonitor->Process(aData);
+  const bool audibleNow = mAudibilityMonitor->RecentlyAudible();
+  if (audibleNow == mIsAudioDataAudible) {
+    return;
+  }
+
+  mIsAudioDataAudible = audibleNow;
+  mAudibleEvent.Notify(mIsAudioDataAudible);
+}
+
+// Appends aImage to aOutput as a frame stamped with aTimeStamp, then extends
+// that frame by the [aStart, aEnd) duration converted to ticks of the video
+// track's rate and divided by aPlaybackRate. Finally records aStart, aEnd and
+// aTimeStamp as the last video start time, end time and timestamp.
+//
+// aPlaybackRate must be greater than zero.
+void DecodedStreamData::WriteVideoToSegment(
+    layers::Image* aImage, const TimeUnit& aStart, const TimeUnit& aEnd,
+    const gfx::IntSize& aIntrinsicSize, const TimeStamp& aTimeStamp,
+    VideoSegment* aOutput, const PrincipalHandle& aPrincipalHandle,
+    double aPlaybackRate) {
+  RefPtr<layers::Image> image = aImage;
+  aOutput->AppendFrame(image.forget(), aIntrinsicSize, aPrincipalHandle, false,
+                       aTimeStamp);
+  // Extend this so we get accurate durations for all frames.
+  // Because this track is pushed, we need durations so the graph can track
+  // when playout of the track has finished.
+  MOZ_ASSERT(aPlaybackRate > 0);
+  const TrackTime start = aStart.ToTicksAtRate(mVideoTrack->mSampleRate);
+  const TrackTime end = aEnd.ToTicksAtRate(mVideoTrack->mSampleRate);
+  // Scale in double precision: a float only represents integers exactly up to
+  // 2^24, which would round long tick durations before the division.
+  aOutput->ExtendLastFrameBy(static_cast<TrackTime>(
+      static_cast<double>(end - start) / aPlaybackRate));
+
+  mLastVideoStartTime = Some(aStart);
+  mLastVideoEndTime = Some(aEnd);
+  mLastVideoTimeStamp = aTimeStamp;
+}
+
+// Returns true when the last video frame in aInput has zero duration, i.e.
+// when its start time equals the duration of the whole segment. Returns false
+// when GetLastFrame() reports no frame, so the start time is never read
+// without having been set.
+static bool ZeroDurationAtLastChunk(VideoSegment& aInput) {
+  TrackTime lastVideoStartTime = 0;
+  if (!aInput.GetLastFrame(&lastVideoStartTime)) {
+    return false;
+  }
+  return lastVideoStartTime == aInput.GetDuration();
+}
+
+// Clears audio that was queued on the input track but not played yet and,
+// when the audio queue has a front element, rewinds the next audio time to
+// its start so sending can resume from there.
+void DecodedStream::ResetAudio() {
+  AssertOwnerThread();
+
+  if (!mData || !mInfo.HasAudio()) {
+    return;
+  }
+
+  TRACE("DecodedStream::ResetAudio");
+  mData->mAudioTrack->ClearFutureData();
+  const RefPtr<AudioData>& front = mAudioQueue.PeekFront();
+  if (front) {
+    mData->mNextAudioTime = front->mTime;
+    mData->mHaveSentFinishAudio = false;
+  }
+}
+
+void DecodedStream::ResetVideo(const PrincipalHandle& aPrincipalHandle) {
+  AssertOwnerThread();
+
+  if (!mData) {
+    return;
+  }
+
+  if (!mInfo.HasVideo()) {
+    return;
+  }
+
+  TRACE("DecodedStream::ResetVideo");
+  TrackTime cleared = mData->mVideoTrack->ClearFutureData();
+  mData->mVideoTrackWritten -= cleared;  // Cleared data no longer counts.
+  if (mData->mHaveSentFinishVideo && cleared > 0) {
+    mData->mHaveSentFinishVideo = false;  // Lets SendVideo() run again.
+    mData->mListener->EndVideoTrackAt(mData->mVideoTrack, TRACK_TIME_MAX);
+  }
+
+  VideoSegment resetter;
+  TimeStamp currentTime;
+  TimeUnit currentPosition = GetPosition(&currentTime);
+
+  // Giving direct consumers a frame (really *any* frame, so in this case:
+  // nullptr) at an earlier time than the previous, will signal to that consumer
+  // to discard any frames ahead in time of the new frame. To be honest, this is
+  // an ugly hack because the direct listeners of the MediaTrackGraph do not
+  // have an API that supports clearing the future frames. ImageContainer and
+  // VideoFrameContainer do though, and we will need to move to a similar API
+  // for video tracks as part of bug 1493618.
+  resetter.AppendFrame(nullptr, mData->mLastVideoImageDisplaySize,
+                       aPrincipalHandle, false, currentTime);
+  mData->mVideoTrack->AppendData(&resetter);
+
+  // Consumer buffers have been reset. We now set the next time to the start
+  // time of the current frame, so that it can be displayed again on resuming.
+  if (RefPtr<VideoData> v = mVideoQueue.PeekFront()) {
+    mData->mLastVideoStartTime = Some(v->mTime - TimeUnit::FromMicroseconds(1));
+    mData->mLastVideoEndTime = Some(v->mTime);
+  } else {
+    // There was no current frame in the queue. We set the next time to the
+    // current time, so we at least don't resume starting in the future.
+    mData->mLastVideoStartTime =
+        Some(currentPosition - TimeUnit::FromMicroseconds(1));
+    mData->mLastVideoEndTime = Some(currentPosition);
+  }
+
+  mData->mLastVideoTimeStamp = currentTime;
+}
+
+void DecodedStream::SendVideo(const PrincipalHandle& aPrincipalHandle) {
+  AssertOwnerThread();
+
+  if (!mInfo.HasVideo()) {
+    return;
+  }
+
+  if (mData->mHaveSentFinishVideo) {
+    return;
+  }
+
+  TRACE("DecodedStream::SendVideo");
+  VideoSegment output;
+  AutoTArray<RefPtr<VideoData>, 10> video;
+
+  // It's OK to hold references to the VideoData because VideoData
+  // is ref-counted.
+  mVideoQueue.GetElementsAfter(
+      mData->mLastVideoStartTime.valueOr(mStartTime.ref()), &video);
+
+  TimeStamp currentTime;
+  TimeUnit currentPosition = GetPosition(&currentTime);
+
+  if (mData->mLastVideoTimeStamp.IsNull()) {
+    mData->mLastVideoTimeStamp = currentTime;
+  }
+
+  for (uint32_t i = 0; i < video.Length(); ++i) {
+    VideoData* v = video[i];
+    TimeUnit lastStart = mData->mLastVideoStartTime.valueOr(
+        mStartTime.ref() - TimeUnit::FromMicroseconds(1));
+    TimeUnit lastEnd = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+
+    if (lastEnd < v->mTime) {
+      // Write last video frame to catch up. mLastVideoImage can be null here
+      // which is fine, it just means there's no video.
+
+      // TODO: |mLastVideoImage| should come from the last image rendered
+      // by the state machine. This will avoid the black frame when capture
+      // happens in the middle of playback (especially in the middle of a
+      // video frame). E.g. if we have a video frame that is 30 sec long
+      // and capture happens at 15 sec, we'll have to append a black frame
+      // that is 15 sec long.
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      mData->WriteVideoToSegment(mData->mLastVideoImage, lastEnd, v->mTime,
+                                 mData->mLastVideoImageDisplaySize, t, &output,
+                                 aPrincipalHandle, mPlaybackRate);
+      lastEnd = v->mTime;
+    }
+
+    if (lastStart < v->mTime) {
+      // This frame starts after the last frame's start. Note that this could be
+      // before the last frame's end time for some videos. This only matters for
+      // the track's lifetime in the MTG, as rendering is based on timestamps,
+      // aka frame start times.
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      TimeUnit end = std::max(  // Ends at least one track tick (+1us) past lastEnd.
+          v->GetEndTime(),
+          lastEnd + TimeUnit::FromMicroseconds(
+                        mData->mVideoTrack->TrackTimeToMicroseconds(1) + 1));
+      mData->mLastVideoImage = v->mImage;
+      mData->mLastVideoImageDisplaySize = v->mDisplay;
+      mData->WriteVideoToSegment(v->mImage, lastEnd, end, v->mDisplay, t,
+                                 &output, aPrincipalHandle, mPlaybackRate);
+    }
+  }
+
+  // Only inspect the last chunk when the output actually has a frame.
+  bool compensateEOS = false;
+  bool forceBlack = false;
+  if (output.GetLastFrame()) {
+    compensateEOS = ZeroDurationAtLastChunk(output);
+  }
+
+  if (output.GetDuration() > 0) {
+    mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&output);
+  }
+
+  if (mVideoQueue.IsFinished() && !mData->mHaveSentFinishVideo) {
+    if (!mData->mLastVideoImage) {
+      // We have video, but the video queue finished before we received any
+      // frame. We insert a black frame to progress any consuming
+      // HTMLMediaElement. This mirrors the behavior of VideoSink.
+
+      // Force a frame - can be null
+      compensateEOS = true;
+      // Force frame to be black
+      forceBlack = true;
+      // Override the frame's size (will be 0x0 otherwise)
+      mData->mLastVideoImageDisplaySize = mInfo.mVideo.mDisplay;
+      LOG_DS(LogLevel::Debug, "No mLastVideoImage");
+    }
+    if (compensateEOS) {
+      VideoSegment endSegment;
+      auto start = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+      mData->WriteVideoToSegment(
+          mData->mLastVideoImage, start, start,
+          mData->mLastVideoImageDisplaySize,
+          currentTime + (start - currentPosition).ToTimeDuration(), &endSegment,
+          aPrincipalHandle, mPlaybackRate);
+      // ForwardedInputTrack drops zero duration frames, even at the end of
+      // the track.  Give the frame a minimum duration so that it is not
+      // dropped.
+      endSegment.ExtendLastFrameBy(1);
+      LOG_DS(LogLevel::Debug,
+             "compensateEOS: start %s, duration %" PRId64
+             ", mPlaybackRate %lf, sample rate %" PRId32,
+             start.ToString().get(), endSegment.GetDuration(), mPlaybackRate,
+             mData->mVideoTrack->mSampleRate);
+      MOZ_ASSERT(endSegment.GetDuration() > 0);
+      if (forceBlack) {
+        endSegment.ReplaceWithDisabled();
+      }
+      mData->mVideoTrackWritten += mData->mVideoTrack->AppendData(&endSegment);
+    }
+    mData->mListener->EndVideoTrackAt(mData->mVideoTrack,
+                                      mData->mVideoTrackWritten);
+    mData->mHaveSentFinishVideo = true;  // Makes later SendVideo() calls no-ops.
+  }
+}
+
+// Pushes pending audio and video to the input tracks. Does nothing while the
+// stream data does not exist or while playback is paused.
+void DecodedStream::SendData() {
+  AssertOwnerThread();
+
+  // mData may not have been created on the main thread yet (MDSM will try
+  // again later), and nothing is sent while not playing.
+  if (!mData || !mPlaying) {
+    return;
+  }
+
+  LOG_DS(LogLevel::Verbose, "SendData()");
+  SendAudio(mPrincipalHandle);
+  SendVideo(mPrincipalHandle);
+}
+
+TimeUnit DecodedStream::GetEndTime(TrackType aType) const {
+  AssertOwnerThread();
+  TRACE("DecodedStream::GetEndTime");
+  if (mData && aType == TrackInfo::kVideoTrack) {
+    return mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+  }
+  if (mData && aType == TrackInfo::kAudioTrack && mInfo.HasAudio()) {
+    // Start time plus the audio frames written so far, at the audio rate.
+    auto end = mStartTime.ref() +
+        media::TimeUnit(mData->mAudioFramesWritten, mInfo.mAudio.mRate);
+    return end.IsValid() ? end : TimeUnit::Zero();
+  }
+  return TimeUnit::Zero();
+}
+
+TimeUnit DecodedStream::GetPosition(TimeStamp* aTimeStamp) {
+  AssertOwnerThread();
+  TRACE("DecodedStream::GetPosition");
+  // This is only called after MDSM starts playback, so mStartTime is
+  // guaranteed to hold a value.
+  MOZ_ASSERT(mStartTime.isSome());
+  if (aTimeStamp) {
+    *aTimeStamp = TimeStamp::Now();  // Optional out-param: "now".
+  }
+  return mStartTime.ref() + mLastOutputTime;  // Start + last reported output.
+}
+
+void DecodedStream::NotifyOutput(int64_t aTime) {
+  AssertOwnerThread();
+  const TimeUnit outputTime = TimeUnit::FromMicroseconds(aTime);
+  if (outputTime == mLastOutputTime) {
+    return;  // No progress since the previous notification.
+  }
+  MOZ_ASSERT(mLastOutputTime < outputTime);
+  mLastOutputTime = outputTime;
+  const auto position = GetPosition();
+
+  if (profiler_thread_is_being_profiled_for_markers()) {
+    nsPrintfCString markerString("OutputTime=%" PRId64,
+                                 position.ToMicroseconds());
+    PLAYBACK_PROFILER_MARKER(markerString);
+  }
+  LOG_DS(LogLevel::Verbose, "time is now %" PRId64,
+         position.ToMicroseconds());
+
+  // Drop queued audio samples that end at or before the new position.
+  RefPtr<AudioData> head = mAudioQueue.PeekFront();
+  while (head && head->GetEndTime() <= position) {
+    LOG_DS(LogLevel::Debug, "Dropping audio [%" PRId64 ",%" PRId64 "]",
+           head->mTime.ToMicroseconds(), head->GetEndTime().ToMicroseconds());
+    RefPtr<AudioData> dropped = mAudioQueue.PopFront();
+    head = mAudioQueue.PeekFront();
+  }
+}
+
+void DecodedStream::PlayingChanged() {
+  AssertOwnerThread();
+  TRACE("DecodedStream::PlayingChanged");
+  // Nothing to do while mPlaying is true.
+  if (!mPlaying) {
+    // On seek or pause we discard future frames.
+    ResetVideo(mPrincipalHandle);
+    ResetAudio();
+  }
+}
+
+void DecodedStream::ConnectListener() {
+  AssertOwnerThread();
+  // Queue push/finish events and the mPlaying watch all target SendData().
+  mAudioPushListener = mAudioQueue.PushEvent().Connect(
+      mOwnerThread, this, &DecodedStream::SendData);
+  mAudioFinishListener = mAudioQueue.FinishEvent().Connect(
+      mOwnerThread, this, &DecodedStream::SendData);
+  mVideoPushListener = mVideoQueue.PushEvent().Connect(
+      mOwnerThread, this, &DecodedStream::SendData);
+  mVideoFinishListener = mVideoQueue.FinishEvent().Connect(
+      mOwnerThread, this, &DecodedStream::SendData);
+  mWatchManager.Watch(mPlaying, &DecodedStream::SendData);
+}
+
+void DecodedStream::DisconnectListener() {
+  AssertOwnerThread();
+  // Drops the four queue listeners and the mPlaying watch.
+  mAudioPushListener.Disconnect();
+  mVideoPushListener.Disconnect();
+  mAudioFinishListener.Disconnect();
+  mVideoFinishListener.Disconnect();
+  mWatchManager.Unwatch(mPlaying, &DecodedStream::SendData);
+}
+
+void DecodedStream::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+  AssertOwnerThread();
+  auto& info = aInfo.mDecodedStream;
+  info.mInstance = NS_ConvertUTF8toUTF16(nsPrintfCString("%p", this));
+  // -1 stands in for "no value" in mStartTime and mLastAudio.
+  info.mStartTime = mStartTime.isSome() ? mStartTime->ToMicroseconds() : -1;
+  info.mLastOutputTime = mLastOutputTime.ToMicroseconds();
+  info.mPlaying = mPlaying.Ref();
+  auto newestAudio = mAudioQueue.PeekBack();
+  info.mLastAudio =
+      newestAudio ? newestAudio->GetEndTime().ToMicroseconds() : -1;
+  info.mAudioQueueFinished = mAudioQueue.IsFinished();
+  info.mAudioQueueSize = AssertedCast<int>(mAudioQueue.GetSize());
+  if (mData) {
+    // Let mData fill in its own part of the report.
+    mData->GetDebugInfo(info.mData);
+  }
+}
+
+#undef LOG_DS
+
+}  // namespace mozilla
diff --git a/dom/media/mediasink/DecodedStream.h b/dom/media/mediasink/DecodedStream.h
new file mode 100644
index 0000000000..4709ffeda6
--- /dev/null
+++ b/dom/media/mediasink/DecodedStream.h
@@ -0,0 +1,154 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DecodedStream_h_
+#define DecodedStream_h_
+
+#include "AudibilityMonitor.h"
+#include "MediaEventSource.h"
+#include "MediaInfo.h"
+#include "MediaSegment.h"
+#include "MediaSink.h"
+
+#include "mozilla/AbstractThread.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/StateMirroring.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+
+class DecodedStreamData;
+class MediaDecoderStateMachine;
+class AudioData;
+class VideoData;
+struct PlaybackInfoInit;
+class ProcessedMediaTrack;
+class TimeStamp;
+
+template <class T>
+class MediaQueue;
+
+class DecodedStream : public MediaSink {
+ public:
+  DecodedStream(MediaDecoderStateMachine* aStateMachine,
+                nsMainThreadPtrHandle<SharedDummyTrack> aDummyTrack,
+                CopyableTArray<RefPtr<ProcessedMediaTrack>> aOutputTracks,
+                double aVolume, double aPlaybackRate, bool aPreservesPitch,
+                MediaQueue<AudioData>& aAudioQueue,
+                MediaQueue<VideoData>& aVideoQueue,
+                RefPtr<AudioDeviceInfo> aAudioDevice);
+
+  RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+  media::TimeUnit GetEndTime(TrackType aType) const override;
+  media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+  bool HasUnplayedFrames(TrackType aType) const override {
+    // TODO: bug 1755026
+    return false;
+  }
+
+  media::TimeUnit UnplayedDuration(TrackType aType) const override {
+    // TODO: bug 1755026
+    return media::TimeUnit::Zero();
+  }
+
+  void SetVolume(double aVolume) override;
+  void SetPlaybackRate(double aPlaybackRate) override;
+  void SetPreservesPitch(bool aPreservesPitch) override;
+  void SetPlaying(bool aPlaying) override;
+
+  double PlaybackRate() const override;
+
+  nsresult Start(const media::TimeUnit& aStartTime,
+                 const MediaInfo& aInfo) override;
+  void Stop() override;
+  bool IsStarted() const override;
+  bool IsPlaying() const override;
+  void Shutdown() override;
+  void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+  const AudioDeviceInfo* AudioDevice() const override { return mAudioDevice; }
+
+  MediaEventSource<bool>& AudibleEvent() { return mAudibleEvent; }
+
+ protected:
+  virtual ~DecodedStream();
+
+ private:
+  void DestroyData(UniquePtr<DecodedStreamData>&& aData);
+  void SendAudio(const PrincipalHandle& aPrincipalHandle);
+  void SendVideo(const PrincipalHandle& aPrincipalHandle);
+  void ResetAudio();
+  void ResetVideo(const PrincipalHandle& aPrincipalHandle);
+  void SendData();
+  void NotifyOutput(int64_t aTime);
+  void CheckIsDataAudible(const AudioData* aData);
+
+  void AssertOwnerThread() const {
+    MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+  }
+
+  void PlayingChanged();
+
+  void ConnectListener();
+  void DisconnectListener();
+
+  // Takes the audio that is going to be appended next as input. If there is
+  // a gap between that audio's time and the frames we have written, returns
+  // silence data that has the same amount of frames and can be used to fill
+  // the gap. If no gap exists, returns nullptr.
+  already_AddRefed<AudioData> CreateSilenceDataIfGapExists(
+      RefPtr<AudioData>& aNextAudio);
+
+  const RefPtr<AbstractThread> mOwnerThread;
+
+  // Used to access the graph.
+  const nsMainThreadPtrHandle<SharedDummyTrack> mDummyTrack;
+
+  /*
+   * Worker thread only members.
+   */
+  WatchManager<DecodedStream> mWatchManager;
+  UniquePtr<DecodedStreamData> mData;
+  RefPtr<EndedPromise> mAudioEndedPromise;
+  RefPtr<EndedPromise> mVideoEndedPromise;
+
+  Watchable<bool> mPlaying;
+  Mirror<PrincipalHandle> mPrincipalHandle;
+  AbstractCanonical<PrincipalHandle>* mCanonicalOutputPrincipal;
+  const nsTArray<RefPtr<ProcessedMediaTrack>> mOutputTracks;
+
+  double mVolume;
+  double mPlaybackRate;
+  bool mPreservesPitch;
+
+  media::NullableTimeUnit mStartTime;
+  media::TimeUnit mLastOutputTime;
+  MediaInfo mInfo;
+  // True when the stream is producing audible sound, false when it is silent.
+  bool mIsAudioDataAudible = false;
+  Maybe<AudibilityMonitor> mAudibilityMonitor;
+  MediaEventProducer<bool> mAudibleEvent;
+
+  MediaQueue<AudioData>& mAudioQueue;
+  MediaQueue<VideoData>& mVideoQueue;
+
+  // This is the audio device we were told to play out to.
+  // All audio is captured, so nothing is actually played out -- but we report
+  // this upwards as it could save us from being recreated when the sink
+  // changes.
+  const RefPtr<AudioDeviceInfo> mAudioDevice;
+
+  MediaEventListener mAudioPushListener;
+  MediaEventListener mVideoPushListener;
+  MediaEventListener mAudioFinishListener;
+  MediaEventListener mVideoFinishListener;
+  MediaEventListener mOutputListener;
+};
+
+}  // namespace mozilla
+
+#endif  // DecodedStream_h_
diff --git a/dom/media/mediasink/MediaSink.h b/dom/media/mediasink/MediaSink.h
new file mode 100644
index 0000000000..de6f26dcc9
--- /dev/null
+++ b/dom/media/mediasink/MediaSink.h
@@ -0,0 +1,142 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MediaSink_h_
+#define MediaSink_h_
+
+#include "MediaInfo.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/dom/MediaDebugInfoBinding.h"
+#include "nsISupportsImpl.h"
+
+class AudioDeviceInfo;
+
+namespace mozilla {
+
+class TimeStamp;
+class VideoFrameContainer;
+
+/**
+ * A consumer of audio/video data which plays audio and video tracks and
+ * manages A/V sync between them.
+ *
+ * A typical sink sends audio/video outputs to the speaker and screen.
+ * However, there are also sinks which capture the output of a media element
+ * and send the output to a MediaStream.
+ *
+ * This class is used to move A/V sync management and audio/video rendering
+ * out of MDSM so it is possible for subclasses to do external rendering using
+ * specific hardware which is required by TV projects and CDM.
+ *
+ * Note this class is not thread-safe and should be called from the state
+ * machine thread only.
+ */
+class MediaSink {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaSink);
+  using TrackType = mozilla::TrackInfo::TrackType;
+
+  // EndedPromise needs to be a non-exclusive promise as it is shared between
+  // both the AudioSink and VideoSink.
+  using EndedPromise = MozPromise<bool, nsresult, /* IsExclusive = */ false>;
+
+  // Return a promise which is resolved when the track finishes
+  // or null if no such track.
+  // Must be called after playback starts.
+  virtual RefPtr<EndedPromise> OnEnded(TrackType aType) = 0;
+
+  // Return the end time of the audio/video data that has been consumed
+  // or 0 if no such track.
+  // Must be called after playback starts.
+  virtual media::TimeUnit GetEndTime(TrackType aType) const = 0;
+
+  // Return playback position of the media.
+  // Since A/V sync is always maintained by this sink, there is no need to
+  // specify whether we want to get audio or video position.
+  // aTimeStamp returns the timeStamp corresponding to the returned position
+  // which is used by the compositor to derive the render time of video frames.
+  // Must be called after playback starts.
+  virtual media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) = 0;
+
+  // Return true if there is data consumed but not played yet.
+  // Can be called in any state.
+  virtual bool HasUnplayedFrames(TrackType aType) const = 0;
+
+  // Return the duration of data consumed but not played yet.
+  // Can be called in any state.
+  virtual media::TimeUnit UnplayedDuration(TrackType aType) const = 0;
+
+  // Set volume of the audio track.
+  // Does nothing if this sink has no audio track.
+  // Can be called in any state.
+  virtual void SetVolume(double aVolume) {}
+
+  // Set the audio stream name.
+  // Does nothing if this sink has no audio stream.
+  // Can be called in any state.
+  virtual void SetStreamName(const nsAString& aStreamName) {}
+
+  // Set the playback rate.
+  // Can be called in any state.
+  virtual void SetPlaybackRate(double aPlaybackRate) {}
+
+  // Whether to preserve pitch of the audio track.
+  // Does nothing if this sink has no audio track.
+  // Can be called in any state.
+  virtual void SetPreservesPitch(bool aPreservesPitch) {}
+
+  // Pause/resume the playback. Only works after playback starts.
+  virtual void SetPlaying(bool aPlaying) = 0;
+
+  // Get the playback rate.
+  // Can be called in any state.
+  virtual double PlaybackRate() const = 0;
+
+  // Single frame rendering operation may need to be done before playback
+  // started (1st frame) or right after seek completed or playback stopped.
+  // Does nothing if this sink has no video track. Can be called in any state.
+  virtual void Redraw(const VideoInfo& aInfo) {}
+
+  // Begin a playback session with the provided start time and media info.
+  // Must be called when playback is stopped.
+  virtual nsresult Start(const media::TimeUnit& aStartTime,
+                         const MediaInfo& aInfo) = 0;
+
+  // Finish a playback session.
+  // Must be called after playback starts.
+  virtual void Stop() = 0;
+
+  // Return true if playback has started.
+  // Can be called in any state.
+  virtual bool IsStarted() const = 0;
+
+  // Return true if playback is started and not paused otherwise false.
+  // Can be called in any state.
+  virtual bool IsPlaying() const = 0;
+
+  // The audio output device this MediaSink is playing audio data to. The
+  // default device is used if this returns null.
+  virtual const AudioDeviceInfo* AudioDevice() const = 0;
+
+  // Called on the state machine thread to shut down the sink. All resources
+  // allocated by this sink should be released.
+  // Must be called after playback stopped.
+  virtual void Shutdown() {}
+
+  virtual void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) {}
+
+  virtual void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {}
+
+  virtual void EnableTreatAudioUnderrunAsSilence(bool aEnabled) {}
+
+ protected:
+  virtual ~MediaSink() = default;
+};
+
+}  // namespace mozilla
+
+#endif  // MediaSink_h_
diff --git a/dom/media/mediasink/VideoSink.cpp b/dom/media/mediasink/VideoSink.cpp
new file mode 100644
index 0000000000..906efdf0db
--- /dev/null
+++ b/dom/media/mediasink/VideoSink.cpp
@@ -0,0 +1,706 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+// Include Windows headers required for enabling high precision timers.
+#  include <windows.h>
+#  include <mmsystem.h>
+#endif
+
+#include "VideoSink.h"
+
+#include "MediaQueue.h"
+#include "VideoUtils.h"
+
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/ProfilerMarkerTypes.h"
+#include "mozilla/StaticPrefs_browser.h"
+#include "mozilla/StaticPrefs_media.h"
+
+namespace mozilla {
+extern LazyLogModule gMediaDecoderLog;
+}
+
+#undef FMT
+
+#define FMT(x, ...) "VideoSink=%p " x, this, ##__VA_ARGS__
+#define VSINK_LOG(x, ...) \
+  MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, (FMT(x, ##__VA_ARGS__)))
+#define VSINK_LOG_V(x, ...) \
+  MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, (FMT(x, ##__VA_ARGS__)))
+
+namespace mozilla {
+
+using namespace mozilla::layers;
+
+// Minimum update frequency is 1/120th of a second, i.e. half the
+// duration of a 60-fps frame.
+static const int64_t MIN_UPDATE_INTERVAL_US = 1000000 / (60 * 2);
+
+static void SetImageToGreenPixel(PlanarYCbCrImage* aImage) {
+  static uint8_t greenPixel[] = {0x00, 0x00, 0x00};  // Y, Cb, Cr samples
+  PlanarYCbCrData data;
+  data.mYChannel = greenPixel;
+  data.mCbChannel = greenPixel + 1;
+  data.mCrChannel = greenPixel + 2;
+  data.mYStride = data.mCbCrStride = 1;
+  data.mPictureRect = gfx::IntRect(0, 0, 1, 1);
+  data.mYUVColorSpace = gfx::YUVColorSpace::BT601;
+  aImage->CopyData(data);  // aImage receives its own copy of the pixel.
+}
+
+VideoSink::VideoSink(AbstractThread* aThread, MediaSink* aAudioSink,
+                     MediaQueue<VideoData>& aVideoQueue,
+                     VideoFrameContainer* aContainer,
+                     FrameStatistics& aFrameStats,
+                     uint32_t aVQueueSentToCompositerSize)
+    : mOwnerThread(aThread),
+      mAudioSink(aAudioSink),
+      mVideoQueue(aVideoQueue),
+      mContainer(aContainer),
+      mProducerID(ImageContainer::AllocateProducerID()),
+      mFrameStats(aFrameStats),
+      mOldCompositorDroppedCount(mContainer ? mContainer->GetDroppedImageCount()
+                                            : 0),
+      mPendingDroppedCount(0),
+      mHasVideo(false),
+      mUpdateScheduler(aThread),
+      mVideoQueueSendToCompositorSize(aVQueueSentToCompositerSize),
+      mMinVideoQueueSize(StaticPrefs::media_ruin_av_sync_enabled() ? 1 : 0)
+#ifdef XP_WIN
+      ,
+      mHiResTimersRequested(false)
+#endif
+{
+  MOZ_ASSERT(mAudioSink, "AudioSink should exist.");
+  // With this pref set and a container present, set up mBlankImage up front.
+  if (StaticPrefs::browser_measurement_render_anims_and_video_solid() &&
+      mContainer) {
+    InitializeBlankImage();
+    MOZ_ASSERT(mBlankImage, "Blank image should exist.");
+  }
+}
+
+VideoSink::~VideoSink() {
+#ifdef XP_WIN
+  MOZ_ASSERT(!mHiResTimersRequested);  // No timer request may be left open.
+#endif
+}
+
+RefPtr<VideoSink::EndedPromise> VideoSink::OnEnded(TrackType aType) {
+  AssertOwnerThread();
+  MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts.");
+  if (aType == TrackInfo::kVideoTrack) {
+    return mEndPromise;  // The video promise is kept by this sink.
+  }
+  if (aType == TrackInfo::kAudioTrack) {
+    return mAudioSink->OnEnded(aType);  // Audio is answered by mAudioSink.
+  }
+  return nullptr;
+}
+
+media::TimeUnit VideoSink::GetEndTime(TrackType aType) const {
+  AssertOwnerThread();
+  MOZ_ASSERT(mAudioSink->IsStarted(), "Must be called after playback starts.");
+  if (aType == TrackInfo::kAudioTrack) {
+    return mAudioSink->GetEndTime(aType);  // Audio is answered by mAudioSink.
+  }
+  if (aType == TrackInfo::kVideoTrack) {
+    return mVideoFrameEndTime;  // Video end time is kept by this sink.
+  }
+  return media::TimeUnit::Zero();
+}
+
+media::TimeUnit VideoSink::GetPosition(TimeStamp* aTimeStamp) {
+  AssertOwnerThread();
+  return mAudioSink->GetPosition(aTimeStamp);  // Position is mAudioSink's.
+}
+
+bool VideoSink::HasUnplayedFrames(TrackType aType) const {
+  AssertOwnerThread();
+  MOZ_ASSERT(aType == TrackInfo::kAudioTrack,
+             "Not implemented for non audio tracks.");
+  // Audio-only query; the answer comes from mAudioSink.
+  return mAudioSink->HasUnplayedFrames(aType);
+}
+
+media::TimeUnit VideoSink::UnplayedDuration(TrackType aType) const {
+  AssertOwnerThread();
+  MOZ_ASSERT(aType == TrackInfo::kAudioTrack,
+             "Not implemented for non audio tracks.");
+  // Audio-only query; the answer comes from mAudioSink.
+  return mAudioSink->UnplayedDuration(aType);
+}
+
+void VideoSink::SetPlaybackRate(double aPlaybackRate) {
+  AssertOwnerThread();
+  // Forwarded unchanged to mAudioSink.
+  mAudioSink->SetPlaybackRate(aPlaybackRate);
+}
+
+void VideoSink::SetVolume(double aVolume) {
+  AssertOwnerThread();
+  // Forwarded unchanged to mAudioSink.
+  mAudioSink->SetVolume(aVolume);
+}
+
+void VideoSink::SetStreamName(const nsAString& aStreamName) {
+  AssertOwnerThread();
+  // Forwarded unchanged to mAudioSink.
+  mAudioSink->SetStreamName(aStreamName);
+}
+
+void VideoSink::SetPreservesPitch(bool aPreservesPitch) {
+  AssertOwnerThread();
+  // Forwarded unchanged to mAudioSink.
+  mAudioSink->SetPreservesPitch(aPreservesPitch);
+}
+
+double VideoSink::PlaybackRate() const {
+  AssertOwnerThread();
+  // The rate is whatever mAudioSink reports.
+  return mAudioSink->PlaybackRate();
+}
+
+void VideoSink::EnsureHighResTimersOnOnlyIfPlaying() {
+#ifdef XP_WIN
+  const bool wanted = IsPlaying();
+  if (wanted == mHiResTimersRequested) {
+    return;  // Already in the right state.
+  }
+  // High precision timers keep the VideoSink waking up at reliable
+  // intervals to set the next frame; without them we drop frames while
+  // painting. They also make the CPU wake up more frequently on Windows 7
+  // and earlier, which costs CPU load and battery, so they are only
+  // requested while actually playing. Every timeBeginPeriod() call must be
+  // matched by a corresponding timeEndPeriod() call; mHiResTimersRequested
+  // records which of the two was issued last.
+  if (wanted) {
+    timeBeginPeriod(1);
+  } else {
+    timeEndPeriod(1);
+  }
+  mHiResTimersRequested = wanted;
+#endif
+}
+
+void VideoSink::SetPlaying(bool aPlaying) {
+  AssertOwnerThread();
+  VSINK_LOG_V(" playing (%d) -> (%d)", mAudioSink->IsPlaying(), aPlaying);
+  // The pause handling below runs before the new state reaches mAudioSink.
+  if (!aPlaying) {
+    // Reset any update timer if paused.
+    mUpdateScheduler.Reset();
+    // Since playback is paused, tell compositor to render only current frame.
+    TimeStamp nowTime;
+    const auto clockTime = mAudioSink->GetPosition(&nowTime);
+    RenderVideoFrames(1, clockTime.ToMicroseconds(), nowTime);
+    if (mContainer) {
+      mContainer->ClearCachedResources();
+    }
+    if (mSecondaryContainer) {
+      mSecondaryContainer->ClearCachedResources();
+    }
+  }
+
+  mAudioSink->SetPlaying(aPlaying);
+
+  if (mHasVideo && aPlaying) {
+    // VideoSink has no thread of its own for pulling video frames, so
+    // rendering must be triggered explicitly when playback starts, because
+    // the VideoQueue may be full already.
+    TryUpdateRenderedVideoFrames();
+  }
+  // Re-evaluate the high-res timer request against the new playing state.
+  EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+nsresult VideoSink::Start(const media::TimeUnit& aStartTime,
+                          const MediaInfo& aInfo) {
+  AssertOwnerThread();
+  VSINK_LOG("[%s]", __func__);
+  // Start mAudioSink first; its result is what this function returns.
+  nsresult rv = mAudioSink->Start(aStartTime, aInfo);
+
+  mHasVideo = aInfo.HasVideo();
+
+  if (mHasVideo) {
+    mEndPromise = mEndPromiseHolder.Ensure(__func__);
+
+    // If the underlying MediaSink has an end promise for the video track (which
+    // happens when mAudioSink refers to a DecodedStream), we must wait for it
+    // to complete before resolving our own end promise. Otherwise, MDSM might
+    // stop playback before DecodedStream plays to the end and cause
+    // test_streams_element_capture.html to time out.
+    RefPtr<EndedPromise> p = mAudioSink->OnEnded(TrackInfo::kVideoTrack);
+    if (p) {
+      RefPtr<VideoSink> self = this;
+      p->Then(
+           mOwnerThread, __func__,
+           [self]() {
+             self->mVideoSinkEndRequest.Complete();
+             self->TryUpdateRenderedVideoFrames();
+             // It is possible the video queue size is 0 and we have no
+             // frames to render. However, we need to call
+             // MaybeResolveEndPromise() to ensure mEndPromiseHolder is
+             // resolved.
+             self->MaybeResolveEndPromise();
+           },
+           [self]() {
+             self->mVideoSinkEndRequest.Complete();
+             self->TryUpdateRenderedVideoFrames();
+             self->MaybeResolveEndPromise();
+           })
+          ->Track(mVideoSinkEndRequest);
+    }
+
+    ConnectListener();
+    // Run the render loop at least once so we can resolve the end promise
+    // when video duration is 0.
+    UpdateRenderedVideoFrames();
+  }
+  return rv;
+}
+
+void VideoSink::Stop() {
+  AssertOwnerThread();
+  MOZ_ASSERT(mAudioSink->IsStarted(), "playback not started.");
+  VSINK_LOG("[%s]", __func__);
+  // Stop mAudioSink first, then tear down this sink's own video state.
+  mAudioSink->Stop();
+
+  mUpdateScheduler.Reset();
+  if (mHasVideo) {
+    DisconnectListener();
+    mVideoSinkEndRequest.DisconnectIfExists();
+    mEndPromiseHolder.ResolveIfExists(true, __func__);
+    mEndPromise = nullptr;
+  }
+  mVideoFrameEndTime = media::TimeUnit::Zero();
+
+  EnsureHighResTimersOnOnlyIfPlaying();
+}
+
+bool VideoSink::IsStarted() const {
+  AssertOwnerThread();
+  // Started state is whatever mAudioSink reports.
+  return mAudioSink->IsStarted();
+}
+
+bool VideoSink::IsPlaying() const {
+  AssertOwnerThread();
+  // Playing state is whatever mAudioSink reports.
+  return mAudioSink->IsPlaying();
+}
+
+const AudioDeviceInfo* VideoSink::AudioDevice() const {
+  return mAudioSink->AudioDevice();  // Whatever mAudioSink reports.
+}
+
+void VideoSink::Shutdown() {
+  AssertOwnerThread();
+  MOZ_ASSERT(!mAudioSink->IsStarted(), "must be called after playback stops.");
+  VSINK_LOG("[%s]", __func__);
+  // The only work here is shutting down mAudioSink.
+  mAudioSink->Shutdown();
+}
+
+void VideoSink::OnVideoQueuePushed(RefPtr<VideoData>&& aSample) {
+  AssertOwnerThread();
+  // Push-event handler: VideoSink should try rendering ASAP if the first
+  // frame arrives but the update scheduler is not triggered yet.
+  if (!aSample->IsSentToCompositor()) {
+    // Since we push rendered frames back to the queue, we will receive
+    // push events for them. We only need to trigger the render loop
+    // when this frame is not rendered yet.
+    TryUpdateRenderedVideoFrames();
+  }
+}
+
+void VideoSink::OnVideoQueueFinished() {
+  AssertOwnerThread();
+  // Run the render loop if idle, playing, and the end promise is unresolved.
+  const bool idle = !mUpdateScheduler.IsScheduled();
+  if (idle && mAudioSink->IsPlaying() && !mEndPromiseHolder.IsEmpty()) {
+    UpdateRenderedVideoFrames();
+  }
+}
+
+void VideoSink::Redraw(const VideoInfo& aInfo) {
+  AUTO_PROFILER_LABEL("VideoSink::Redraw", MEDIA_PLAYBACK);
+  AssertOwnerThread();
+
+  // No video track, nothing to draw.
+  if (!aInfo.IsValid() || !mContainer) {
+    return;
+  }
+
+  auto now = TimeStamp::Now();
+  // Prefer the frame at the front of the queue, if there is one.
+  RefPtr<VideoData> video = VideoQueue().PeekFront();
+  if (video) {
+    if (mBlankImage) {
+      video->mImage = mBlankImage;
+    }
+    video->MarkSentToCompositor();
+    mContainer->SetCurrentFrame(video->mDisplay, video->mImage, now);
+    if (mSecondaryContainer) {
+      mSecondaryContainer->SetCurrentFrame(video->mDisplay, video->mImage, now);
+    }
+    return;
+  }
+
+  // When we reach here, it means there are no frames in this video track.
+  // Draw a blank frame to ensure there is something in the image container
+  // to fire 'loadeddata'.
+
+  RefPtr<Image> blank =
+      mContainer->GetImageContainer()->CreatePlanarYCbCrImage();
+  mContainer->SetCurrentFrame(aInfo.mDisplay, blank, now);
+
+  if (mSecondaryContainer) {
+    mSecondaryContainer->SetCurrentFrame(aInfo.mDisplay, blank, now);
+  }
+}
+
+void VideoSink::TryUpdateRenderedVideoFrames() {
+  AUTO_PROFILER_LABEL("VideoSink::TryUpdateRenderedVideoFrames",
+                      MEDIA_PLAYBACK);
+  AssertOwnerThread();
+  if (mUpdateScheduler.IsScheduled() || !mAudioSink->IsPlaying()) {
+    return;
+  }
+  RefPtr<VideoData> v = VideoQueue().PeekFront();
+  if (!v) {
+    // No frames to render.
+    return;
+  }
+  // Compare the front frame's start time with the playback clock.
+  TimeStamp nowTime;
+  const media::TimeUnit clockTime = mAudioSink->GetPosition(&nowTime);
+  if (clockTime >= v->mTime) {
+    // Time to render this frame.
+    UpdateRenderedVideoFrames();
+    return;
+  }
+
+  // If we send this future frame to the compositor now, it will be rendered
+  // immediately and break A/V sync. Instead, we schedule a timer to send it
+  // later.
+  int64_t delta =
+      (v->mTime - clockTime).ToMicroseconds() / mAudioSink->PlaybackRate();
+  TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta);
+  RefPtr<VideoSink> self = this;
+  mUpdateScheduler.Ensure(
+      target, [self]() { self->UpdateRenderedVideoFramesByTimer(); },
+      [self]() { self->UpdateRenderedVideoFramesByTimer(); });
+}
+
+void VideoSink::UpdateRenderedVideoFramesByTimer() {
+  AssertOwnerThread();
+  mUpdateScheduler.CompleteRequest();  // Scheduler callback ran; mark it done.
+  UpdateRenderedVideoFrames();
+}
+
+void VideoSink::ConnectListener() {
+  AssertOwnerThread();
+  mPushListener = VideoQueue().PushEvent().Connect(
+      mOwnerThread, this, &VideoSink::OnVideoQueuePushed);  // push event
+  mFinishListener = VideoQueue().FinishEvent().Connect(
+      mOwnerThread, this, &VideoSink::OnVideoQueueFinished);  // finish event
+}
+
+void VideoSink::DisconnectListener() {
+  AssertOwnerThread();
+  mPushListener.Disconnect();    // Was routed to OnVideoQueuePushed.
+  mFinishListener.Disconnect();  // Was routed to OnVideoQueueFinished.
+}
+
+void VideoSink::RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime,
+                                  const TimeStamp& aClockTimeStamp) {
+  AUTO_PROFILER_LABEL("VideoSink::RenderVideoFrames", MEDIA_PLAYBACK);
+  AssertOwnerThread();
+  // Needs both queued frames (at most aMaxFrames) and a container.
+  AutoTArray<RefPtr<VideoData>, 16> frames;
+  VideoQueue().GetFirstElements(aMaxFrames, &frames);
+  if (frames.IsEmpty() || !mContainer) {
+    return;
+  }
+  // Build the image list, giving each kept frame its own TimeStamp.
+  AutoTArray<ImageContainer::NonOwningImage, 16> images;
+  TimeStamp lastFrameTime;
+  double playbackRate = mAudioSink->PlaybackRate();
+  for (uint32_t i = 0; i < frames.Length(); ++i) {
+    VideoData* frame = frames[i];
+    bool wasSent = frame->IsSentToCompositor();
+    frame->MarkSentToCompositor();
+
+    if (!frame->mImage || !frame->mImage->IsValid() ||
+        !frame->mImage->GetSize().width || !frame->mImage->GetSize().height) {
+      continue;
+    }
+
+    if (frame->mTime.IsNegative()) {
+      // Frame times before the start time are invalid; drop such frames
+      continue;
+    }
+
+    MOZ_ASSERT(!aClockTimeStamp.IsNull());
+    int64_t delta = frame->mTime.ToMicroseconds() - aClockTime;
+    TimeStamp t =
+        aClockTimeStamp + TimeDuration::FromMicroseconds(delta / playbackRate);
+    if (!lastFrameTime.IsNull() && t <= lastFrameTime) {
+      // Timestamps out of order; drop the new frame. In theory we should
+      // probably replace the previous frame with the new frame if the
+      // timestamps are equal, but this is a corrupt video file already so
+      // never mind.
+      continue;
+    }
+    MOZ_ASSERT(!t.IsNull());
+    lastFrameTime = t;
+
+    ImageContainer::NonOwningImage* img = images.AppendElement();
+    img->mTimeStamp = t;
+    img->mImage = frame->mImage;
+    if (mBlankImage) {
+      img->mImage = mBlankImage;
+    }
+    img->mFrameID = frame->mFrameID;
+    img->mProducerID = mProducerID;
+
+    VSINK_LOG_V("playing video frame %" PRId64
+                " (id=%x, vq-queued=%zu, clock=%" PRId64 ")",
+                frame->mTime.ToMicroseconds(), frame->mFrameID,
+                VideoQueue().GetSize(), aClockTime);
+    if (!wasSent) {
+      PROFILER_MARKER("PlayVideo", MEDIA_PLAYBACK, {}, MediaSampleMarker,
+                      frame->mTime.ToMicroseconds(),
+                      frame->GetEndTime().ToMicroseconds(),
+                      VideoQueue().GetSize());
+    }
+  }
+
+  if (images.Length() > 0) {
+    mContainer->SetCurrentFrames(frames[0]->mDisplay, images);
+
+    if (mSecondaryContainer) {
+      mSecondaryContainer->SetCurrentFrames(frames[0]->mDisplay, images);
+    }
+  }
+}
+
+// Drops queued frames whose end time is at or before the audio clock, folds
+// the resulting sent/dropped counts into the frame statistics, hands the
+// remaining frames to RenderVideoFrames(), and arms the update timer for the
+// start time of the second frame in the queue (if there is one).
+void VideoSink::UpdateRenderedVideoFrames() {
+  AUTO_PROFILER_LABEL("VideoSink::UpdateRenderedVideoFrames", MEDIA_PLAYBACK);
+  AssertOwnerThread();
+  MOZ_ASSERT(mAudioSink->IsPlaying(), "should be called while playing.");
+
+  // Profiler payload emitted for every frame that is popped from the queue
+  // without having been sent to the compositor.
+  struct VideoSinkDroppedFrameMarker {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("VideoSinkDroppedFrame");
+    }
+    static void StreamJSONMarkerData(
+        baseprofiler::SpliceableJSONWriter& aWriter,
+        int64_t aSampleStartTimeUs, int64_t aSampleEndTimeUs,
+        int64_t aClockTimeUs) {
+      aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs);
+      aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs);
+      aWriter.IntProperty("clockTimeUs", aClockTimeUs);
+    }
+    static MarkerSchema MarkerTypeDisplay() {
+      using MS = MarkerSchema;
+      MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+      schema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time",
+                               MS::Format::Microseconds);
+      schema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time",
+                               MS::Format::Microseconds);
+      schema.AddKeyLabelFormat("clockTimeUs", "Audio clock time",
+                               MS::Format::Microseconds);
+      return schema;
+    }
+  };
+
+  // The audio sink supplies the playback position; |nowTime| is handed over
+  // as the out-parameter for the accompanying timestamp.
+  TimeStamp nowTime;
+  const auto clockTime = mAudioSink->GetPosition(&nowTime);
+  MOZ_ASSERT(!clockTime.IsNegative(), "Should have positive clock time.");
+  const int64_t clockTimeUs = clockTime.ToMicroseconds();
+
+  uint32_t sentToCompositorCount = 0;
+  uint32_t droppedInSink = 0;
+
+  // Pop every frame that has already ended, but never shrink the queue below
+  // mMinVideoQueueSize.
+  media::TimeUnit lastFrameEndTime;
+  while (VideoQueue().GetSize() > mMinVideoQueueSize &&
+         clockTime >= VideoQueue().PeekFront()->GetEndTime()) {
+    RefPtr<VideoData> expired = VideoQueue().PopFront();
+    lastFrameEndTime = expired->GetEndTime();
+    if (expired->IsSentToCompositor()) {
+      sentToCompositorCount++;
+      continue;
+    }
+
+    // The frame expired before it was ever handed to the compositor.
+    droppedInSink++;
+    VSINK_LOG_V("discarding video frame mTime=%" PRId64
+                " clock_time=%" PRId64,
+                expired->mTime.ToMicroseconds(), clockTimeUs);
+    profiler_add_marker(
+        "VideoSinkDroppedFrame", geckoprofiler::category::MEDIA_PLAYBACK, {},
+        VideoSinkDroppedFrameMarker{}, expired->mTime.ToMicroseconds(),
+        expired->GetEndTime().ToMicroseconds(), clockTimeUs);
+  }
+
+  if (droppedInSink || sentToCompositorCount) {
+    // Compositor drops are reported as a running total; take the difference
+    // against the total recorded last time.
+    const uint32_t compositorDropTotal = mContainer->GetDroppedImageCount();
+    const uint32_t droppedInCompositor =
+        compositorDropTotal - mOldCompositorDroppedCount;
+    if (droppedInCompositor > 0) {
+      mOldCompositorDroppedCount = compositorDropTotal;
+      VSINK_LOG_V("%u video frame previously discarded by compositor",
+                  droppedInCompositor);
+    }
+
+    // Only as many pending drops as frames sent in this round are deducted
+    // from the presented count; the remainder stays pending.
+    mPendingDroppedCount += droppedInCompositor;
+    const uint32_t droppedReported =
+        std::min(mPendingDroppedCount, sentToCompositorCount);
+    mPendingDroppedCount -= droppedReported;
+
+    mFrameStats.Accumulate({0, 0, sentToCompositorCount - droppedReported, 0,
+                            droppedInSink, droppedInCompositor});
+  }
+
+  // The end time of the last displayed frame is the end of the frame now at
+  // the head of the queue, or, when the queue has been emptied, the end of
+  // the last frame popped above. It never moves backwards.
+  RefPtr<VideoData> headFrame = VideoQueue().PeekFront();
+  const media::TimeUnit shownEndTime =
+      headFrame ? headFrame->GetEndTime() : lastFrameEndTime;
+  mVideoFrameEndTime = std::max(mVideoFrameEndTime, shownEndTime);
+
+  RenderVideoFrames(mVideoQueueSendToCompositorSize, clockTimeUs, nowTime);
+
+  MaybeResolveEndPromise();
+
+  // Without a second queued frame there is nothing to schedule; the render
+  // loop is run again when new frames arrive.
+  nsTArray<RefPtr<VideoData>> upcoming;
+  VideoQueue().GetFirstElements(2, &upcoming);
+  if (upcoming.Length() < 2) {
+    return;
+  }
+
+  // Wake up at the start of the next frame, but no sooner than
+  // MIN_UPDATE_INTERVAL_US from now; the media-time delta is scaled by the
+  // playback rate.
+  int64_t nextFrameTime = upcoming[1]->mTime.ToMicroseconds();
+  int64_t delta = std::max(nextFrameTime - clockTimeUs, MIN_UPDATE_INTERVAL_US);
+  TimeStamp target = nowTime + TimeDuration::FromMicroseconds(
+                                   delta / mAudioSink->PlaybackRate());
+
+  // Resolve and reject handlers both re-enter the timer-driven update path.
+  RefPtr<VideoSink> self = this;
+  mUpdateScheduler.Ensure(
+      target, [self]() { self->UpdateRenderedVideoFramesByTimer(); },
+      [self]() { self->UpdateRenderedVideoFramesByTimer(); });
+}
+
+// Resolves the end promise once the video queue is finished and holds at most
+// one frame. If the audio clock has not yet reached the end time of the last
+// displayed frame, resolution is deferred with the update scheduler until
+// that time instead of happening immediately.
+void VideoSink::MaybeResolveEndPromise() {
+  AssertOwnerThread();
+  // All frames are rendered, Let's resolve the promise.
+  if (VideoQueue().IsFinished() && VideoQueue().GetSize() <= 1 &&
+      !mVideoSinkEndRequest.Exists()) {
+    if (VideoQueue().GetSize() == 1) {
+      // Remove the last frame since we have sent it to compositor.
+      RefPtr<VideoData> frame = VideoQueue().PopFront();
+      // Account for the final frame: it is counted as a compositor drop while
+      // drops are still pending, and as presented otherwise.
+      if (mPendingDroppedCount > 0) {
+        mFrameStats.Accumulate({0, 0, 0, 0, 0, 1});
+        mPendingDroppedCount--;
+      } else {
+        mFrameStats.NotifyPresentedFrame();
+      }
+    }
+
+    TimeStamp nowTime;
+    const auto clockTime = mAudioSink->GetPosition(&nowTime);
+
+    // Clear future frames from the compositor, in case the playback position
+    // unexpectedly jumped to the end, and all frames between the previous
+    // playback position and the end were discarded. Old frames based on the
+    // previous playback position might still be queued in the compositor. See
+    // bug 1598143 for when this can happen.
+    mContainer->ClearFutureFrames(nowTime);
+    if (mSecondaryContainer) {
+      mSecondaryContainer->ClearFutureFrames(nowTime);
+    }
+
+    if (clockTime < mVideoFrameEndTime) {
+      VSINK_LOG_V(
+          "Not reach video end time yet, reschedule timer to resolve "
+          "end promise. clockTime=%" PRId64 ", endTime=%" PRId64,
+          clockTime.ToMicroseconds(), mVideoFrameEndTime.ToMicroseconds());
+      // Remaining media time, scaled by the playback rate.
+      int64_t delta = (mVideoFrameEndTime - clockTime).ToMicroseconds() /
+                      mAudioSink->PlaybackRate();
+      TimeStamp target = nowTime + TimeDuration::FromMicroseconds(delta);
+      auto resolveEndPromise = [self = RefPtr<VideoSink>(this)]() {
+        self->mEndPromiseHolder.ResolveIfExists(true, __func__);
+        self->mUpdateScheduler.CompleteRequest();
+      };
+      // The reject handler gets its own copy of the closure. Moving the same
+      // closure object into both handler slots would leave one of them built
+      // from a moved-from closure whose captured |self| may be null, which
+      // would be dereferenced if that handler ever ran.
+      auto rejectEndPromise = resolveEndPromise;
+      mUpdateScheduler.Ensure(target, std::move(resolveEndPromise),
+                              std::move(rejectEndPromise));
+    } else {
+      mEndPromiseHolder.ResolveIfExists(true, __func__);
+    }
+  }
+}
+
+// Installs (or clears, when aSecondary is null) the secondary video
+// container. When a container is installed while the sink is not playing,
+// the image currently held by the main container is copied over to it.
+void VideoSink::SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) {
+  AssertOwnerThread();
+  mSecondaryContainer = aSecondary;
+
+  // Nothing more to do while playing or when the container was just cleared.
+  if (IsPlaying() || !mSecondaryContainer) {
+    return;
+  }
+
+  ImageContainer* primaryImages = mContainer->GetImageContainer();
+  ImageContainer* secondaryImages = mSecondaryContainer->GetImageContainer();
+  MOZ_DIAGNOSTIC_ASSERT(primaryImages);
+  MOZ_DIAGNOSTIC_ASSERT(secondaryImages);
+
+  // Look up the image the main container holds for the current time and, if
+  // there is one, show it in the secondary container too.
+  AutoLockImage imageLock(primaryImages);
+  TimeStamp now = TimeStamp::Now();
+  RefPtr<Image> image = imageLock.GetImage(now);
+  if (!image) {
+    return;
+  }
+
+  AutoTArray<ImageContainer::NonOwningImage, 1> snapshot;
+  snapshot.AppendElement(ImageContainer::NonOwningImage(
+      image, now, /* frameID */ 1,
+      /* producerId */ ImageContainer::AllocateProducerID()));
+  secondaryImages->SetCurrentImages(snapshot);
+}
+
+// Fills the video-sink section of aInfo from this sink's current state, then
+// passes aInfo on to the wrapped audio sink so it can add its own details.
+void VideoSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
+  AssertOwnerThread();
+
+  auto& videoInfo = aInfo.mVideoSink;
+  videoInfo.mIsStarted = IsStarted();
+  videoInfo.mIsPlaying = IsPlaying();
+  videoInfo.mFinished = VideoQueue().IsFinished();
+  videoInfo.mSize = VideoQueue().GetSize();
+  videoInfo.mVideoFrameEndTime = mVideoFrameEndTime.ToMicroseconds();
+  videoInfo.mHasVideo = mHasVideo;
+  videoInfo.mVideoSinkEndRequestExists = mVideoSinkEndRequest.Exists();
+  videoInfo.mEndPromiseHolderIsEmpty = mEndPromiseHolder.IsEmpty();
+
+  mAudioSink->GetDebugInfo(aInfo);
+}
+
+// Creates a planar YCbCr image from the main container's image container,
+// stores it in mBlankImage and passes it to SetImageToGreenPixel(). Returns
+// false when no image could be created, true otherwise.
+bool VideoSink::InitializeBlankImage() {
+  mBlankImage = mContainer->GetImageContainer()->CreatePlanarYCbCrImage();
+  if (mBlankImage) {
+    SetImageToGreenPixel(mBlankImage->AsPlanarYCbCrImage());
+    return true;
+  }
+  return false;
+}
+
+// Forwards the setting unchanged to the wrapped audio sink; the video sink
+// keeps no state of its own for it.
+void VideoSink::EnableTreatAudioUnderrunAsSilence(bool aEnabled) {
+  mAudioSink->EnableTreatAudioUnderrunAsSilence(aEnabled);
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mediasink/VideoSink.h b/dom/media/mediasink/VideoSink.h
new file mode 100644
index 0000000000..7f2528d870
--- /dev/null
+++ b/dom/media/mediasink/VideoSink.h
@@ -0,0 +1,177 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VideoSink_h_
+#define VideoSink_h_
+
+#include "FrameStatistics.h"
+#include "ImageContainer.h"
+#include "MediaEventSource.h"
+#include "MediaSink.h"
+#include "MediaTimer.h"
+#include "VideoFrameContainer.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+
+namespace mozilla {
+
+class VideoFrameContainer;
+template <class T>
+class MediaQueue;
+
+// A MediaSink that wraps another MediaSink (the "audio sink") and adds video
+// rendering on top of it: frames are taken from a MediaQueue<VideoData> and
+// set on a VideoFrameContainer (plus an optional secondary container), using
+// the wrapped sink's playback position as the clock.
+class VideoSink : public MediaSink {
+  using ProducerID = mozilla::layers::ImageContainer::ProducerID;
+
+ public:
+  VideoSink(AbstractThread* aThread, MediaSink* aAudioSink,
+            MediaQueue<VideoData>& aVideoQueue, VideoFrameContainer* aContainer,
+            FrameStatistics& aFrameStats, uint32_t aVQueueSentToCompositerSize);
+
+  RefPtr<EndedPromise> OnEnded(TrackType aType) override;
+
+  media::TimeUnit GetEndTime(TrackType aType) const override;
+
+  media::TimeUnit GetPosition(TimeStamp* aTimeStamp = nullptr) override;
+
+  bool HasUnplayedFrames(TrackType aType) const override;
+  media::TimeUnit UnplayedDuration(TrackType aType) const override;
+
+  void SetPlaybackRate(double aPlaybackRate) override;
+
+  void SetVolume(double aVolume) override;
+
+  void SetStreamName(const nsAString& aStreamName) override;
+
+  void SetPreservesPitch(bool aPreservesPitch) override;
+
+  void SetPlaying(bool aPlaying) override;
+
+  double PlaybackRate() const override;
+
+  void Redraw(const VideoInfo& aInfo) override;
+
+  nsresult Start(const media::TimeUnit& aStartTime,
+                 const MediaInfo& aInfo) override;
+
+  void Stop() override;
+
+  bool IsStarted() const override;
+
+  bool IsPlaying() const override;
+
+  const AudioDeviceInfo* AudioDevice() const override;
+
+  void Shutdown() override;
+
+  void SetSecondaryVideoContainer(VideoFrameContainer* aSecondary) override;
+
+  void GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) override;
+
+  void EnableTreatAudioUnderrunAsSilence(bool aEnabled) override;
+
+ private:
+  virtual ~VideoSink();
+
+  // VideoQueue listener related.
+  void OnVideoQueuePushed(RefPtr<VideoData>&& aSample);
+  void OnVideoQueueFinished();
+  void ConnectListener();
+  void DisconnectListener();
+
+  void EnsureHighResTimersOnOnlyIfPlaying();
+
+  // Sets VideoQueue images into the VideoFrameContainer. Called on the shared
+  // state machine thread. The first aMaxFrames (at most) are set.
+  // aClockTime and aClockTimeStamp are used as the baseline for deriving
+  // timestamps for the frames; when omitted, aMaxFrames must be 1 and
+  // a null timestamp is passed to the VideoFrameContainer.
+  // If the VideoQueue is empty, this does nothing.
+  void RenderVideoFrames(int32_t aMaxFrames, int64_t aClockTime = 0,
+                         const TimeStamp& aClockTimeStamp = TimeStamp());
+
+  // Triggered while videosink is started, videosink becomes "playing" status,
+  // or VideoQueue event arrived.
+  void TryUpdateRenderedVideoFrames();
+
+  // If we have video, display a video frame if its time for display has
+  // arrived, otherwise sleep until it's time for the next frame. Update the
+  // current frame time as appropriate, and trigger ready state update.
+  // Called on the shared state machine thread.
+  void UpdateRenderedVideoFrames();
+  void UpdateRenderedVideoFramesByTimer();
+
+  // Resolves the end promise once the finished video queue has been drained,
+  // waiting for the clock to reach mVideoFrameEndTime first if necessary.
+  void MaybeResolveEndPromise();
+
+  void AssertOwnerThread() const {
+    MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+  }
+
+  MediaQueue<VideoData>& VideoQueue() const { return mVideoQueue; }
+
+  const RefPtr<AbstractThread> mOwnerThread;
+  // The wrapped sink; also the source of the playback clock.
+  const RefPtr<MediaSink> mAudioSink;
+  MediaQueue<VideoData>& mVideoQueue;
+  // Main container that frames are rendered into (held as a raw pointer).
+  VideoFrameContainer* mContainer;
+  // Optional second container that receives the same frames; may be null.
+  RefPtr<VideoFrameContainer> mSecondaryContainer;
+
+  // Producer ID to help ImageContainer distinguish different streams of
+  // FrameIDs. A unique and immutable value per VideoSink.
+  const ProducerID mProducerID;
+
+  // Used to notify MediaDecoder's frame statistics
+  FrameStatistics& mFrameStats;
+
+  RefPtr<EndedPromise> mEndPromise;
+  MozPromiseHolder<EndedPromise> mEndPromiseHolder;
+  MozPromiseRequestHolder<EndedPromise> mVideoSinkEndRequest;
+
+  // The presentation end time of the last video frame which has been displayed.
+  media::TimeUnit mVideoFrameEndTime;
+
+  // Compositor drop total seen at the last statistics update, and the number
+  // of compositor drops not yet deducted from the presented-frame count.
+  uint32_t mOldCompositorDroppedCount;
+  uint32_t mPendingDroppedCount;
+
+  // Event listeners for VideoQueue
+  MediaEventListener mPushListener;
+  MediaEventListener mFinishListener;
+
+  // True if this sink is going to handle video track.
+  bool mHasVideo;
+
+  // Used to trigger another update of rendered frames in next round.
+  DelayedScheduler mUpdateScheduler;
+
+  // Max frame number sent to compositor at a time.
+  // Based on the pref value obtained in MDSM.
+  const uint32_t mVideoQueueSendToCompositorSize;
+
+  // Talos tests for the compositor require at least one frame in the
+  // video queue so that the compositor has something to composite during
+  // the talos test when the decode is stressed. We have a minimum size
+  // on the video queue in order to facilitate this talos test.
+  // Note: Normal playback should not have a queue size of more than 0,
+  // otherwise A/V sync will be ruined! *Only* make this non-zero for
+  // testing purposes.
+  const uint32_t mMinVideoQueueSize;
+
+#ifdef XP_WIN
+  // Whether we've called timeBeginPeriod(1) to request high resolution
+  // timers. We request high resolution timers when playback starts, and
+  // turn them off when playback is paused. Enabling high resolution
+  // timers can cause higher CPU usage and battery drain on Windows 7,
+  // but reduces our frame drop rate.
+  bool mHiResTimersRequested;
+#endif
+
+  // Placeholder image created by InitializeBlankImage(); null until then.
+  RefPtr<layers::Image> mBlankImage;
+  bool InitializeBlankImage();
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mediasink/moz.build b/dom/media/mediasink/moz.build
new file mode 100644
index 0000000000..6db074538f
--- /dev/null
+++ b/dom/media/mediasink/moz.build
@@ -0,0 +1,25 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Implementation files of the media sinks in this directory. Listing them in
+# UNIFIED_SOURCES lets the build system compile them in unified chunks.
+UNIFIED_SOURCES += [
+    "AudioDecoderInputTrack.cpp",
+    "AudioSink.cpp",
+    "AudioSinkWrapper.cpp",
+    "DecodedStream.cpp",
+    "VideoSink.cpp",
+]
+
+# Only the MediaSink interface header is exported; the other headers in this
+# directory are not listed here.
+EXPORTS += [
+    "MediaSink.h",
+]
+
+# Adds /dom/media to the include search path for the sources above.
+LOCAL_INCLUDES += [
+    "/dom/media",
+]
+
+# NOTE(review): presumably required because these sources use IPC/chromium
+# headers -- confirm before removing.
+include("/ipc/chromium/chromium-config.mozbuild")
+
+# Objects built here are linked into the "xul" library.
+FINAL_LIBRARY = "xul"