14 files changed, 4815 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h
new file mode 100644
index 0000000000..724c8b90c9
--- /dev/null
+++ b/dom/media/encoder/ContainerWriter.h
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ContainerWriter_h_
+#define ContainerWriter_h_
+
+#include "nsTArray.h"
+#include "EncodedFrame.h"
+#include "TrackMetadataBase.h"
+
+namespace mozilla {
+/**
+ * ContainerWriter packs encoded track data into a specific media container.
+ */
+class ContainerWriter {
+ public:
+  ContainerWriter() : mInitialized(false), mIsWritingComplete(false) {}
+  virtual ~ContainerWriter() {}
+  // Track type bit flags. Mapping to DOMMediaStream::TrackTypeHints.
+  enum {
+    CREATE_AUDIO_TRACK = 1 << 0,
+    CREATE_VIDEO_TRACK = 1 << 1,
+  };
+  enum { END_OF_STREAM = 1 << 0 };  // aFlags bit for WriteEncodedTrack().
+
+  /**
+   * Writes the encoded track data in aData into the container writer's
+   * internal stream. aFlags signals conditions such as END_OF_STREAM to the
+   * implementation. Each implementation may handle different flags, and should
+   * document them accordingly. Currently, WriteEncodedTrack doesn't support
+   * explicit track specification, though each implementation may provide logic
+   * to allocate frames into different tracks.
+   */
+  virtual nsresult WriteEncodedTrack(
+      const nsTArray<RefPtr<EncodedFrame>>& aData, uint32_t aFlags = 0) = 0;
+
+  /**
+   * Stores the metadata for all given tracks to the muxer.
+   *
+   * This method checks the integrity of aMetadata.
+   * If the metadata isn't well formatted, this method returns NS_ERROR_FAILURE.
+   * If the metadata is well formatted, it stores the metadata and returns
+   * NS_OK.
+   */
+  virtual nsresult SetMetadata(
+      const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) = 0;
+
+  /**
+   * Indicates whether the writer has finished outputting data.
+   */
+  virtual bool IsWritingComplete() { return mIsWritingComplete; }
+
+  enum { FLUSH_NEEDED = 1 << 0, GET_HEADER = 1 << 1 };
+
+  /**
+   * Copies the final container data to a buffer if it has accumulated enough
+   * packets from WriteEncodedTrack. This buffer of data is appended to
+   * aOutputBufs, and existing elements of aOutputBufs should not be modified.
+   * Passing FLUSH_NEEDED in aFlags forces OggWriter to flush an ogg page even
+   * if it is not full, and to copy that container data to a buffer that is
+   * appended to aOutputBufs.
+   */
+  virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs,
+                                    uint32_t aFlags = 0) = 0;
+
+ protected:
+  bool mInitialized;
+  bool mIsWritingComplete;  // Value reported by IsWritingComplete().
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/EncodedFrame.h b/dom/media/encoder/EncodedFrame.h
new file mode 100644
index 0000000000..e76babef89
--- /dev/null
+++ b/dom/media/encoder/EncodedFrame.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef EncodedFrame_h_
+#define EncodedFrame_h_
+
+#include "nsISupportsImpl.h"
+#include "mozilla/media/MediaUtils.h"
+#include "TimeUnits.h"
+#include "VideoUtils.h"
+
+namespace mozilla {
+
+// Represents an encoded frame emitted by an encoder.
+class EncodedFrame final {
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame)
+ public:
+  enum FrameType {
+    VP8_I_FRAME,       // VP8 intraframe
+    VP8_P_FRAME,       // VP8 predicted frame
+    OPUS_AUDIO_FRAME,  // Opus audio frame
+    UNKNOWN            // FrameType not set
+  };
+  using ConstFrameData = const media::Refcountable<nsTArray<uint8_t>>;
+  using FrameData = media::Refcountable<nsTArray<uint8_t>>;
+  EncodedFrame(const media::TimeUnit& aTime, uint64_t aDuration,
+               uint64_t aDurationBase, FrameType aFrameType,
+               RefPtr<ConstFrameData> aData)
+      : mTime(aTime),
+        mDuration(aDuration),
+        mDurationBase(aDurationBase),
+        mFrameType(aFrameType),
+        mFrameData(std::move(aData)) {
+    MOZ_ASSERT(mFrameData);  // Callers must always supply frame data.
+    MOZ_ASSERT_IF(mFrameType == VP8_I_FRAME, mDurationBase == PR_USEC_PER_SEC);
+    MOZ_ASSERT_IF(mFrameType == VP8_P_FRAME, mDurationBase == PR_USEC_PER_SEC);
+    MOZ_ASSERT_IF(mFrameType == OPUS_AUDIO_FRAME, mDurationBase == 48000);
+  }
+  // Timestamp in microseconds
+  const media::TimeUnit mTime;
+  // The playback duration of this packet, in ticks of the mDurationBase base.
+  const uint64_t mDuration;
+  // The time base of mDuration: PR_USEC_PER_SEC for VP8, 48000 for Opus.
+  const uint64_t mDurationBase;
+  // Describes what kind of data mFrameData holds.
+  const FrameType mFrameType;
+  // The encoded data; asserted to be non-null at construction.
+  const RefPtr<ConstFrameData> mFrameData;
+
+  // The end time of the frame: mTime plus the frame's duration.
+  media::TimeUnit GetEndTime() const {
+    return mTime + media::TimeUnit(mDuration, mDurationBase);
+  }
+
+ private:
+  // Private destructor, to discourage deletion outside of Release():
+  ~EncodedFrame() = default;
+};
+
+}  // namespace mozilla
+
+#endif  // EncodedFrame_h_
diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp
new file mode 100644
index 0000000000..cae5c4ab4e
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -0,0 +1,1131 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MediaEncoder.h"
+
+#include <algorithm>
+#include "AudioNodeEngine.h"
+#include "AudioNodeTrack.h"
+#include "DriftCompensation.h"
+#include "MediaDecoder.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "mozilla/dom/AudioNode.h"
+#include "mozilla/dom/AudioStreamTrack.h"
+#include "mozilla/dom/Blob.h"
+#include "mozilla/dom/BlobImpl.h"
+#include "mozilla/dom/MediaStreamTrack.h"
+#include "mozilla/dom/MutableBlobStorage.h"
+#include "mozilla/dom/VideoStreamTrack.h"
+#include "mozilla/gfx/Point.h"  // IntSize
+#include "mozilla/Logging.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/TaskQueue.h"
+#include "mozilla/Unused.h"
+#include "Muxer.h"
+#include "nsMimeTypes.h"
+#include "nsThreadUtils.h"
+#include "OggWriter.h"
+#include "OpusTrackEncoder.h"
+#include "TimeUnits.h"
+#include "Tracing.h"
+
+#include "VP8TrackEncoder.h"
+#include "WebMWriter.h"
+
+mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder");
+#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg)
+
+namespace mozilla {
+
+using namespace dom;
+using namespace media;
+
+namespace {
+class BlobStorer : public MutableBlobStorageCallback {
+  MozPromiseHolder<MediaEncoder::BlobPromise> mHolder;  // Backs Promise().
+
+  virtual ~BlobStorer() = default;
+
+ public:
+  BlobStorer() = default;
+
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BlobStorer, override)
+  // Rejects the promise with aRv on failure, else resolves it with aBlobImpl.
+  void BlobStoreCompleted(MutableBlobStorage*, BlobImpl* aBlobImpl,
+                          nsresult aRv) override {
+    MOZ_ASSERT(NS_IsMainThread());
+    if (NS_FAILED(aRv)) {
+      mHolder.Reject(aRv, __func__);
+      return;
+    }
+
+    mHolder.Resolve(aBlobImpl, __func__);
+  }
+  // Returns the promise that BlobStoreCompleted() settles.
+  RefPtr<MediaEncoder::BlobPromise> Promise() {
+    return mHolder.Ensure(__func__);
+  }
+};
+}  // namespace
+
+class MediaEncoder::AudioTrackListener : public DirectMediaTrackListener {
+ public:
+  AudioTrackListener(RefPtr<DriftCompensator> aDriftCompensator,
+                     RefPtr<MediaEncoder> aMediaEncoder)
+      : mDirectConnected(false),
+        mInitialized(false),
+        mRemoved(false),
+        mDriftCompensator(std::move(aDriftCompensator)),
+        mMediaEncoder(std::move(aMediaEncoder)),
+        mEncoderThread(mMediaEncoder->mEncoderThread),
+        mShutdownPromise(mShutdownHolder.Ensure(__func__)) {
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mAudioEncoder);
+    MOZ_ASSERT(mEncoderThread);
+  }
+  // Records whether the direct listener was successfully installed.
+  void NotifyDirectListenerInstalled(InstallationResult aResult) override {
+    if (aResult == InstallationResult::SUCCESS) {
+      LOG(LogLevel::Info, ("Audio track direct listener installed"));
+      mDirectConnected = true;
+    } else {
+      LOG(LogLevel::Info, ("Audio track failed to install direct listener"));
+      MOZ_ASSERT(!mDirectConnected);
+    }
+  }
+  // Drops the encoder references if the listener was already removed.
+  void NotifyDirectListenerUninstalled() override {
+    mDirectConnected = false;
+
+    if (mRemoved) {
+      mMediaEncoder = nullptr;
+      mEncoderThread = nullptr;
+    }
+  }
+  // Updates the drift compensator and sends a copy of the audio for encoding.
+  void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+                           const MediaSegment& aQueuedMedia) override {
+    TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p",
+                  mMediaEncoder.get());
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mEncoderThread);
+
+    if (!mInitialized) {  // First call: report the audio start time once.
+      mDriftCompensator->NotifyAudioStart(TimeStamp::Now());
+      mInitialized = true;
+    }
+
+    mDriftCompensator->NotifyAudio(aQueuedMedia.GetDuration());
+
+    const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia);
+
+    AudioSegment copy;  // Own copy that can be moved into the runnable.
+    copy.AppendSlice(audio, 0, audio.GetDuration());
+
+    nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::AudioTrackEncoder::AppendAudioSegment",
+        [encoder = mMediaEncoder, copy = std::move(copy)]() mutable {
+          encoder->mAudioEncoder->AppendAudioSegment(std::move(copy));
+        }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+  }
+  // Signals end of stream to the audio encoder via mEncoderThread.
+  void NotifyEnded(MediaTrackGraph* aGraph) override {
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mAudioEncoder);
+    MOZ_ASSERT(mEncoderThread);
+
+    nsresult rv = mEncoderThread->Dispatch(
+        NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream",
+                               [encoder = mMediaEncoder] {
+                                 encoder->mAudioEncoder->NotifyEndOfStream();
+                               }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+  }
+  // Ends the stream, drops references if possible, resolves OnShutdown().
+  void NotifyRemoved(MediaTrackGraph* aGraph) override {
+    nsresult rv = mEncoderThread->Dispatch(
+        NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream",
+                               [encoder = mMediaEncoder] {
+                                 encoder->mAudioEncoder->NotifyEndOfStream();
+                               }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+
+    mRemoved = true;
+
+    if (!mDirectConnected) {  // Else cleared on direct-listener uninstall.
+      mMediaEncoder = nullptr;
+      mEncoderThread = nullptr;
+    }
+
+    mShutdownHolder.Resolve(true, __func__);
+  }
+  // Promise resolved (with true) once NotifyRemoved() has run.
+  const RefPtr<GenericNonExclusivePromise>& OnShutdown() const {
+    return mShutdownPromise;
+  }
+
+ private:
+  bool mDirectConnected;  // Direct listener is currently installed.
+  bool mInitialized;  // NotifyAudioStart() has been sent.
+  bool mRemoved;  // NotifyRemoved() has run.
+  const RefPtr<DriftCompensator> mDriftCompensator;
+  RefPtr<MediaEncoder> mMediaEncoder;
+  RefPtr<TaskQueue> mEncoderThread;
+  MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder;
+  const RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+};
+
+class MediaEncoder::VideoTrackListener : public DirectMediaTrackListener {
+ public:
+  explicit VideoTrackListener(RefPtr<MediaEncoder> aMediaEncoder)
+      : mDirectConnected(false),
+        mInitialized(false),
+        mRemoved(false),
+        mPendingAdvanceCurrentTime(false),
+        mMediaEncoder(std::move(aMediaEncoder)),
+        mEncoderThread(mMediaEncoder->mEncoderThread),
+        mShutdownPromise(mShutdownHolder.Ensure(__func__)) {
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mEncoderThread);
+  }
+  // Records whether the direct listener was successfully installed.
+  void NotifyDirectListenerInstalled(InstallationResult aResult) override {
+    if (aResult == InstallationResult::SUCCESS) {
+      LOG(LogLevel::Info, ("Video track direct listener installed"));
+      mDirectConnected = true;
+    } else {
+      LOG(LogLevel::Info, ("Video track failed to install direct listener"));
+      MOZ_ASSERT(!mDirectConnected);
+      return;
+    }
+  }
+  // Drops the encoder references if the listener was already removed.
+  void NotifyDirectListenerUninstalled() override {
+    mDirectConnected = false;
+
+    if (mRemoved) {
+      mMediaEncoder = nullptr;
+      mEncoderThread = nullptr;
+    }
+  }
+  // Samples the current time and forwards it to the video encoder.
+  void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+                           const MediaSegment& aQueuedMedia) override {
+    TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p",
+                  mMediaEncoder.get());
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+    MOZ_ASSERT(mEncoderThread);
+
+    mCurrentTime = TimeStamp::Now();
+    if (!mInitialized) {  // First call: set the encoder's start offset once.
+      nsresult rv = mEncoderThread->Dispatch(
+          NS_NewRunnableFunction("mozilla::VideoTrackEncoder::SetStartOffset",
+                                 [encoder = mMediaEncoder, now = mCurrentTime] {
+                                   encoder->mVideoEncoder->SetStartOffset(now);
+                                 }));
+      MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+      Unused << rv;
+      mInitialized = true;
+    }
+
+    if (!mPendingAdvanceCurrentTime) {  // Keep at most one queued at a time.
+      mPendingAdvanceCurrentTime = true;
+      nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+          "mozilla::VideoTrackEncoder::AdvanceCurrentTime",
+          [encoder = mMediaEncoder, now = mCurrentTime] {
+            encoder->mVideoListener->mPendingAdvanceCurrentTime = false;
+            encoder->mVideoEncoder->AdvanceCurrentTime(now);
+          }));
+      MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+      Unused << rv;
+    }
+  }
+  // Copies the incoming video frames and sends them to the video encoder.
+  void NotifyRealtimeTrackData(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+                               const MediaSegment& aMedia) override {
+    TRACE_COMMENT("MediaEncoder::NotifyRealtimeTrackData", "%p",
+                  mMediaEncoder.get());
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+    MOZ_ASSERT(mEncoderThread);
+    MOZ_ASSERT(aMedia.GetType() == MediaSegment::VIDEO);
+
+    const VideoSegment& video = static_cast<const VideoSegment&>(aMedia);
+    VideoSegment copy;  // Own copy that can be moved into the runnable.
+    for (VideoSegment::ConstChunkIterator iter(video); !iter.IsEnded();
+         iter.Next()) {
+      copy.AppendFrame(do_AddRef(iter->mFrame.GetImage()),
+                       iter->mFrame.GetIntrinsicSize(),
+                       iter->mFrame.GetPrincipalHandle(),
+                       iter->mFrame.GetForceBlack(), iter->mTimeStamp);
+    }
+
+    nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::VideoTrackEncoder::AppendVideoSegment",
+        [encoder = mMediaEncoder, copy = std::move(copy)]() mutable {
+          encoder->mVideoEncoder->AppendVideoSegment(std::move(copy));
+        }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+  }
+  // Forwards track enable/disable changes, stamped with the current time.
+  void NotifyEnabledStateChanged(MediaTrackGraph* aGraph,
+                                 bool aEnabled) override {
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+    MOZ_ASSERT(mEncoderThread);
+
+    nsresult rv;
+    if (aEnabled) {
+      rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+          "mozilla::VideoTrackEncoder::Enable",
+          [encoder = mMediaEncoder, now = TimeStamp::Now()] {
+            encoder->mVideoEncoder->Enable(now);
+          }));
+    } else {
+      rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+          "mozilla::VideoTrackEncoder::Disable",
+          [encoder = mMediaEncoder, now = TimeStamp::Now()] {
+            encoder->mVideoEncoder->Disable(now);
+          }));
+    }
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+  }
+  // Advances to the last sampled time (if any), then signals end of stream.
+  void NotifyEnded(MediaTrackGraph* aGraph) override {
+    MOZ_ASSERT(mMediaEncoder);
+    MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+    MOZ_ASSERT(mEncoderThread);
+
+    nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::VideoTrackEncoder::NotifyEndOfStream",
+        [encoder = mMediaEncoder, now = mCurrentTime] {
+          if (!now.IsNull()) {  // mCurrentTime is only set by queued changes.
+            encoder->mVideoEncoder->AdvanceCurrentTime(now);
+          }
+          encoder->mVideoEncoder->NotifyEndOfStream();
+        }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+  }
+  // Like NotifyEnded(), plus reference cleanup and resolving OnShutdown().
+  void NotifyRemoved(MediaTrackGraph* aGraph) override {
+    nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::VideoTrackEncoder::NotifyEndOfStream",
+        [encoder = mMediaEncoder, now = mCurrentTime] {
+          if (!now.IsNull()) {  // mCurrentTime is only set by queued changes.
+            encoder->mVideoEncoder->AdvanceCurrentTime(now);
+          }
+          encoder->mVideoEncoder->NotifyEndOfStream();
+        }));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+
+    mRemoved = true;
+
+    if (!mDirectConnected) {  // Else cleared on direct-listener uninstall.
+      mMediaEncoder = nullptr;
+      mEncoderThread = nullptr;
+    }
+
+    mShutdownHolder.Resolve(true, __func__);
+  }
+  // Promise resolved (with true) once NotifyRemoved() has run.
+  const RefPtr<GenericNonExclusivePromise>& OnShutdown() const {
+    return mShutdownPromise;
+  }
+
+ private:
+  bool mDirectConnected;  // Direct listener is currently installed.
+  bool mInitialized;  // SetStartOffset() has been dispatched.
+  bool mRemoved;  // NotifyRemoved() has run.
+  TimeStamp mCurrentTime;  // Sampled in NotifyQueuedChanges().
+  Atomic<bool> mPendingAdvanceCurrentTime;  // An AdvanceCurrentTime is queued.
+  RefPtr<MediaEncoder> mMediaEncoder;
+  RefPtr<TaskQueue> mEncoderThread;
+  MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder;
+  const RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+};
+
+class MediaEncoder::EncoderListener : public TrackEncoderListener {
+ public:
+  EncoderListener(TaskQueue* aEncoderThread, MediaEncoder* aEncoder)
+      : mEncoderThread(aEncoderThread), mEncoder(aEncoder) {}
+  // Detaches from the MediaEncoder; subsequent callbacks become no-ops.
+  void Forget() {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    mEncoder = nullptr;
+  }
+  // Relays a track encoder's initialization to the MediaEncoder.
+  void Initialized(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    MOZ_ASSERT(aTrackEncoder->IsInitialized());
+
+    if (!mEncoder) {  // Forget() was called.
+      return;
+    }
+
+    mEncoder->UpdateInitialized();
+  }
+  // Relays a track encoder's start to the MediaEncoder.
+  void Started(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    MOZ_ASSERT(aTrackEncoder->IsStarted());
+
+    if (!mEncoder) {  // Forget() was called.
+      return;
+    }
+
+    mEncoder->UpdateStarted();
+  }
+  // Relays a track encoder's error to the MediaEncoder.
+  void Error(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+    if (!mEncoder) {  // Forget() was called.
+      return;
+    }
+
+    mEncoder->SetError();
+  }
+
+ protected:
+  RefPtr<TaskQueue> mEncoderThread;
+  RefPtr<MediaEncoder> mEncoder;  // Null after Forget().
+};
+
+MediaEncoder::MediaEncoder(
+    RefPtr<TaskQueue> aEncoderThread,
+    RefPtr<DriftCompensator> aDriftCompensator,
+    UniquePtr<ContainerWriter> aWriter,
+    UniquePtr<AudioTrackEncoder> aAudioEncoder,
+    UniquePtr<VideoTrackEncoder> aVideoEncoder,
+    UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue,
+    UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue,
+    TrackRate aTrackRate, const nsAString& aMimeType, uint64_t aMaxMemory,
+    TimeDuration aTimeslice)
+    : mMainThread(GetMainThreadSerialEventTarget()),
+      mEncoderThread(std::move(aEncoderThread)),
+      mEncodedAudioQueue(std::move(aEncodedAudioQueue)),
+      mEncodedVideoQueue(std::move(aEncodedVideoQueue)),
+      mMuxer(MakeUnique<Muxer>(std::move(aWriter), *mEncodedAudioQueue,
+                               *mEncodedVideoQueue)),
+      mAudioEncoder(std::move(aAudioEncoder)),
+      mAudioListener(mAudioEncoder ? MakeAndAddRef<AudioTrackListener>(
+                                         std::move(aDriftCompensator), this)
+                                   : nullptr),
+      mVideoEncoder(std::move(aVideoEncoder)),
+      mVideoListener(mVideoEncoder ? MakeAndAddRef<VideoTrackListener>(this)
+                                   : nullptr),
+      mEncoderListener(MakeAndAddRef<EncoderListener>(mEncoderThread, this)),
+      mMimeType(aMimeType),
+      mMaxMemory(aMaxMemory),
+      mTimeslice(aTimeslice),
+      mStartTime(TimeStamp::Now()),
+      mInitialized(false),
+      mStarted(false),
+      mCompleted(false),
+      mError(false) {
+  if (!mAudioEncoder) {  // No audio: treat the audio queue as already done.
+    mMuxedAudioEndTime = TimeUnit::FromInfinity();
+    mEncodedAudioQueue->Finish();
+  }
+  if (!mVideoEncoder) {  // No video: treat the video queue as already done.
+    mMuxedVideoEndTime = TimeUnit::FromInfinity();
+    mEncodedVideoQueue->Finish();
+  }
+}
+
+void MediaEncoder::RegisterListeners() {
+  if (mAudioEncoder) {  // Hook up audio queue events and the encoder listener.
+    mAudioPushListener = mEncodedAudioQueue->PushEvent().Connect(
+        mEncoderThread, this, &MediaEncoder::OnEncodedAudioPushed);
+    mAudioFinishListener = mEncodedAudioQueue->FinishEvent().Connect(
+        mEncoderThread, this, &MediaEncoder::MaybeShutdown);
+    MOZ_ALWAYS_SUCCEEDS(mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::AudioTrackEncoder::RegisterListener",
+        [self = RefPtr<MediaEncoder>(this), this] {  // self keeps us alive.
+          mAudioEncoder->RegisterListener(mEncoderListener);
+        })));
+  }
+  if (mVideoEncoder) {  // Hook up video queue events and the encoder listener.
+    mVideoPushListener = mEncodedVideoQueue->PushEvent().Connect(
+        mEncoderThread, this, &MediaEncoder::OnEncodedVideoPushed);
+    mVideoFinishListener = mEncodedVideoQueue->FinishEvent().Connect(
+        mEncoderThread, this, &MediaEncoder::MaybeShutdown);
+    MOZ_ALWAYS_SUCCEEDS(mEncoderThread->Dispatch(NS_NewRunnableFunction(
+        "mozilla::VideoTrackEncoder::RegisterListener",
+        [self = RefPtr<MediaEncoder>(this), this] {  // self keeps us alive.
+          mVideoEncoder->RegisterListener(mEncoderListener);
+        })));
+  }
+}
+
+MediaEncoder::~MediaEncoder() {  // Input references must be cleared by now.
+  MOZ_ASSERT(!mAudioTrack);
+  MOZ_ASSERT(!mVideoTrack);
+  MOZ_ASSERT(!mAudioNode);
+  MOZ_ASSERT(!mInputPort);
+  MOZ_ASSERT(!mPipeTrack);
+}
+
+void MediaEncoder::EnsureGraphTrackFrom(MediaTrack* aTrack) {
+  if (mGraphTrack) {  // Already created by an earlier call; keep it.
+    return;
+  }
+  MOZ_DIAGNOSTIC_ASSERT(!aTrack->IsDestroyed());
+  mGraphTrack = MakeAndAddRef<SharedDummyTrack>(  // Lives in aTrack's graph.
+      aTrack->Graph()->CreateSourceTrack(MediaSegment::VIDEO));
+}
+
+void MediaEncoder::Suspend() {
+  mGraphTrack->mTrack->QueueControlMessageWithNoShutdown(
+      [self = RefPtr<MediaEncoder>(this), this] {
+        TRACE("MediaEncoder::Suspend (graph)");
+        if (NS_FAILED(mEncoderThread->Dispatch(
+                NS_NewRunnableFunction("MediaEncoder::Suspend (encoder)",
+                                       [self, this, now = TimeStamp::Now()] {
+                                         if (mAudioEncoder) {
+                                           mAudioEncoder->Suspend();
+                                         }
+                                         if (mVideoEncoder) {
+                                           mVideoEncoder->Suspend(now);
+                                         }
+                                       })))) {
+          // QueueControlMessageWithNoShutdown added an extra async step, and
+          // mEncoderThread has shut down in the meantime; nothing to do.
+          return;
+        }
+      });
+}
+
+void MediaEncoder::Resume() {
+  mGraphTrack->mTrack->QueueControlMessageWithNoShutdown(
+      [self = RefPtr<MediaEncoder>(this), this] {
+        TRACE("MediaEncoder::Resume (graph)");
+        if (NS_FAILED(mEncoderThread->Dispatch(
+                NS_NewRunnableFunction("MediaEncoder::Resume (encoder)",
+                                       [self, this, now = TimeStamp::Now()] {
+                                         if (mAudioEncoder) {
+                                           mAudioEncoder->Resume();
+                                         }
+                                         if (mVideoEncoder) {
+                                           mVideoEncoder->Resume(now);
+                                         }
+                                       })))) {
+          // QueueControlMessageWithNoShutdown added an extra async step, and
+          // mEncoderThread has shut down in the meantime; nothing to do.
+          return;
+        }
+      });
+}
+
+void MediaEncoder::ConnectAudioNode(AudioNode* aNode, uint32_t aOutput) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (mAudioNode) {
+    MOZ_ASSERT(false, "Only one audio node supported");
+    return;
+  }
+
+  // Only AudioNodeTrack of kind EXTERNAL_OUTPUT stores output audio data in
+  // the track (see AudioNodeTrack::AdvanceOutputSegment()). That means
+  // forwarding the input track in a recorder session can't copy data from the
+  // track of a non-destination node. Create a pipe track in this case.
+  if (aNode->NumberOfOutputs() > 0) {
+    AudioContext* ctx = aNode->Context();
+    AudioNodeEngine* engine = new AudioNodeEngine(nullptr);
+    AudioNodeTrack::Flags flags = AudioNodeTrack::EXTERNAL_OUTPUT |
+                                  AudioNodeTrack::NEED_MAIN_THREAD_ENDED;
+    mPipeTrack = AudioNodeTrack::Create(ctx, engine, flags, ctx->Graph());
+    AudioNodeTrack* ns = aNode->GetTrack();
+    if (ns) {  // Feed output aOutput of the node into the pipe track.
+      mInputPort = mPipeTrack->AllocateInputPort(aNode->GetTrack(), 0, aOutput);
+    }
+  }
+
+  mAudioNode = aNode;
+
+  if (mPipeTrack) {
+    mPipeTrack->AddListener(mAudioListener);
+    EnsureGraphTrackFrom(mPipeTrack);
+  } else {  // No pipe track was created: listen on the node's own track.
+    mAudioNode->GetTrack()->AddListener(mAudioListener);
+    EnsureGraphTrackFrom(mAudioNode->GetTrack());
+  }
+}
+
+void MediaEncoder::ConnectMediaStreamTrack(MediaStreamTrack* aTrack) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (aTrack->Ended()) {  // Ended tracks are rejected.
+    MOZ_ASSERT_UNREACHABLE("Cannot connect ended track");
+    return;
+  }
+
+  EnsureGraphTrackFrom(aTrack->GetTrack());
+
+  if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) {
+    if (!mAudioEncoder) {
+      // No audio encoder for this audio track. It could be disabled.
+      LOG(LogLevel::Warning, ("Cannot connect to audio track - no encoder"));
+      return;
+    }
+
+    MOZ_ASSERT(!mAudioTrack, "Only one audio track supported.");
+    MOZ_ASSERT(mAudioListener, "No audio listener for this audio track");
+
+    LOG(LogLevel::Info, ("Connected to audio track %p", aTrack));
+
+    mAudioTrack = audio;
+    audio->AddListener(mAudioListener);  // Audio adds no direct listener.
+  } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) {
+    if (!mVideoEncoder) {
+      // No video encoder for this video track. It could be disabled.
+      LOG(LogLevel::Warning, ("Cannot connect to video track - no encoder"));
+      return;
+    }
+
+    MOZ_ASSERT(!mVideoTrack, "Only one video track supported.");
+    MOZ_ASSERT(mVideoListener, "No video listener for this video track");
+
+    LOG(LogLevel::Info, ("Connected to video track %p", aTrack));
+
+    mVideoTrack = video;
+    video->AddDirectListener(mVideoListener);  // Video adds both kinds.
+    video->AddListener(mVideoListener);
+  } else {
+    MOZ_ASSERT(false, "Unknown track type");
+  }
+}
+
+void MediaEncoder::RemoveMediaStreamTrack(MediaStreamTrack* aTrack) {
+  if (!aTrack) {  // Nothing to remove.
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) {
+    if (audio != mAudioTrack) {
+      MOZ_ASSERT(false, "Not connected to this audio track");
+      return;
+    }
+
+    if (mAudioListener) {  // Detach the listener in both roles.
+      audio->RemoveDirectListener(mAudioListener);
+      audio->RemoveListener(mAudioListener);
+    }
+    mAudioTrack = nullptr;
+  } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) {
+    if (video != mVideoTrack) {
+      MOZ_ASSERT(false, "Not connected to this video track");
+      return;
+    }
+
+    if (mVideoListener) {  // Detach the listener in both roles.
+      video->RemoveDirectListener(mVideoListener);
+      video->RemoveListener(mVideoListener);
+    }
+    mVideoTrack = nullptr;
+  }
+}
+
+/* static */
+already_AddRefed<MediaEncoder> MediaEncoder::CreateEncoder(
+    RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType,
+    uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes,
+    TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice) {
+  AUTO_PROFILER_LABEL("MediaEncoder::CreateEncoder", OTHER);
+
+  UniquePtr<ContainerWriter> writer;
+  UniquePtr<AudioTrackEncoder> audioEncoder;
+  UniquePtr<VideoTrackEncoder> videoEncoder;
+  auto encodedAudioQueue = MakeUnique<MediaQueue<EncodedFrame>>();
+  auto encodedVideoQueue = MakeUnique<MediaQueue<EncodedFrame>>();
+  auto driftCompensator =
+      MakeRefPtr<DriftCompensator>(aEncoderThread, aTrackRate);
+
+  Maybe<MediaContainerType> mimeType = MakeMediaContainerType(aMimeType);
+  if (!mimeType) {  // No container type could be made from aMimeType.
+    return nullptr;
+  }
+  // Create one track encoder per codec listed in the MIME type.
+  for (const auto& codec : mimeType->ExtendedType().Codecs().Range()) {
+    if (codec.EqualsLiteral("opus")) {
+      MOZ_ASSERT(!audioEncoder);
+      audioEncoder =
+          MakeUnique<OpusTrackEncoder>(aTrackRate, *encodedAudioQueue);
+    } else if (codec.EqualsLiteral("vp8") || codec.EqualsLiteral("vp8.0")) {
+      MOZ_ASSERT(!videoEncoder);
+      if (Preferences::GetBool("media.recorder.video.frame_drops", true)) {
+        videoEncoder = MakeUnique<VP8TrackEncoder>(driftCompensator, aTrackRate,
+                                                   *encodedVideoQueue,
+                                                   FrameDroppingMode::ALLOW);
+      } else {
+        videoEncoder = MakeUnique<VP8TrackEncoder>(driftCompensator, aTrackRate,
+                                                   *encodedVideoQueue,
+                                                   FrameDroppingMode::DISALLOW);
+      }
+    } else {
+      MOZ_CRASH("Unknown codec");
+    }
+  }
+  // Pick the container writer matching the MIME type.
+  if (mimeType->Type() == MEDIAMIMETYPE(VIDEO_WEBM) ||
+      mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM)) {
+    MOZ_ASSERT_IF(mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM), !videoEncoder);
+    writer = MakeUnique<WebMWriter>();
+  } else if (mimeType->Type() == MEDIAMIMETYPE(AUDIO_OGG)) {
+    MOZ_ASSERT(audioEncoder);
+    MOZ_ASSERT(!videoEncoder);
+    writer = MakeUnique<OggWriter>();
+  }
+  NS_ENSURE_TRUE(writer, nullptr);  // No writer for this MIME type.
+
+  LOG(LogLevel::Info,
+      ("Create encoder result:a[%p](%u bps) v[%p](%u bps) w[%p] mimeType = "
+       "%s.",
+       audioEncoder.get(), aAudioBitrate, videoEncoder.get(), aVideoBitrate,
+       writer.get(), NS_ConvertUTF16toUTF8(aMimeType).get()));
+  // Configure the track encoders that were created.
+  if (audioEncoder) {
+    audioEncoder->SetWorkerThread(aEncoderThread);
+    if (aAudioBitrate != 0) {  // 0: SetBitrate() is not called.
+      audioEncoder->SetBitrate(aAudioBitrate);
+    }
+  }
+  if (videoEncoder) {
+    videoEncoder->SetWorkerThread(aEncoderThread);
+    if (aVideoBitrate != 0) {  // 0: SetBitrate() is not called.
+      videoEncoder->SetBitrate(aVideoBitrate);
+    }
+  }
+  RefPtr<MediaEncoder> encoder = new MediaEncoder(
+      std::move(aEncoderThread), std::move(driftCompensator), std::move(writer),
+      std::move(audioEncoder), std::move(videoEncoder),
+      std::move(encodedAudioQueue), std::move(encodedVideoQueue), aTrackRate,
+      aMimeType, aMaxMemory, aTimeslice);
+
+  encoder->RegisterListeners();
+
+  return encoder.forget();
+}
+
+nsresult MediaEncoder::GetEncodedData(
+    nsTArray<nsTArray<uint8_t>>* aOutputBufs) {
+  AUTO_PROFILER_LABEL("MediaEncoder::GetEncodedData", OTHER);
+
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  LOG(LogLevel::Verbose,
+      ("GetEncodedData TimeStamp = %f", GetEncodeTimeStamp()));
+
+  if (!mInitialized) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+  // Fetch data from the muxer into aOutputBufs; note when it has finished.
+  nsresult rv = mMuxer->GetData(aOutputBufs);
+  if (mMuxer->IsFinished()) {
+    mCompleted = true;
+  }
+
+  LOG(LogLevel::Verbose,
+      ("END GetEncodedData TimeStamp=%f "
+       "mCompleted=%d, aComplete=%d, vComplete=%d",
+       GetEncodeTimeStamp(), mCompleted,
+       !mAudioEncoder || mAudioEncoder->IsEncodingComplete(),
+       !mVideoEncoder || mVideoEncoder->IsEncodingComplete()));
+
+  return rv;  // Result of Muxer::GetData().
+}
+
+// Kicks off a graceful shutdown once both encoded queues report that they
+// are finished. Does nothing (besides logging) while either is still live.
+// Encoder thread only.
+void MediaEncoder::MaybeShutdown() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  const bool audioStillLive = !mEncodedAudioQueue->IsFinished();
+  if (audioStillLive) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p not shutting down, audio is still live", this));
+    return;
+  }
+
+  const bool videoStillLive = !mEncodedVideoQueue->IsFinished();
+  if (videoStillLive) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p not shutting down, video is still live", this));
+    return;
+  }
+
+  mShutdownEvent.Notify();
+
+  // Stop() is dispatched to mMainThread; it in turn invokes Shutdown() on the
+  // encoder thread. The resulting promise is intentionally dropped.
+  Unused << InvokeAsync(mMainThread, this, __func__, &MediaEncoder::Stop);
+}
+
+// Starts shutting down the encoder, or returns the promise of a shutdown that
+// was already started. Encoder thread only.
+//
+// The returned promise follows GenericNonExclusivePromise::All() over the
+// OnShutdown() promises of the audio and video listeners that exist: it
+// resolves with `true` when All() resolves and rejects with All()'s reject
+// value otherwise.
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Shutdown() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+  if (mShutdownPromise) {
+    // Shutdown is already in progress (or done); hand out the same promise.
+    return mShutdownPromise;
+  }
+
+  LOG(LogLevel::Info, ("MediaEncoder is shutting down."));
+
+  // Collect one shutdown promise per existing track listener.
+  AutoTArray<RefPtr<GenericNonExclusivePromise>, 2> shutdownPromises;
+  if (mAudioListener) {
+    shutdownPromises.AppendElement(mAudioListener->OnShutdown());
+  }
+  if (mVideoListener) {
+    shutdownPromises.AppendElement(mVideoListener->OnShutdown());
+  }
+
+  // Convert the All() promise type back into a GenericNonExclusivePromise so
+  // it can be stored in mShutdownPromise and returned to callers.
+  mShutdownPromise =
+      GenericNonExclusivePromise::All(mEncoderThread, shutdownPromises)
+          ->Then(mEncoderThread, __func__,
+                 [](const GenericNonExclusivePromise::AllPromiseType::
+                        ResolveOrRejectValue& aValue) {
+                   if (aValue.IsResolve()) {
+                     return GenericNonExclusivePromise::CreateAndResolve(
+                         true, __func__);
+                   }
+                   return GenericNonExclusivePromise::CreateAndReject(
+                       aValue.RejectValue(), __func__);
+                 });
+
+  // Cleanup that runs on the encoder thread after mShutdownPromise settles:
+  // detach mEncoderListener from the track encoders, disconnect the muxer and
+  // drop the queue listeners. `self` keeps this MediaEncoder alive until then.
+  // NOTE(review): the callback takes no arguments, so it presumably runs on
+  // both resolve and reject -- confirm against MozPromise::Then.
+  mShutdownPromise->Then(
+      mEncoderThread, __func__, [self = RefPtr<MediaEncoder>(this), this] {
+        if (mAudioEncoder) {
+          mAudioEncoder->UnregisterListener(mEncoderListener);
+        }
+        if (mVideoEncoder) {
+          mVideoEncoder->UnregisterListener(mEncoderListener);
+        }
+        mEncoderListener->Forget();
+        mMuxer->Disconnect();
+        mAudioPushListener.DisconnectIfExists();
+        mAudioFinishListener.DisconnectIfExists();
+        mVideoPushListener.DisconnectIfExists();
+        mVideoFinishListener.DisconnectIfExists();
+      });
+
+  return mShutdownPromise;
+}
+
+// Gracefully stops the encoder. Main thread only.
+// Disconnects the input tracks, then invokes Shutdown() on the encoder thread
+// and returns the promise for that invocation.
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Stop() {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  LOG(LogLevel::Info, ("MediaEncoder %p Stop", this));
+
+  DisconnectTracks();
+
+  return InvokeAsync(mEncoderThread, this, __func__, &MediaEncoder::Shutdown);
+}
+
+// Cancels the encoder. Main thread only.
+// Disconnects the input tracks, then on the encoder thread calls Cancel() on
+// each existing track encoder before running Shutdown(). Returns the promise
+// for that encoder-thread task.
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Cancel() {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  LOG(LogLevel::Info, ("MediaEncoder %p Cancel", this));
+
+  DisconnectTracks();
+
+  // `self` keeps this MediaEncoder alive until the task has run.
+  return InvokeAsync(mEncoderThread, __func__,
+                     [self = RefPtr<MediaEncoder>(this), this]() {
+                       if (mAudioEncoder) {
+                         mAudioEncoder->Cancel();
+                       }
+                       if (mVideoEncoder) {
+                         mVideoEncoder->Cancel();
+                       }
+                       return Shutdown();
+                     });
+}
+
+// Returns the current value of mError. Encoder thread only.
+bool MediaEncoder::HasError() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+  return mError;
+}
+
+// Marks the encoder as errored and notifies mErrorEvent. Calls made while
+// mError is already set are no-ops, so listeners are not notified again.
+// Encoder thread only.
+void MediaEncoder::SetError() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  if (!mError) {
+    mError = true;
+    mErrorEvent.Notify();
+  }
+}
+
+// Extracts the currently muxed data and then gathers it into a blob.
+// Encoder thread only. The returned promise is the one produced by
+// GatherBlob(), which runs on mMainThread after Extract() has settled.
+auto MediaEncoder::RequestData() -> RefPtr<BlobPromise> {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+  // The earlier of the two track end times is recorded as both the last blob
+  // time and the last extract time, before the extraction is started.
+  TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime);
+  mLastBlobTime = muxedEndTime;
+  mLastExtractTime = muxedEndTime;
+  return Extract()->Then(
+      mMainThread, __func__,
+      [this, self = RefPtr<MediaEncoder>(this)](
+          const GenericPromise::ResolveOrRejectValue& aValue) {
+        // Even if rejected, we want to gather what has already been
+        // extracted into the current blob and expose that.
+        Unused << NS_WARN_IF(aValue.IsReject());
+        return GatherBlob();
+      });
+}
+
+// Lazily creates mMutableBlobStorage, bounded in memory by mMaxMemory.
+// Leaves an existing storage untouched. Main thread only.
+void MediaEncoder::MaybeCreateMutableBlobStorage() {
+  MOZ_ASSERT(NS_IsMainThread());
+  if (mMutableBlobStorage) {
+    return;
+  }
+  mMutableBlobStorage = new MutableBlobStorage(
+      MutableBlobStorage::eCouldBeInTemporaryFile, nullptr, mMaxMemory);
+}
+
+// Records the end time of the pushed audio frame as mMuxedAudioEndTime, then
+// checks whether it is time to extract data or issue a blob.
+// Encoder thread only.
+void MediaEncoder::OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame) {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+  mMuxedAudioEndTime = aFrame->GetEndTime();
+  MaybeExtractOrGatherBlob();
+}
+
+// Records the end time of the pushed video frame as mMuxedVideoEndTime, then
+// checks whether it is time to extract data or issue a blob.
+// Encoder thread only.
+void MediaEncoder::OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame) {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+  mMuxedVideoEndTime = aFrame->GetEndTime();
+  MaybeExtractOrGatherBlob();
+}
+
+// Decides, based on how far muxing has progressed, whether to issue a new
+// blob and/or extract more data into the current blob storage.
+// Encoder thread only.
+void MediaEncoder::MaybeExtractOrGatherBlob() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  // Progress is measured by the track that is furthest behind.
+  TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime);
+  if ((muxedEndTime - mLastBlobTime).ToTimeDuration() >= mTimeslice) {
+    LOG(LogLevel::Verbose, ("MediaEncoder %p Muxed %.2fs of data since last "
+                            "blob. Issuing new blob.",
+                            this, (muxedEndTime - mLastBlobTime).ToSeconds()));
+    // RequestData() updates mLastBlobTime and mLastExtractTime synchronously;
+    // the blob (or the error) is delivered later on the encoder thread.
+    RequestData()->Then(mEncoderThread, __func__,
+                        [this, self = RefPtr<MediaEncoder>(this)](
+                            const BlobPromise::ResolveOrRejectValue& aValue) {
+                          if (aValue.IsReject()) {
+                            SetError();
+                            return;
+                          }
+                          RefPtr<BlobImpl> blob = aValue.ResolveValue();
+                          mDataAvailableEvent.Notify(std::move(blob));
+                        });
+  }
+
+  // This check must read mLastExtractTime after the branch above: when a blob
+  // was just requested, RequestData() already set it to muxedEndTime, so no
+  // second extraction is started here.
+  if (muxedEndTime - mLastExtractTime > TimeUnit::FromSeconds(1)) {
+    // Extract data from the muxer at least every second.
+    LOG(LogLevel::Verbose,
+        ("MediaEncoder %p Muxed %.2fs of data since last "
+         "extract. Extracting more data into blob.",
+         this, (muxedEndTime - mLastExtractTime).ToSeconds()));
+    mLastExtractTime = muxedEndTime;
+    Unused << Extract();
+  }
+}
+
+// Pull encoded media data from MediaEncoder and put into MutableBlobStorage.
+// Encoder thread only. The data is fetched synchronously via GetEncodedData()
+// and appended to the blob storage in a task on mMainThread. The returned
+// promise resolves with `true` once every non-empty part has been appended,
+// and rejects with the failing nsresult if an Append() fails.
+RefPtr<GenericPromise> MediaEncoder::Extract() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  LOG(LogLevel::Debug, ("MediaEncoder %p Extract", this));
+
+  AUTO_PROFILER_LABEL("MediaEncoder::Extract", OTHER);
+
+  // Pull encoded media data from MediaEncoder
+  nsTArray<nsTArray<uint8_t>> buffer;
+  nsresult rv = GetEncodedData(&buffer);
+  MOZ_ASSERT(rv != NS_ERROR_INVALID_ARG, "Invalid args can be prevented.");
+  if (NS_FAILED(rv)) {
+    // A failed fetch is required to have produced no output at all.
+    MOZ_RELEASE_ASSERT(buffer.IsEmpty());
+    // Even if we failed to encode more data, it might be time to push a blob
+    // with already encoded data.
+  }
+
+  // To ensure Extract() promises are resolved in calling order, we always
+  // invoke the main thread. Even when the encoded buffer is empty.
+  return InvokeAsync(
+      mMainThread, __func__,
+      [self = RefPtr<MediaEncoder>(this), this, buffer = std::move(buffer)] {
+        MaybeCreateMutableBlobStorage();
+        for (const auto& part : buffer) {
+          if (part.IsEmpty()) {
+            continue;
+          }
+
+          nsresult rv =
+              mMutableBlobStorage->Append(part.Elements(), part.Length());
+          if (NS_WARN_IF(NS_FAILED(rv))) {
+            // Stop at the first failure; remaining parts are not appended.
+            return GenericPromise::CreateAndReject(rv, __func__);
+          }
+        }
+        return GenericPromise::CreateAndResolve(true, __func__);
+      });
+}
+
+// Serializes blob gathering. Main thread only.
+// With no gather operation pending, GatherBlobImpl() is started right away.
+// Otherwise a new GatherBlobImpl() is chained after the pending promise. In
+// both cases the resulting promise becomes the new mBlobPromise and is
+// returned.
+auto MediaEncoder::GatherBlob() -> RefPtr<BlobPromise> {
+  MOZ_ASSERT(NS_IsMainThread());
+  if (mBlobPromise) {
+    mBlobPromise = mBlobPromise->Then(
+        mMainThread, __func__,
+        [self = RefPtr<MediaEncoder>(this)] { return self->GatherBlobImpl(); });
+  } else {
+    mBlobPromise = GatherBlobImpl();
+  }
+  return mBlobPromise;
+}
+
+// Asks the current blob storage for its blob and detaches the storage from
+// this encoder, so data extracted afterwards goes into a fresh storage.
+// Returns the BlobStorer's promise.
+auto MediaEncoder::GatherBlobImpl() -> RefPtr<BlobPromise> {
+  RefPtr<BlobStorer> storer = MakeAndAddRef<BlobStorer>();
+  // Make sure there is a storage to gather from, even if nothing has been
+  // extracted into one yet.
+  MaybeCreateMutableBlobStorage();
+  mMutableBlobStorage->GetBlobImplWhenReady(NS_ConvertUTF16toUTF8(mMimeType),
+                                            storer);
+  mMutableBlobStorage = nullptr;
+
+  // Once this gather operation has settled, clear mBlobPromise, but only if
+  // it still refers to this very operation's promise.
+  storer->Promise()->Then(
+      mMainThread, __func__,
+      [self = RefPtr<MediaEncoder>(this), p = storer->Promise()] {
+        if (self->mBlobPromise == p) {
+          // Reset BlobPromise.
+          self->mBlobPromise = nullptr;
+        }
+      });
+
+  return storer->Promise();
+}
+
+// Detaches this encoder from all of its inputs. Main thread only.
+// For an AudioNode input this removes the audio listener and destroys the
+// input port and pipe track, if present. For MediaStreamTrack inputs it
+// delegates to RemoveMediaStreamTrack().
+void MediaEncoder::DisconnectTracks() {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (mAudioNode) {
+    mAudioNode->GetTrack()->RemoveListener(mAudioListener);
+    if (mInputPort) {
+      mInputPort->Destroy();
+      mInputPort = nullptr;
+    }
+    if (mPipeTrack) {
+      mPipeTrack->RemoveListener(mAudioListener);
+      mPipeTrack->Destroy();
+      mPipeTrack = nullptr;
+    }
+    // Clearing mAudioNode makes repeated calls skip this branch.
+    mAudioNode = nullptr;
+  }
+
+  if (mAudioTrack) {
+    RemoveMediaStreamTrack(mAudioTrack);
+  }
+
+  if (mVideoTrack) {
+    RemoveMediaStreamTrack(mVideoTrack);
+  }
+}
+
+// Returns the value of the media_encoder_webm_enabled static pref.
+bool MediaEncoder::IsWebMEncoderEnabled() {
+  return StaticPrefs::media_encoder_webm_enabled();
+}
+
+// Once every existing track encoder reports that it is initialized, collects
+// their metadata, hands it to the muxer and sets mInitialized. Null metadata
+// or a failing Muxer::SetMetadata() puts the encoder into the error state
+// instead. Encoder thread only.
+void MediaEncoder::UpdateInitialized() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  if (mInitialized) {
+    // This could happen if an encoder re-inits due to a resolution change.
+    return;
+  }
+
+  if (mAudioEncoder && !mAudioEncoder->IsInitialized()) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p UpdateInitialized waiting for audio", this));
+    return;
+  }
+
+  if (mVideoEncoder && !mVideoEncoder->IsInitialized()) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p UpdateInitialized waiting for video", this));
+    return;
+  }
+
+  MOZ_ASSERT(mMuxer->NeedsMetadata());
+
+  // Audio metadata goes first, video second; both must be non-null.
+  nsTArray<RefPtr<TrackMetadataBase>> meta;
+  if (mAudioEncoder) {
+    const auto& audioMeta = *meta.AppendElement(mAudioEncoder->GetMetadata());
+    if (!audioMeta) {
+      LOG(LogLevel::Error, ("Audio metadata is null"));
+      SetError();
+      return;
+    }
+  }
+  if (mVideoEncoder) {
+    const auto& videoMeta = *meta.AppendElement(mVideoEncoder->GetMetadata());
+    if (!videoMeta) {
+      LOG(LogLevel::Error, ("Video metadata is null"));
+      SetError();
+      return;
+    }
+  }
+
+  const nsresult muxerResult = mMuxer->SetMetadata(meta);
+  if (NS_FAILED(muxerResult)) {
+    LOG(LogLevel::Error, ("SetMetadata failed"));
+    SetError();
+    return;
+  }
+
+  LOG(LogLevel::Info,
+      ("MediaEncoder %p UpdateInitialized set metadata in muxer", this));
+
+  mInitialized = true;
+}
+
+// Sets mStarted and notifies mStartedEvent the first time every existing
+// track encoder reports that it has started. Encoder thread only.
+void MediaEncoder::UpdateStarted() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  // Nothing to do if we already started, or while an existing encoder has
+  // not started yet.
+  if (mStarted || (mAudioEncoder && !mAudioEncoder->IsStarted()) ||
+      (mVideoEncoder && !mVideoEncoder->IsStarted())) {
+    return;
+  }
+
+  mStarted = true;
+
+  // Start issuing timeslice-based blobs.
+  MOZ_ASSERT(mLastBlobTime == TimeUnit::Zero());
+
+  mStartedEvent.Notify();
+}
+
+/*
+ * SizeOfExcludingThis reports the memory used by the MediaEncoder: the
+ * in-memory buffer of the current blob storage (read on the main thread, 0 if
+ * there is none) plus what mAudioEncoder and mVideoEncoder report on the
+ * encoder thread. Main thread only; the sum is delivered through the returned
+ * promise.
+ */
+auto MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
+    -> RefPtr<SizeOfPromise> {
+  MOZ_ASSERT(NS_IsMainThread());
+  size_t blobStorageSize = 0;
+  if (mMutableBlobStorage) {
+    blobStorageSize = mMutableBlobStorage->SizeOfCurrentMemoryBuffer();
+  }
+
+  return InvokeAsync(
+      mEncoderThread, __func__,
+      [self = RefPtr<MediaEncoder>(this), this, blobStorageSize,
+       aMallocSizeOf]() {
+        const size_t audioSize =
+            mAudioEncoder ? mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf)
+                          : 0;
+        const size_t videoSize =
+            mVideoEncoder ? mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf)
+                          : 0;
+        return SizeOfPromise::CreateAndResolve(
+            blobStorageSize + audioSize + videoSize, __func__);
+      });
+}
+
+}  // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h
new file mode 100644
index 0000000000..005d1f2dce
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.h
@@ -0,0 +1,401 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MediaEncoder_h_
+#define MediaEncoder_h_
+
+#include "ContainerWriter.h"
+#include "CubebUtils.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/UniquePtr.h"
+#include "nsIMemoryReporter.h"
+#include "TrackEncoder.h"
+
+namespace mozilla {
+
+class DriftCompensator;
+class Muxer;
+class Runnable;
+class TaskQueue;
+
+namespace dom {
+class AudioNode;
+class AudioStreamTrack;
+class BlobImpl;
+class MediaStreamTrack;
+class MutableBlobStorage;
+class VideoStreamTrack;
+}  // namespace dom
+
+class DriftCompensator;
+
+/**
+ * MediaEncoder is the framework of encoding module, it controls and manages
+ * procedures between Muxer, ContainerWriter and TrackEncoder. ContainerWriter
+ * writes the encoded track data into a specific container (e.g. ogg, webm).
+ * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and
+ * are responsible for encoding raw data coming from MediaStreamTracks.
+ *
+ * MediaEncoder solves threading issues by doing message passing to a TaskQueue
+ * (the "encoder thread") as passed in to the constructor. Each
+ * MediaStreamTrack to be recorded is set up with a MediaTrackListener.
+ * Typically there are non-direct track listeners for audio, direct listeners
+ * for video, and there is always a non-direct listener on each track for
+ * time-keeping. The listeners forward data to their corresponding TrackEncoders
+ * on the encoder thread.
+ *
+ * The MediaEncoder listens to events from all TrackEncoders, and in turn
+ * signals events to interested parties. Typically a MediaRecorder::Session.
+ * The MediaEncoder automatically encodes incoming data, muxes it, writes it
+ * into a container and stores the container data into a MutableBlobStorage.
+ * It is timeslice-aware so that it can notify listeners when it's time to
+ * expose a blob due to filling the timeslice.
+ *
+ * MediaEncoder is designed to be a passive component: it neither owns nor is
+ * in charge of managing threads. Instead this is done by its owner.
+ *
+ * For example, usage from MediaRecorder of this component would be:
+ * 1) Create an encoder with a valid MIME type. Note that there are more
+ *    configuration options, see the docs on MediaEncoder::CreateEncoder.
+ *    => encoder = MediaEncoder::CreateEncoder(aMIMEType);
+ *    It then creates track encoders and the appropriate ContainerWriter
+ *    according to the MIME type
+ *
+ * 2) Connect handlers through MediaEventListeners to the MediaEncoder's
+ *    MediaEventSources, StartedEvent(), DataAvailableEvent(), ErrorEvent() and
+ *    ShutdownEvent().
+ *    => listener = encoder->DataAvailableEvent().Connect(mainThread, &OnBlob);
+ *
+ * 3) Connect the sources to be recorded. Either through:
+ *    => encoder->ConnectAudioNode(node);
+ *    or
+ *    => encoder->ConnectMediaStreamTrack(track);
+ *    These should not be mixed. When connecting MediaStreamTracks there is
+ *    support for at most one of each kind.
+ *
+ * 4) MediaEncoder automatically encodes data from the connected tracks, muxes
+ *    them and writes it all into a blob, including metadata. When the blob
+ *    contains at least `timeslice` worth of data it notifies the
+ *    DataAvailableEvent that was connected in step 2.
+ *    => void OnBlob(RefPtr<BlobImpl> aBlob) {
+ *    =>   DispatchBlobEvent(Blob::Create(GetOwnerGlobal(), aBlob));
+ *    => };
+ *
+ * 5) To stop encoding, there are multiple options:
+ *
+ *    5.1) Stop() for a graceful stop.
+ *         => encoder->Stop();
+ *
+ *    5.2) Cancel() for an immediate stop, if you don't need the data currently
+ *         buffered.
+ *         => encoder->Cancel();
+ *
+ *    5.3) When all input tracks end, the MediaEncoder will automatically stop
+ *         and shut down.
+ */
+class MediaEncoder {
+ private:
+  class AudioTrackListener;
+  class VideoTrackListener;
+  class EncoderListener;
+
+ public:
+  // Promise for a gathered blob; rejects with an nsresult.
+  using BlobPromise =
+      MozPromise<RefPtr<dom::BlobImpl>, nsresult, false /* IsExclusive */>;
+  // Promise for a memory measurement in bytes.
+  using SizeOfPromise = MozPromise<size_t, size_t, true /* IsExclusive */>;
+
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaEncoder)
+
+ private:
+  // Private; instances are created through CreateEncoder().
+  MediaEncoder(RefPtr<TaskQueue> aEncoderThread,
+               RefPtr<DriftCompensator> aDriftCompensator,
+               UniquePtr<ContainerWriter> aWriter,
+               UniquePtr<AudioTrackEncoder> aAudioEncoder,
+               UniquePtr<VideoTrackEncoder> aVideoEncoder,
+               UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue,
+               UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue,
+               TrackRate aTrackRate, const nsAString& aMIMEType,
+               uint64_t aMaxMemory, TimeDuration aTimeslice);
+
+ public:
+  /**
+   * Called on main thread from MediaRecorder::Pause.
+   */
+  void Suspend();
+
+  /**
+   * Called on main thread from MediaRecorder::Resume.
+   */
+  void Resume();
+
+  /**
+   * Disconnects the input tracks, causing the encoding to stop.
+   * Main thread only.
+   */
+  void DisconnectTracks();
+
+  /**
+   * Connects an AudioNode with the appropriate encoder.
+   */
+  void ConnectAudioNode(dom::AudioNode* aNode, uint32_t aOutput);
+
+  /**
+   * Connects a MediaStreamTrack with the appropriate encoder.
+   */
+  void ConnectMediaStreamTrack(dom::MediaStreamTrack* aTrack);
+
+  /**
+   * Removes a connected MediaStreamTrack.
+   */
+  void RemoveMediaStreamTrack(dom::MediaStreamTrack* aTrack);
+
+  /**
+   * Creates an encoder with the given MIME type. This must be a valid MIME type
+   * or we will crash hard.
+   * Bitrates are given either explicit, or with 0 for defaults.
+   * aTrackRate is the rate in which data will be fed to the TrackEncoders.
+   * aMaxMemory is the maximum number of bytes of muxed data allowed in memory.
+   * Beyond that the blob is moved to a temporary file.
+   * aTimeslice is the minimum duration of muxed data we gather before
+   * automatically issuing a dataavailable event.
+   */
+  static already_AddRefed<MediaEncoder> CreateEncoder(
+      RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType,
+      uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes,
+      TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice);
+
+  /**
+   * Encodes raw data for all tracks to aOutputBufs. The buffer of container
+   * data is allocated in ContainerWriter::GetContainerData().
+   *
+   * On its first call, metadata is also encoded. TrackEncoders must have been
+   * initialized before this is called; until then this returns
+   * NS_ERROR_NOT_INITIALIZED. Encoder thread only.
+   */
+  nsresult GetEncodedData(nsTArray<nsTArray<uint8_t>>* aOutputBufs);
+
+  /**
+   * Asserts that Shutdown() has been called. Reasons are encoding
+   * complete, encounter an error, or being canceled by its caller.
+   */
+  void AssertShutdownCalled() { MOZ_ASSERT(mShutdownPromise); }
+
+  /**
+   * Stops (encoding any data currently buffered) the encoding and shuts down
+   * the encoder using Shutdown(). Main thread only.
+   */
+  RefPtr<GenericNonExclusivePromise> Stop();
+
+  /**
+   * Cancels (discarding any data currently buffered) the encoding and shuts
+   * down the encoder using Shutdown(). Main thread only.
+   */
+  RefPtr<GenericNonExclusivePromise> Cancel();
+
+  /**
+   * Returns whether SetError() has been called. Encoder thread only.
+   */
+  bool HasError();
+
+  /**
+   * Returns the value of the media_encoder_webm_enabled static pref.
+   */
+  static bool IsWebMEncoderEnabled();
+
+  /**
+   * Updates internal state when track encoders are all initialized: passes
+   * their metadata to the muxer. Encoder thread only.
+   */
+  void UpdateInitialized();
+
+  /**
+   * Updates internal state when track encoders have all started, and
+   * notifies listeners that this MediaEncoder has been started.
+   * Encoder thread only.
+   */
+  void UpdateStarted();
+
+  MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf)
+  /*
+   * Measure the size of the current blob storage's memory buffer, and heap
+   * memory in bytes occupied by mAudioEncoder and mVideoEncoder.
+   * Main thread only.
+   */
+  RefPtr<SizeOfPromise> SizeOfExcludingThis(
+      mozilla::MallocSizeOf aMallocSizeOf);
+
+  /**
+   * Encode, mux and store into blob storage what has been buffered until now,
+   * then return the blob backed by that storage. Encoder thread only.
+   */
+  RefPtr<BlobPromise> RequestData();
+
+  // Event that gets notified when all track encoders have received data.
+  MediaEventSource<void>& StartedEvent() { return mStartedEvent; }
+  // Event that gets notified when there was an error preventing continued
+  // recording somewhere in the MediaEncoder stack.
+  MediaEventSource<void>& ErrorEvent() { return mErrorEvent; }
+  // Event that gets notified when the MediaEncoder stack has been shut down.
+  MediaEventSource<void>& ShutdownEvent() { return mShutdownEvent; }
+  // Event that gets notified after we have muxed at least mTimeslice worth of
+  // data into the current blob storage.
+  MediaEventSource<RefPtr<dom::BlobImpl>>& DataAvailableEvent() {
+    return mDataAvailableEvent;
+  }
+
+ protected:
+  ~MediaEncoder();
+
+ private:
+  /**
+   * Registers listeners.
+   */
+  void RegisterListeners();
+
+  /**
+   * Sets mGraphTrack if not already set, using a new stream from aTrack's
+   * graph.
+   */
+  void EnsureGraphTrackFrom(MediaTrack* aTrack);
+
+  /**
+   * Shuts down gracefully if there is no remaining live track encoder.
+   */
+  void MaybeShutdown();
+
+  /**
+   * Waits for TrackEncoders to shut down, then shuts down the MediaEncoder and
+   * cleans up track encoders.
+   */
+  RefPtr<GenericNonExclusivePromise> Shutdown();
+
+  /**
+   * Sets mError to true and notifies listeners of the error if mError changed.
+   * NOTE(review): an earlier revision of this comment also said "and stops
+   * encoding"; confirm whether stopping is done by ErrorEvent() listeners.
+   */
+  void SetError();
+
+  /**
+   * Creates a new MutableBlobStorage if one doesn't exist.
+   */
+  void MaybeCreateMutableBlobStorage();
+
+  /**
+   * Called when an encoded audio frame has been pushed by the audio encoder.
+   */
+  void OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame);
+
+  /**
+   * Called when an encoded video frame has been pushed by the video encoder.
+   */
+  void OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame);
+
+  /**
+   * If enough data has been pushed to the muxer, extract it into the current
+   * blob storage. If more than mTimeslice data has been pushed to the muxer
+   * since the last DataAvailableEvent was notified, also gather the blob and
+   * notify MediaRecorder.
+   */
+  void MaybeExtractOrGatherBlob();
+
+  // Extracts encoded and muxed data into the current blob storage, creating one
+  // if it doesn't exist. The returned promise resolves when data has been
+  // stored into the blob.
+  RefPtr<GenericPromise> Extract();
+
+  // Stops gathering data into the current blob and resolves when the current
+  // blob is available. Future data will be stored in a new blob.
+  // Should a previous async GatherBlob() operation still be in progress, we'll
+  // wait for it to finish before starting this one.
+  RefPtr<BlobPromise> GatherBlob();
+
+  // Performs a single gather operation; GatherBlob() is the serializing entry
+  // point that calls this.
+  RefPtr<BlobPromise> GatherBlobImpl();
+
+  const RefPtr<nsISerialEventTarget> mMainThread;
+  const RefPtr<TaskQueue> mEncoderThread;
+  const RefPtr<DriftCompensator> mDriftCompensator;
+
+  const UniquePtr<MediaQueue<EncodedFrame>> mEncodedAudioQueue;
+  const UniquePtr<MediaQueue<EncodedFrame>> mEncodedVideoQueue;
+
+  const UniquePtr<Muxer> mMuxer;
+  const UniquePtr<AudioTrackEncoder> mAudioEncoder;
+  const RefPtr<AudioTrackListener> mAudioListener;
+  const UniquePtr<VideoTrackEncoder> mVideoEncoder;
+  const RefPtr<VideoTrackListener> mVideoListener;
+  const RefPtr<EncoderListener> mEncoderListener;
+
+ public:
+  const nsString mMimeType;
+
+  // Max memory to use for the MutableBlobStorage.
+  const uint64_t mMaxMemory;
+
+  // The interval of passing encoded data from MutableBlobStorage to
+  // onDataAvailable handler.
+  const TimeDuration mTimeslice;
+
+ private:
+  MediaEventListener mAudioPushListener;
+  MediaEventListener mAudioFinishListener;
+  MediaEventListener mVideoPushListener;
+  MediaEventListener mVideoFinishListener;
+
+  MediaEventProducer<void> mStartedEvent;
+  MediaEventProducer<void> mErrorEvent;
+  MediaEventProducer<void> mShutdownEvent;
+  MediaEventProducer<RefPtr<dom::BlobImpl>> mDataAvailableEvent;
+
+  // The AudioNode we are encoding.
+  // Will be null when input is media stream or destination node.
+  RefPtr<dom::AudioNode> mAudioNode;
+  // Pipe-track for allowing a track listener on a non-destination AudioNode.
+  // Will be null when input is media stream or destination node.
+  RefPtr<AudioNodeTrack> mPipeTrack;
+  // Input port that connect mAudioNode to mPipeTrack.
+  // Will be null when input is media stream or destination node.
+  RefPtr<MediaInputPort> mInputPort;
+  // An audio track that we are encoding. Will be null if the input stream
+  // doesn't contain audio on start() or if the input is an AudioNode.
+  RefPtr<dom::AudioStreamTrack> mAudioTrack;
+  // A video track that we are encoding. Will be null if the input stream
+  // doesn't contain video on start() or if the input is an AudioNode.
+  RefPtr<dom::VideoStreamTrack> mVideoTrack;
+
+  // A stream to keep the MediaTrackGraph alive while we're recording.
+  RefPtr<SharedDummyTrack> mGraphTrack;
+
+  // A buffer to cache muxed encoded data.
+  RefPtr<dom::MutableBlobStorage> mMutableBlobStorage;
+  // If set, is a promise for the latest GatherBlob() operation. Allows
+  // GatherBlob() operations to be serialized in order to avoid races.
+  RefPtr<BlobPromise> mBlobPromise;
+  // The end time of the muxed data in the last gathered blob. If more than one
+  // track is present, this is the end time of the track that ends the earliest
+  // in the last blob. Encoder thread only.
+  media::TimeUnit mLastBlobTime;
+  // The end time of the muxed data in the current blob storage. If more than
+  // one track is present, this is the end time of the track that ends the
+  // earliest in the current blob storage. Encoder thread only.
+  media::TimeUnit mLastExtractTime;
+  // The end time of encoded audio data sent to the muxer. Positive infinity if
+  // there is no audio encoder. Encoder thread only.
+  media::TimeUnit mMuxedAudioEndTime;
+  // The end time of encoded video data sent to the muxer. Positive infinity if
+  // there is no video encoder. Encoder thread only.
+  media::TimeUnit mMuxedVideoEndTime;
+
+  // Reference point for GetEncodeTimeStamp().
+  TimeStamp mStartTime;
+  // Set by UpdateInitialized() once metadata has been handed to the muxer.
+  bool mInitialized;
+  // Set by UpdateStarted() once all existing track encoders have started.
+  bool mStarted;
+  // Set by GetEncodedData() once the muxer reports that it is finished.
+  bool mCompleted;
+  // Set by SetError().
+  bool mError;
+  // Set when shutdown starts.
+  RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+  // Returns the time elapsed since mStartTime in milliseconds, for logging
+  // purposes.
+  double GetEncodeTimeStamp() {
+    TimeDuration decodeTime;
+    decodeTime = TimeStamp::Now() - mStartTime;
+    return decodeTime.ToMilliseconds();
+  }
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/Muxer.cpp b/dom/media/encoder/Muxer.cpp
new file mode 100644
index 0000000000..8225062ee5
--- /dev/null
+++ b/dom/media/encoder/Muxer.cpp
@@ -0,0 +1,185 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Muxer.h"
+
+#include "ContainerWriter.h"
+
+namespace mozilla {
+
+// Log module "Muxer". LOG(level, fmt, ...) forwards a printf-style message to
+// MOZ_LOG on this module; the macro is #undef'd at the end of this file.
+LazyLogModule gMuxerLog("Muxer");
+#define LOG(type, ...) MOZ_LOG(gMuxerLog, type, (__VA_ARGS__))
+
+// Takes ownership of aWriter and keeps references to the two encoded-frame
+// queues; both queues therefore need to outlive this Muxer.
+Muxer::Muxer(UniquePtr<ContainerWriter> aWriter,
+             MediaQueue<EncodedFrame>& aEncodedAudioQueue,
+             MediaQueue<EncodedFrame>& aEncodedVideoQueue)
+    : mEncodedAudioQueue(aEncodedAudioQueue),
+      mEncodedVideoQueue(aEncodedVideoQueue),
+      mWriter(std::move(aWriter)) {}
+
+// Disconnects the four push/finish listeners, skipping any that are not
+// connected, so calling this more than once is harmless.
+void Muxer::Disconnect() {
+  mAudioPushListener.DisconnectIfExists();
+  mAudioFinishListener.DisconnectIfExists();
+  mVideoPushListener.DisconnectIfExists();
+  mVideoFinishListener.DisconnectIfExists();
+}
+
+// Reports whether the underlying ContainerWriter has completed writing.
+bool Muxer::IsFinished() { return mWriter->IsWritingComplete(); }
+
+// Passes aMetadata on to the ContainerWriter and, if the writer accepts it,
+// records which kinds of track (audio and/or video) are present. May only be
+// called once. Returns the writer's failure code if it rejects the metadata,
+// NS_OK otherwise. An unknown metadata kind crashes.
+nsresult Muxer::SetMetadata(
+    const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) {
+  MOZ_DIAGNOSTIC_ASSERT(!mMetadataSet);
+  MOZ_DIAGNOSTIC_ASSERT(!mHasAudio);
+  MOZ_DIAGNOSTIC_ASSERT(!mHasVideo);
+
+  const nsresult writerResult = mWriter->SetMetadata(aMetadata);
+  if (NS_FAILED(writerResult)) {
+    LOG(LogLevel::Error, "%p Setting metadata failed, tracks=%zu", this,
+        aMetadata.Length());
+    return writerResult;
+  }
+
+  for (const auto& trackMeta : aMetadata) {
+    const auto kind = trackMeta->GetKind();
+    switch (kind) {
+      // Audio codecs.
+      case TrackMetadataBase::METADATA_OPUS:
+      case TrackMetadataBase::METADATA_VORBIS:
+      case TrackMetadataBase::METADATA_AAC:
+      case TrackMetadataBase::METADATA_AMR:
+      case TrackMetadataBase::METADATA_EVRC:
+        MOZ_ASSERT(!mHasAudio, "Only one audio track supported");
+        mHasAudio = true;
+        break;
+      // Video codecs.
+      case TrackMetadataBase::METADATA_VP8:
+        MOZ_ASSERT(!mHasVideo, "Only one video track supported");
+        mHasVideo = true;
+        break;
+      default:
+        MOZ_CRASH("Unknown codec metadata");
+    }
+  }
+
+  mMetadataSet = true;
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+  LOG(LogLevel::Info, "%p Metadata set; audio=%d, video=%d", this, mHasAudio,
+      mHasVideo);
+  return NS_OK;
+}
+
+// Appends the container data available so far to aOutputBuffers. The
+// container header is requested first, until that request has succeeded once.
+// Queued frames are then muxed into the writer and the resulting container
+// data is fetched, asking the writer to flush once both queues are at end of
+// stream. Returns the first failure encountered, or the result of the final
+// GetContainerData() call.
+nsresult Muxer::GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers) {
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+
+  if (!mMetadataEncoded) {
+    const nsresult headerResult =
+        mWriter->GetContainerData(aOutputBuffers, ContainerWriter::GET_HEADER);
+    if (NS_FAILED(headerResult)) {
+      LOG(LogLevel::Error, "%p Failed getting metadata from writer", this);
+      return headerResult;
+    }
+    mMetadataEncoded = true;
+  }
+
+  // A queue has nothing pending when it is empty but not finished yet.
+  const auto hasNothingPending = [](MediaQueue<EncodedFrame>& aQueue) {
+    return aQueue.GetSize() == 0 && !aQueue.IsFinished();
+  };
+  if (hasNothingPending(mEncodedAudioQueue) &&
+      hasNothingPending(mEncodedVideoQueue)) {
+    // Nothing to mux.
+    return NS_OK;
+  }
+
+  const nsresult muxResult = Mux();
+  if (NS_FAILED(muxResult)) {
+    LOG(LogLevel::Error, "%p Failed muxing data into writer", this);
+    return muxResult;
+  }
+
+  MOZ_ASSERT_IF(
+      mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(),
+      mEncodedAudioQueue.AtEndOfStream());
+  MOZ_ASSERT_IF(
+      mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(),
+      mEncodedVideoQueue.AtEndOfStream());
+
+  const bool allDataWritten = mEncodedAudioQueue.AtEndOfStream() &&
+                              mEncodedVideoQueue.AtEndOfStream();
+  if (allDataWritten) {
+    LOG(LogLevel::Info, "%p All data written", this);
+  }
+
+  const uint32_t flags = allDataWritten ? ContainerWriter::FLUSH_NEEDED : 0;
+  return mWriter->GetContainerData(aOutputBuffers, flags);
+}
+
+// Moves frames from the encoded audio and video queues into the
+// ContainerWriter, interleaved by start time. Frames of one track are held
+// back when the other track has not reached end of stream and could still
+// produce an earlier frame. Returns the result of WriteEncodedTrack().
+nsresult Muxer::Mux() {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+
+  nsTArray<RefPtr<EncodedFrame>> frames;
+  // The times at which we expect our next video and audio frames. These are
+  // based on the time + duration (GetEndTime()) of the last seen frames.
+  // Assumes that the encoders write the correct duration for frames.
+  media::TimeUnit expectedNextVideoTime;
+  media::TimeUnit expectedNextAudioTime;
+  // Interleave frames until we're out of audio or video
+  while (mEncodedVideoQueue.GetSize() > 0 && mEncodedAudioQueue.GetSize() > 0) {
+    RefPtr<EncodedFrame> videoFrame = mEncodedVideoQueue.PeekFront();
+    RefPtr<EncodedFrame> audioFrame = mEncodedAudioQueue.PeekFront();
+    // For any expected time our frames should occur at or after that time.
+    MOZ_ASSERT(videoFrame->mTime >= expectedNextVideoTime);
+    MOZ_ASSERT(audioFrame->mTime >= expectedNextAudioTime);
+    // Take whichever frame starts first; on a tie video goes first.
+    if (videoFrame->mTime <= audioFrame->mTime) {
+      expectedNextVideoTime = videoFrame->GetEndTime();
+      RefPtr<EncodedFrame> frame = mEncodedVideoQueue.PopFront();
+      frames.AppendElement(std::move(frame));
+    } else {
+      expectedNextAudioTime = audioFrame->GetEndTime();
+      RefPtr<EncodedFrame> frame = mEncodedAudioQueue.PopFront();
+      frames.AppendElement(std::move(frame));
+    }
+  }
+
+  // If we're out of audio we still may be able to add more video...
+  if (mEncodedAudioQueue.GetSize() == 0) {
+    while (mEncodedVideoQueue.GetSize() > 0) {
+      if (!mEncodedAudioQueue.AtEndOfStream() &&
+          mEncodedVideoQueue.PeekFront()->mTime > expectedNextAudioTime) {
+        // Audio encoding is not complete and since the video frame comes
+        // after our next audio frame we cannot safely add it.
+        break;
+      }
+      frames.AppendElement(mEncodedVideoQueue.PopFront());
+    }
+  }
+
+  // If we're out of video we still may be able to add more audio...
+  if (mEncodedVideoQueue.GetSize() == 0) {
+    while (mEncodedAudioQueue.GetSize() > 0) {
+      if (!mEncodedVideoQueue.AtEndOfStream() &&
+          mEncodedAudioQueue.PeekFront()->mTime > expectedNextVideoTime) {
+        // Video encoding is not complete and since the audio frame comes
+        // after our next video frame we cannot safely add it.
+        break;
+      }
+      frames.AppendElement(mEncodedAudioQueue.PopFront());
+    }
+  }
+
+  LOG(LogLevel::Debug,
+      "%p Muxed data, remaining-audio=%zu, remaining-video=%zu", this,
+      mEncodedAudioQueue.GetSize(), mEncodedVideoQueue.GetSize());
+
+  // If encoding is complete for both encoders we should signal end of stream,
+  // otherwise we keep going.
+  uint32_t flags =
+      mEncodedVideoQueue.AtEndOfStream() && mEncodedAudioQueue.AtEndOfStream()
+          ? ContainerWriter::END_OF_STREAM
+          : 0;
+  // The writer is called even when no frame was collected in this pass.
+  nsresult rv = mWriter->WriteEncodedTrack(frames, flags);
+  if (NS_FAILED(rv)) {
+    LOG(LogLevel::Error, "Error! Failed to write muxed data to the container");
+  }
+  return rv;
+}
+
+}  // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/Muxer.h b/dom/media/encoder/Muxer.h
new file mode 100644
index 0000000000..983e260230
--- /dev/null
+++ b/dom/media/encoder/Muxer.h
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DOM_MEDIA_ENCODER_MUXER_H_
+#define DOM_MEDIA_ENCODER_MUXER_H_
+
+#include "MediaQueue.h"
+#include "mozilla/media/MediaUtils.h"
+
+namespace mozilla {
+
+class ContainerWriter;
+class EncodedFrame;
+class TrackMetadataBase;
+
+// Generic Muxer class that helps pace the output from track encoders to the
+// ContainerWriter, so time never appears to go backwards.
+// Note that the entire class is written for single-threaded access.
+class Muxer {
+ public:
+  Muxer(UniquePtr<ContainerWriter> aWriter,
+        MediaQueue<EncodedFrame>& aEncodedAudioQueue,
+        MediaQueue<EncodedFrame>& aEncodedVideoQueue);
+  ~Muxer() = default;
+
+  // Disconnects the MediaQueues so that they will no longer be consumed.
+  // Idempotent.
+  void Disconnect();
+
+  // Returns true when all tracks have ended, and all data has been muxed and
+  // fetched.
+  bool IsFinished();
+
+  // Returns true if this muxer has not been given metadata yet.
+  bool NeedsMetadata() const { return !mMetadataSet; }
+
+  // Sets metadata for all tracks. This may only be called once.
+  nsresult SetMetadata(const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata);
+
+  // Gets the data that has been muxed and written into the container so far.
+  nsresult GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers);
+
+ private:
+  // Writes data in the MediaQueues to the ContainerWriter.
+  nsresult Mux();
+
+  // Encoded audio frames awaiting muxing. Held by reference, not owned.
+  MediaQueue<EncodedFrame>& mEncodedAudioQueue;
+  // Encoded video frames awaiting muxing. Held by reference, not owned.
+  MediaQueue<EncodedFrame>& mEncodedVideoQueue;
+  // Listeners driving the muxing as encoded data gets produced.
+  MediaEventListener mAudioPushListener;
+  MediaEventListener mAudioFinishListener;
+  MediaEventListener mVideoPushListener;
+  MediaEventListener mVideoFinishListener;
+  // The writer for the specific container we're recording into. Owned.
+  UniquePtr<ContainerWriter> mWriter;
+  // True once metadata has been set in the muxer.
+  bool mMetadataSet = false;
+  // True once metadata has been written to file.
+  bool mMetadataEncoded = false;
+  // True if metadata is set and contains an audio track.
+  bool mHasAudio = false;
+  // True if metadata is set and contains a video track.
+  bool mHasVideo = false;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp
new file mode 100644
index 0000000000..1238ef8ea0
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -0,0 +1,441 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "OpusTrackEncoder.h"
+#include "nsString.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/ProfilerLabels.h"
+#include "VideoUtils.h"
+
+#include <opus/opus.h>
+
+#define LOG(args, ...)
+
+namespace mozilla {
+
+// The Opus format supports up to 8 channels, and supports multitrack audio up
+// to 255 channels, but the current implementation supports only mono and
+// stereo, and downmixes any more than that.
+constexpr int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
+
+// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
+// In section "opus_encoder_init", channels must be 1 or 2 of input signal.
+constexpr int MAX_CHANNELS = 2;
+
+// Upper bound, in bytes, on the size of one encoded Opus packet.
+constexpr int MAX_DATA_BYTES = 4096;
+
+// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
+// Second paragraph, " The granule position of an audio data page is in units
+// of PCM audio samples at a fixed rate of 48 kHz."
+constexpr int kOpusSamplingRate = 48000;
+
+// The duration of an Opus frame in ms; it must be 2.5, 5, 10, 20, 40 or 60.
+constexpr int kFrameDurationMs = 20;
+
+// The input sampling rates (Hz) that the Opus encoder accepts directly.
+// Input at any other rate is resampled to 48kHz before encoding.
+constexpr int kOpusSupportedInputSamplingRates[] = {8000, 12000, 16000, 24000,
+                                                    48000};
+
+namespace {
+
+// Appends aValue to aOutput one byte at a time, least significant byte first,
+// so the result is little endian regardless of host byte order.
+template <typename T>
+static void SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput) {
+  for (uint32_t shift = 0; shift < sizeof(T) * 8; shift += 8) {
+    aOutput->AppendElement(static_cast<uint8_t>((aValue >> shift) & 0xff));
+  }
+}
+
+static inline void SerializeToBuffer(const nsCString& aComment,
+                                     nsTArray<uint8_t>* aOutput) {
+  // Layout: the byte length (32 bits, little endian), then the raw bytes.
+  const auto byteLength = static_cast<uint32_t>(aComment.Length());
+  SerializeToBuffer(byteLength, aOutput);
+  aOutput->AppendElements(aComment.get(), byteLength);
+}
+
+static void SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
+                                  uint32_t aInputSampleRate,
+                                  nsTArray<uint8_t>* aOutput) {
+  // Magic signature; sizeof - 1 drops the literal's null terminator.
+  constexpr uint8_t kSignature[] = "OpusHead";
+  aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);
+
+  // Version field, always 1 (8 bits, unsigned).
+  aOutput->AppendElement(1);
+
+  // Output channel count (8 bits, unsigned).
+  aOutput->AppendElement(aChannelCount);
+
+  // Pre-skip: samples (at 48 kHz) the decoder discards from its output when
+  // playback starts (16 bits, unsigned, little endian).
+  SerializeToBuffer(aPreskip, aOutput);
+
+  // Sampling rate of the original input (32 bits, unsigned, little endian).
+  SerializeToBuffer(aInputSampleRate, aOutput);
+
+  // Output gain, which an encoder should leave at zero (16 bits, signed,
+  // little endian).
+  SerializeToBuffer(static_cast<int16_t>(0), aOutput);
+
+  // Channel mapping family; family 0 covers mono and stereo only (8 bits,
+  // unsigned).
+  aOutput->AppendElement(0);
+}
+
+static void SerializeOpusCommentHeader(const nsCString& aVendor,
+                                       const nsTArray<nsCString>& aComments,
+                                       nsTArray<uint8_t>* aOutput) {
+  // Magic signature; sizeof - 1 drops the literal's null terminator.
+  constexpr uint8_t kSignature[] = "OpusTags";
+  aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);
+
+  // Vendor string, written as:
+  //   length (32 bits, unsigned, little endian)
+  //   string bytes
+  SerializeToBuffer(aVendor, aOutput);
+
+  // User comment list, written as:
+  //   number of comments (32 bits, unsigned, little endian)
+  //   then, for each comment in order:
+  //     length (32 bits, unsigned, little endian)
+  //     string bytes
+  const auto commentCount = static_cast<uint32_t>(aComments.Length());
+  SerializeToBuffer(commentCount, aOutput);
+  for (const nsCString& comment : aComments) {
+    SerializeToBuffer(comment, aOutput);
+  }
+}
+
+bool IsSampleRateSupported(TrackRate aSampleRate) {
+  // The opus encoder only accepts input sampled at one of these rates; the
+  // table is scanned in place, and other rates get resampled to 48kHz.
+  for (const int supportedRate : kOpusSupportedInputSamplingRates) {
+    if (supportedRate == aSampleRate) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // Anonymous namespace.
+
+OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate,
+                                   MediaQueue<EncodedFrame>& aEncodedDataQueue)
+    : AudioTrackEncoder(aTrackRate, aEncodedDataQueue),
+      mOutputSampleRate(IsSampleRateSupported(aTrackRate) ? aTrackRate
+                                                          : kOpusSamplingRate),
+      mEncoder(nullptr),     // Created in Init().
+      mLookahead(0),         // Queried from the encoder in Init().
+      mLookaheadWritten(0),  // Advanced by Encode() at end of stream.
+      mResampler(nullptr),   // Created in Init() only if NeedsResampler().
+      mNumOutputFrames(0) {}
+
+OpusTrackEncoder::~OpusTrackEncoder() {
+  if (mEncoder) {  // Still null if Init() never created the encoder.
+    opus_encoder_destroy(mEncoder);
+  }
+  if (mResampler) {  // May already have been freed by Encode()'s final packet.
+    speex_resampler_destroy(mResampler);
+    mResampler = nullptr;
+  }
+}
+
+nsresult OpusTrackEncoder::Init(int aChannels) {
+  NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
+                 NS_ERROR_FAILURE);
+
+  // This version of the encoder API only supports 1 or 2 channels, so clamp
+  // mChannels to at most 2 and let InterleaveTrackData downmix the PCM data
+  // when the input has more channels.
+  mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
+
+  // Reject non-audio sample rates.
+  NS_ENSURE_TRUE(mTrackRate >= 8000, NS_ERROR_INVALID_ARG);
+  NS_ENSURE_TRUE(mTrackRate <= 192000, NS_ERROR_INVALID_ARG);
+
+  if (NeedsResampler()) {
+    int error;
+    mResampler = speex_resampler_init(mChannels, mTrackRate, kOpusSamplingRate,
+                                      SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
+
+    if (error != RESAMPLER_ERR_SUCCESS) {
+      return NS_ERROR_FAILURE;
+    }
+  }
+
+  int error = 0;
+  mEncoder = opus_encoder_create(mOutputSampleRate, mChannels,
+                                 OPUS_APPLICATION_AUDIO, &error);
+
+  if (error != OPUS_OK) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (mAudioBitrate) {
+    int bps = static_cast<int>(
+        std::min<uint32_t>(mAudioBitrate, std::numeric_limits<int>::max()));
+    error = opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(bps));
+    if (error != OPUS_OK) {
+      return NS_ERROR_FAILURE;
+    }
+  }
+
+  // For Opus the codec delay is derived from the pre-skip, so query the
+  // encoder's lookahead here. For more information see:
+  // https://tools.ietf.org/html/rfc7845#section-4.2
+  error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
+  if (error != OPUS_OK) {
+    mLookahead = 0;
+    return NS_ERROR_FAILURE;
+  }
+
+  SetInitialized();
+
+  return NS_OK;
+}
+
+int OpusTrackEncoder::GetLookahead() const {
+  return mLookahead * kOpusSamplingRate / mOutputSampleRate;  // Now at 48kHz.
+}
+
+int OpusTrackEncoder::NumInputFramesPerPacket() const {
+  return mTrackRate * kFrameDurationMs / 1000;  // Hz * ms / 1000 = frames.
+}
+
+int OpusTrackEncoder::NumOutputFramesPerPacket() const {
+  return mOutputSampleRate * kFrameDurationMs / 1000;  // Hz * ms / 1000.
+}
+
+bool OpusTrackEncoder::NeedsResampler() const {
+  // mOutputSampleRate is mTrackRate when opus supports that rate and 48kHz
+  // otherwise, so this is a cheap form of !IsSampleRateSupported(mTrackRate).
+  const bool outputIsOpusRate = mOutputSampleRate == kOpusSamplingRate;
+  return outputIsOpusRate && mTrackRate != mOutputSampleRate;
+}
+
+already_AddRefed<TrackMetadataBase> OpusTrackEncoder::GetMetadata() {
+  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);
+
+  MOZ_ASSERT(mInitialized);
+
+  if (!mInitialized) {
+    return nullptr;
+  }
+
+  RefPtr<OpusMetadata> metadata = new OpusMetadata();
+  metadata->mChannels = mChannels;
+  metadata->mSamplingFrequency = mTrackRate;
+
+  // The pre-skip is expressed at 48k, the rate Ogg and WebM always use for
+  // Opus timestamps; GetLookahead() performs that conversion.
+  SerializeOpusIdHeader(mChannels, GetLookahead(), mTrackRate,
+                        &metadata->mIdHeader);
+
+  nsCString vendor;
+  vendor.AppendASCII(opus_get_version_string());
+
+  nsTArray<nsCString> comments;
+  comments.AppendElement(
+      nsLiteralCString("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
+
+  SerializeOpusCommentHeader(vendor, comments, &metadata->mCommentHeader);
+
+  return metadata.forget();
+}
+
+nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
+  AUTO_PROFILER_LABEL("OpusTrackEncoder::Encode", OTHER);
+
+  MOZ_ASSERT(aSegment);
+  MOZ_ASSERT(mInitialized || mCanceled);
+
+  if (mCanceled || IsEncodingComplete()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (!mInitialized) {
+    // The calculations below depend on the encoder being initialized.
+    return NS_ERROR_FAILURE;
+  }
+
+  int result = 0;
+  // Loop until we run out of packets of input data.
+  while (result >= 0 && !IsEncodingComplete()) {
+    // Resampled frames left over from the last iteration because they didn't
+    // fit into an Opus packet.
+    const int framesLeft = mResampledLeftover.Length() / mChannels;
+    MOZ_ASSERT(NumOutputFramesPerPacket() >= framesLeft);
+    // Fetch input frames such that there will be n frames where (n +
+    // framesLeft) >= NumOutputFramesPerPacket() after re-sampling.
+    const int framesToFetch = NumInputFramesPerPacket() -
+                              (framesLeft * mTrackRate / kOpusSamplingRate) +
+                              (NeedsResampler() ? 1 : 0);
+
+    if (!mEndOfStream && aSegment->GetDuration() < framesToFetch) {
+      // Not enough raw data
+      return NS_OK;
+    }
+
+    // Start encoding data.
+    AutoTArray<AudioDataValue, 9600> pcm;
+    pcm.SetLength(NumOutputFramesPerPacket() * mChannels);
+
+    int frameCopied = 0;
+
+    for (AudioSegment::ChunkIterator iter(*aSegment);
+         !iter.IsEnded() && frameCopied < framesToFetch; iter.Next()) {
+      AudioChunk chunk = *iter;
+
+      // Chunk to the required frame size.
+      TrackTime frameToCopy =
+          std::min(chunk.GetDuration(),
+                   static_cast<TrackTime>(framesToFetch - frameCopied));
+
+      // Possible greatest value of framesToFetch = 3844: see
+      // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
+      // should not be able to exceed this value.
+      MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");
+
+      if (!chunk.IsNull()) {
+        // Append the interleaved data to the end of pcm buffer.
+        AudioTrackEncoder::InterleaveTrackData(
+            chunk, frameToCopy, mChannels,
+            pcm.Elements() + frameCopied * mChannels);
+      } else {
+        CheckedInt<int> memsetLength =
+            CheckedInt<int>(frameToCopy) * mChannels * sizeof(AudioDataValue);
+        if (!memsetLength.isValid()) {
+          // This should never happen, but we use a defensive check because
+          // we really don't want a bad memset
+          MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
+          return NS_ERROR_FAILURE;
+        }
+        memset(pcm.Elements() + frameCopied * mChannels, 0,
+               memsetLength.value());
+      }
+
+      frameCopied += frameToCopy;
+    }
+
+    // Possible greatest value of framesToFetch = 3844: see
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
+    // should not be able to exceed this value.
+    MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");
+
+    int framesInPCM = frameCopied;
+    if (mResampler) {
+      AutoTArray<AudioDataValue, 9600> resamplingDest;
+      uint32_t inframes = frameCopied;
+      uint32_t outframes = inframes * kOpusSamplingRate / mTrackRate + 1;
+
+      // We want to consume all the input data, so we slightly oversize the
+      // resampled data buffer so we can fit the output data in. We cannot
+      // really predict the output frame count at each call.
+      resamplingDest.SetLength(outframes * mChannels);
+
+      float* in = reinterpret_cast<float*>(pcm.Elements());
+      float* out = reinterpret_cast<float*>(resamplingDest.Elements());
+      speex_resampler_process_interleaved_float(mResampler, in, &inframes, out,
+                                                &outframes);
+
+      MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
+      PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
+              mResampledLeftover.Length());
+
+      uint32_t outframesToCopy = std::min(
+          outframes,
+          static_cast<uint32_t>(NumOutputFramesPerPacket() - framesLeft));
+
+      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
+                 outframesToCopy * mChannels);
+      PodCopy(pcm.Elements() + mResampledLeftover.Length(),
+              resamplingDest.Elements(), outframesToCopy * mChannels);
+      int frameLeftover = outframes - outframesToCopy;
+      mResampledLeftover.SetLength(frameLeftover * mChannels);
+      PodCopy(mResampledLeftover.Elements(),
+              resamplingDest.Elements() + outframesToCopy * mChannels,
+              mResampledLeftover.Length());
+      // This is always at 48000Hz.
+      framesInPCM = framesLeft + outframesToCopy;
+    }
+
+    // Remove the raw data that has been pulled into the pcm buffer.
+    // frameCopied counts input frames: it equals framesToFetch, or is smaller
+    // once the end of the stream has been reached.
+    aSegment->RemoveLeading(frameCopied);
+
+    // Set once the end of the input stream has been reached and all queued
+    // data, plus the lookahead padding, has been pulled for encoding.
+    bool isFinalPacket = false;
+    if (aSegment->GetDuration() == 0 && mEndOfStream &&
+        framesInPCM < NumOutputFramesPerPacket()) {
+      // Pad |mLookahead| samples to the end of the track to prevent loss of
+      // original data.
+      const int toWrite = std::min(mLookahead - mLookaheadWritten,
+                                   NumOutputFramesPerPacket() - framesInPCM);
+      PodZero(pcm.Elements() + framesInPCM * mChannels, toWrite * mChannels);
+      mLookaheadWritten += toWrite;
+      framesInPCM += toWrite;
+      if (mLookaheadWritten == mLookahead) {
+        isFinalPacket = true;
+      }
+    }
+
+    MOZ_ASSERT_IF(!isFinalPacket, framesInPCM == NumOutputFramesPerPacket());
+
+    // Append null data to pcm buffer if the leftover data is not enough for
+    // opus encoder.
+    if (framesInPCM < NumOutputFramesPerPacket() && isFinalPacket) {
+      PodZero(pcm.Elements() + framesInPCM * mChannels,
+              (NumOutputFramesPerPacket() - framesInPCM) * mChannels);
+    }
+    auto frameData = MakeRefPtr<EncodedFrame::FrameData>();
+    // Encode the data with Opus Encoder.
+    frameData->SetLength(MAX_DATA_BYTES);
+    // A negative result is an opus error code.
+    result = 0;
+    const float* pcmBuf = static_cast<float*>(pcm.Elements());
+    result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
+                               frameData->Elements(), MAX_DATA_BYTES);
+    frameData->SetLength(result >= 0 ? result : 0);
+
+    if (result < 0) {
+      LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
+    }
+    if (isFinalPacket) {
+      if (mResampler) {
+        speex_resampler_destroy(mResampler);
+        mResampler = nullptr;
+      }
+      mResampledLeftover.SetLength(0);
+    }
+
+    // Timestamp of the packet's first sample, offset by the codec lookahead.
+    mEncodedDataQueue.Push(MakeAndAddRef<EncodedFrame>(
+        media::TimeUnit(mNumOutputFrames + mLookahead, mOutputSampleRate),
+        static_cast<uint64_t>(framesInPCM) * kOpusSamplingRate /
+            mOutputSampleRate,
+        kOpusSamplingRate, EncodedFrame::OPUS_AUDIO_FRAME,
+        std::move(frameData)));
+
+    mNumOutputFrames += NumOutputFramesPerPacket();
+    LOG("[Opus] mOutputTimeStamp %.3f.",
+        media::TimeUnit(mNumOutputFrames, mOutputSampleRate).ToSeconds());
+
+    if (isFinalPacket) {
+      LOG("[Opus] Done encoding.");
+      mEncodedDataQueue.Finish();
+    }
+  }
+
+  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
+}
+
+}  // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h
new file mode 100644
index 0000000000..5206944169
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.h
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef OpusTrackEncoder_h_
+#define OpusTrackEncoder_h_
+
+#include <stdint.h>
+#include <speex/speex_resampler.h>
+#include "TimeUnits.h"
+#include "TrackEncoder.h"
+
+struct OpusEncoder;
+
+namespace mozilla {
+
+// Opus metadata, as produced by OpusTrackEncoder::GetMetadata().
+class OpusMetadata : public TrackMetadataBase {
+ public:
+  // The ID Header of OggOpus. Refer to http://wiki.xiph.org/OggOpus.
+  nsTArray<uint8_t> mIdHeader;
+  // The Comment Header of OggOpus.
+  nsTArray<uint8_t> mCommentHeader;
+  int32_t mChannels;         // Channel count the encoder was initialized with.
+  float mSamplingFrequency;  // Sampling rate of the encoder's input, in Hz.
+  MetadataKind GetKind() const override { return METADATA_OPUS; }
+};
+
+class OpusTrackEncoder : public AudioTrackEncoder {
+ public:
+  OpusTrackEncoder(TrackRate aTrackRate,
+                   MediaQueue<EncodedFrame>& aEncodedDataQueue);
+  virtual ~OpusTrackEncoder();
+
+  already_AddRefed<TrackMetadataBase> GetMetadata() override;
+
+  /**
+   * The encoder lookahead at 48k rate.
+   */
+  int GetLookahead() const;
+
+ protected:
+  /**
+   * The number of frames, in the input rate mTrackRate, needed to fill an
+   * encoded opus packet. A frame is a sample per channel.
+   */
+  int NumInputFramesPerPacket() const override;
+
+  nsresult Init(int aChannels) override;
+
+  /**
+   * Encodes buffered data and pushes it to mEncodedDataQueue.
+   */
+  nsresult Encode(AudioSegment* aSegment) override;
+
+  /**
+   * The number of frames, in the output rate (see mOutputSampleRate), needed
+   * to fill an encoded opus packet. A frame is a sample per channel.
+   */
+  int NumOutputFramesPerPacket() const;
+
+  /**
+   * True if the input needs to be resampled to be fed to the underlying opus
+   * encoder.
+   */
+  bool NeedsResampler() const;
+
+ public:
+  /**
+   * The sample rate of the data fed to the Opus encoder. This might be
+   * different from the input sample rate if resampling occurs.
+   */
+  const TrackRate mOutputSampleRate;
+
+ private:
+  /**
+   * The Opus encoder from libopus.
+   */
+  OpusEncoder* mEncoder;
+
+  /**
+   * Total samples of delay added by codec (in rate mOutputSampleRate), can
+   * be queried by the encoder. From the perspective of decoding, real data
+   * begins this many samples late, so the encoder needs to append this many
+   * null samples to the end of stream, in order to align the time of input and
+   * output.
+   */
+  int mLookahead;
+
+  /**
+   * Number of mLookahead samples that have been written. When non-zero and
+   * equal to mLookahead, encoding is complete.
+   */
+  int mLookaheadWritten;
+
+  /**
+   * Resamples the input to 48kHz when its sample rate is not one of the
+   * rates supported by the Opus encoder. Null when no resampling is needed.
+   */
+  SpeexResamplerState* mResampler;
+
+  /**
+   * Store the resampled frames that don't fit into an Opus packet duration.
+   * They will be prepended to the resampled frames next encoding cycle.
+   */
+  nsTArray<AudioDataValue> mResampledLeftover;
+
+  /**
+   * Number of audio frames encoded, in mOutputSampleRate.
+   */
+  uint64_t mNumOutputFrames;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp
new file mode 100644
index 0000000000..8e03fd6fe3
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.cpp
@@ -0,0 +1,822 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "TrackEncoder.h"
+
+#include "AudioChannelFormat.h"
+#include "DriftCompensation.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/Logging.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/RollingMean.h"
+#include "VideoUtils.h"
+#include "mozilla/Telemetry.h"
+
+namespace mozilla {
+
+LazyLogModule gTrackEncoderLog("TrackEncoder");
+#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg)
+
+constexpr int DEFAULT_CHANNELS = 1;
+constexpr int DEFAULT_FRAME_WIDTH = 640;
+constexpr int DEFAULT_FRAME_HEIGHT = 480;
+constexpr int DEFAULT_FRAME_RATE = 30;
+// 10 second threshold if the audio encoder cannot be initialized.
+constexpr int AUDIO_INIT_FAILED_DURATION = 10;
+// 30 second threshold if the video encoder cannot be initialized.
+constexpr int VIDEO_INIT_FAILED_DURATION = 30;
+constexpr int FRAMERATE_DETECTION_ROLLING_WINDOW = 3;
+constexpr size_t FRAMERATE_DETECTION_MIN_CHUNKS = 5;
+constexpr int FRAMERATE_DETECTION_MAX_DURATION_S = 6;
+
+TrackEncoder::TrackEncoder(TrackRate aTrackRate,
+                           MediaQueue<EncodedFrame>& aEncodedDataQueue)
+    : mInitialized(false),  // Set by SetInitialized().
+      mStarted(false),      // Set by SetStarted().
+      mEndOfStream(false),
+      mCanceled(false),
+      mInitCounter(0),  // Counts AudioTrackEncoder::TryInit() attempts.
+      mSuspended(false),
+      mTrackRate(aTrackRate),
+      mEncodedDataQueue(aEncodedDataQueue) {}
+
+bool TrackEncoder::IsInitialized() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  return mInitialized;  // Set by SetInitialized().
+}
+
+bool TrackEncoder::IsStarted() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  return mStarted;  // Set by SetStarted().
+}
+
+bool TrackEncoder::IsEncodingComplete() const {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  return mEncodedDataQueue.IsFinished();  // State lives in the output queue.
+}
+
+void TrackEncoder::SetInitialized() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  // Only the first call flips the flag and notifies; later calls are no-ops.
+  if (!mInitialized) {
+    mInitialized = true;
+    // Deliver the notification while iterating over a copy of the listener
+    // array rather than over the member itself.
+    for (auto& listener : mListeners.Clone()) {
+      listener->Initialized(this);
+    }
+  }
+}
+
+void TrackEncoder::SetStarted() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  // Only the first call flips the flag and notifies; later calls are no-ops.
+  if (!mStarted) {
+    mStarted = true;
+    // Deliver the notification while iterating over a copy of the listener
+    // array rather than over the member itself.
+    for (auto& listener : mListeners.Clone()) {
+      listener->Started(this);
+    }
+  }
+}
+
+void TrackEncoder::OnError() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  // Cancel first, then report the error to a copy of the listener array.
+  Cancel();
+  for (auto& listener : mListeners.Clone()) {
+    listener->Error(this);
+  }
+}
+
+void TrackEncoder::RegisterListener(TrackEncoderListener* aListener) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  MOZ_ASSERT(!mListeners.Contains(aListener));  // No double registration.
+  mListeners.AppendElement(aListener);
+}
+
+bool TrackEncoder::UnregisterListener(TrackEncoderListener* aListener) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  return mListeners.RemoveElement(aListener);  // Undoes RegisterListener().
+}
+
+void TrackEncoder::SetWorkerThread(AbstractThread* aWorkerThread) {
+  mWorkerThread = aWorkerThread;  // Checked by the thread assertions.
+}
+
+void AudioTrackEncoder::Suspend() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Suspend(), was %s", this,
+                             mSuspended ? "suspended" : "live"));
+
+  // Suspending an already suspended encoder changes nothing. While suspended,
+  // AppendAudioSegment() drops incoming segments instead of buffering them.
+  if (!mSuspended) {
+    mSuspended = true;
+  }
+}
+
+void AudioTrackEncoder::Resume() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Resume(), was %s", this,
+                             mSuspended ? "suspended" : "live"));
+
+  // Resuming an encoder that is not suspended changes nothing. Once live
+  // again, AppendAudioSegment() goes back to buffering incoming segments.
+  if (mSuspended) {
+    mSuspended = false;
+  }
+}
+
+void AudioTrackEncoder::AppendAudioSegment(AudioSegment&& aSegment) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  AUTO_PROFILER_LABEL("AudioTrackEncoder::AppendAudioSegment", OTHER);
+  TRACK_LOG(LogLevel::Verbose,
+            ("[AudioTrackEncoder %p]: AppendAudioSegment() duration=%" PRIu64,
+             this, aSegment.GetDuration()));
+
+  // Nothing more is accepted once the track has been canceled or has ended.
+  if (mCanceled || mEndOfStream) {
+    return;
+  }
+
+  // Initialization is attempted even while suspended, using the data that is
+  // already buffered and counting the duration of the incoming segment.
+  TryInit(mOutgoingBuffer, aSegment.GetDuration());
+
+  if (mSuspended) {
+    return;
+  }
+
+  SetStarted();
+  mOutgoingBuffer.AppendFrom(&aSegment);
+
+  // Until the encoder is initialized the data only accumulates in the buffer.
+  if (!mInitialized) {
+    return;
+  }
+
+  const nsresult rv = Encode(&mOutgoingBuffer);
+  if (NS_FAILED(rv)) {
+    OnError();
+    return;
+  }
+  MOZ_ASSERT_IF(IsEncodingComplete(), mOutgoingBuffer.IsEmpty());
+}
+
+void AudioTrackEncoder::TryInit(const AudioSegment& aSegment,
+                                TrackTime aDuration) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  // Tries to initialize the encoder from aSegment's first non-null chunk.
+  if (mInitialized) {
+    return;
+  }
+
+  mInitCounter++;
+  TRACK_LOG(LogLevel::Debug,
+            ("[AudioTrackEncoder %p]: Inited the audio encoder %d times", this,
+             mInitCounter));
+
+  for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
+       iter.Next()) {
+    // The number of channels is determined by the first non-null chunk, and
+    // thus the audio encoder is initialized at this time.
+    if (iter->IsNull()) {
+      continue;
+    }
+
+    // Whatever the outcome, the first non-null chunk ends this attempt: a
+    // failure here is fatal and is reported through OnError().
+    if (NS_FAILED(Init(iter->mChannelData.Length()))) {
+      TRACK_LOG(
+          LogLevel::Error,
+          ("[AudioTrackEncoder %p]: Failed to initialize the encoder!", this));
+      OnError();
+      return;
+    }
+    TRACK_LOG(LogLevel::Info,
+              ("[AudioTrackEncoder %p]: Successfully initialized!", this));
+    return;
+  }
+
+  // Only null chunks (or none at all) so far: keep track of how much audio
+  // has gone by without the encoder being initialized.
+  mNotInitDuration += aDuration;
+  if (!mInitialized &&
+      ((mNotInitDuration - 1) / mTrackRate >= AUDIO_INIT_FAILED_DURATION) &&
+      mInitCounter > 1) {
+    // Perform a best effort initialization since we haven't gotten any
+    // data yet. Motivated by issues like Bug 1336367
+    TRACK_LOG(LogLevel::Warning,
+              ("[AudioTrackEncoder]: Initialize failed for %ds. Attempting to "
+               "init with %d (default) channels!",
+               AUDIO_INIT_FAILED_DURATION, DEFAULT_CHANNELS));
+    nsresult rv = Init(DEFAULT_CHANNELS);
+    if (NS_FAILED(rv)) {
+      TRACK_LOG(LogLevel::Error,
+                ("[AudioTrackEncoder %p]: Default-channel-init failed.", this));
+      OnError();
+      return;
+    }
+  }
+}
+
+void AudioTrackEncoder::Cancel() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Cancel()", this));
+  mCanceled = true;
+  mEndOfStream = true;         // Makes later appends and EOS notices no-ops.
+  mOutgoingBuffer.Clear();     // Drops any audio that was not yet encoded.
+  mEncodedDataQueue.Finish();  // Marks the encoded output as finished.
+}
+
+void AudioTrackEncoder::NotifyEndOfStream() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info,
+            ("[AudioTrackEncoder %p]: NotifyEndOfStream()", this));
+
+  // If the source track was silent until the end, no chunk ever told us the
+  // channel count, so initialize with the default. Treat a failure like
+  // TryInit() does instead of calling Encode() on an uninitialized encoder.
+  if (!mCanceled && !mInitialized && NS_FAILED(Init(DEFAULT_CHANNELS))) {
+    OnError();
+    return;
+  }
+
+  if (mEndOfStream) {
+    return;
+  }
+  mEndOfStream = true;
+  if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+    mOutgoingBuffer.Clear();
+    OnError();
+  }
+
+  MOZ_ASSERT(mOutgoingBuffer.GetDuration() == 0);
+}
+
+/*static*/
+void AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk,
+                                            int32_t aDuration,
+                                            uint32_t aOutputChannels,
+                                            AudioDataValue* aOutput) {
+  const uint32_t channelsToCopy = std::min(
+      aOutputChannels, static_cast<uint32_t>(aChunk.mChannelData.Length()));
+  switch (aChunk.mBufferFormat) {
+    case AUDIO_FORMAT_S16: {
+      AutoTArray<const int16_t*, 2> planes;
+      planes.SetLength(channelsToCopy);
+      for (uint32_t ch = 0; ch < channelsToCopy; ++ch) {
+        planes[ch] = static_cast<const int16_t*>(aChunk.mChannelData[ch]);
+      }
+      InterleaveTrackData(planes, aDuration, aOutputChannels, aOutput,
+                          aChunk.mVolume);
+      break;
+    }
+    case AUDIO_FORMAT_FLOAT32: {
+      AutoTArray<const float*, 2> planes;
+      planes.SetLength(channelsToCopy);
+      for (uint32_t ch = 0; ch < channelsToCopy; ++ch) {
+        planes[ch] = static_cast<const float*>(aChunk.mChannelData[ch]);
+      }
+      InterleaveTrackData(planes, aDuration, aOutputChannels, aOutput,
+                          aChunk.mVolume);
+      break;
+    }
+    case AUDIO_FORMAT_SILENCE: {
+      MOZ_ASSERT(false, "To implement.");
+    }
+  }
+}
+
+// Static. Converts interleaved frames in aInput to planar data in aOutput.
+void AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput,
+                                              int32_t aDuration,
+                                              int32_t aChannels,
+                                              AudioDataValue* aOutput) {
+  for (int32_t ch = 0; ch < aChannels; ++ch) {
+    for (int32_t frame = 0; frame < aDuration; ++frame) {
+      aOutput[ch * aDuration + frame] = aInput[frame * aChannels + ch];
+    }
+  }
+}
+
+// Measures the internal buffer of this encoder, i.e. mOutgoingBuffer.
+size_t AudioTrackEncoder::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  return mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+VideoTrackEncoder::VideoTrackEncoder(
+    RefPtr<DriftCompensator> aDriftCompensator, TrackRate aTrackRate,
+    MediaQueue<EncodedFrame>& aEncodedDataQueue,
+    FrameDroppingMode aFrameDroppingMode)
+    : TrackEncoder(aTrackRate, aEncodedDataQueue),
+      mDriftCompensator(std::move(aDriftCompensator)),
+      mEncodedTicks(0),
+      mVideoBitrate(0),
+      mFrameDroppingMode(aFrameDroppingMode),
+      mEnabled(true) {
+  mLastChunk.mDuration = 0;
+}
+
+// Marks the encoder suspended as of aTime; a repeated call is a no-op.
+void VideoTrackEncoder::Suspend(const TimeStamp& aTime) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info,
+            ("[VideoTrackEncoder %p]: Suspend() at %.3fs, was %s", this,
+             mStartTime.IsNull() ? 0.0 : (aTime - mStartTime).ToSeconds(),
+             mSuspended ? "suspended" : "live"));
+
+  if (!mSuspended) {
+    // Remember when we stopped so Resume() can compute the gap.
+    mSuspended = true;
+    mSuspendTime = aTime;
+  }
+}
+
+// Ends a suspension at aTime, shifting timestamps forward by its length.
+void VideoTrackEncoder::Resume(const TimeStamp& aTime) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  if (!mSuspended) {
+    return;
+  }
+
+  const TimeDuration suspendDuration = aTime - mSuspendTime;
+  TRACK_LOG(
+      LogLevel::Info,
+      ("[VideoTrackEncoder %p]: Resume() after %.3fs, was %s", this,
+       suspendDuration.ToSeconds(), mSuspended ? "suspended" : "live"));
+
+  mSuspended = false;
+
+  if (!mLastChunk.mTimeStamp.IsNull()) {
+    // The chunk that FindChunkContaining() reports for aTime starts at aTime.
+    if (VideoChunk* spanning = mIncomingBuffer.FindChunkContaining(aTime)) {
+      MOZ_ASSERT(spanning->mTimeStamp <= aTime);
+      spanning->mTimeStamp = aTime;
+    }
+    mLastChunk.mTimeStamp += suspendDuration;
+  }
+  if (!mStartTime.IsNull()) {
+    mStartTime += suspendDuration;
+  }
+  mSuspendTime = TimeStamp();
+}
+
+// Makes the encoder output black frames from aTime onwards.
+void VideoTrackEncoder::Disable(const TimeStamp& aTime) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Disable()", this));
+
+  // Before the track has started there are no future frames to touch, so
+  // flipping mEnabled below is all that is needed.
+  if (!mStartTime.IsNull()) {
+    // First process whatever mIncomingBuffer holds between mCurrentTime and
+    // aTime.
+    AdvanceCurrentTime(aTime);
+    if (!mLastChunk.mTimeStamp.IsNull()) {
+      // Insert a black copy of the last frame at t=aTime into
+      // mIncomingBuffer, so the shift to black happens at the right moment.
+      VideoSegment stillBuffered;
+      stillBuffered.AppendFrom(&mIncomingBuffer);
+      mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()),
+                                  mLastChunk.mFrame.GetIntrinsicSize(),
+                                  mLastChunk.mFrame.GetPrincipalHandle(),
+                                  true, aTime);
+      mIncomingBuffer.AppendFrom(&stillBuffered);
+    }
+  }
+
+  mEnabled = false;
+}
+
+// Lets real (non-forced-black) frames through again from aTime onwards.
+void VideoTrackEncoder::Enable(const TimeStamp& aTime) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Enable()", this));
+
+  // Before the track has started there are no future frames to touch, so
+  // flipping mEnabled below is all that is needed.
+  if (!mStartTime.IsNull()) {
+    // First process whatever mIncomingBuffer holds between mCurrentTime and
+    // aTime.
+    AdvanceCurrentTime(aTime);
+    if (!mLastChunk.mTimeStamp.IsNull()) {
+      // Insert a real copy of the last frame at t=aTime into
+      // mIncomingBuffer, so the shift from black happens at the right moment.
+      VideoSegment stillBuffered;
+      stillBuffered.AppendFrom(&mIncomingBuffer);
+      mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()),
+                                  mLastChunk.mFrame.GetIntrinsicSize(),
+                                  mLastChunk.mFrame.GetPrincipalHandle(),
+                                  mLastChunk.mFrame.GetForceBlack(), aTime);
+      mIncomingBuffer.AppendFrom(&stillBuffered);
+    }
+  }
+
+  mEnabled = true;
+}
+
+// Copies the frames of aSegment into mIncomingBuffer, then clears aSegment.
+void VideoTrackEncoder::AppendVideoSegment(VideoSegment&& aSegment) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Verbose,
+            ("[VideoTrackEncoder %p]: AppendVideoSegment()", this));
+
+  if (mCanceled || mEndOfStream) {
+    // No further input is taken; aSegment is not touched in this case.
+    return;
+  }
+
+  for (VideoSegment::ConstChunkIterator chunk(aSegment); !chunk.IsEnded();
+       chunk.Next()) {
+    if (chunk->IsNull()) {
+      // A null image is the source's signal that images buffered in the
+      // future must be dropped. The null chunk itself is not appended.
+      mIncomingBuffer.Clear();
+      continue;
+    }
+
+    const VideoChunk* newest = mIncomingBuffer.GetLastChunk();
+    if (newest && chunk->mTimeStamp < newest->mTimeStamp) {
+      // Time went backwards, which can happen when a MediaDecoder seeks.
+      // Discard the frames buffered in the future and start over at
+      // chunk->mTimeStamp.
+      mIncomingBuffer.Clear();
+    }
+
+    SetStarted();
+    mIncomingBuffer.AppendFrame(do_AddRef(chunk->mFrame.GetImage()),
+                                chunk->mFrame.GetIntrinsicSize(),
+                                chunk->mFrame.GetPrincipalHandle(),
+                                chunk->mFrame.GetForceBlack(),
+                                chunk->mTimeStamp);
+  }
+  aSegment.Clear();
+}
+
+// Tries to initialize the encoder from aSegment. A frame rate is estimated
+// first; without one, only the init timeout at the end is checked.
+void VideoTrackEncoder::Init(const VideoSegment& aSegment,
+                             const TimeStamp& aTime,
+                             size_t aFrameRateDetectionMinChunks) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  MOZ_ASSERT(!aTime.IsNull());
+  if (mInitialized) {
+    return;
+  }
+
+  mInitCounter++;
+  TRACK_LOG(LogLevel::Debug,
+            ("[VideoTrackEncoder %p]: Init the video encoder %d times", this,
+             mInitCounter));
+
+  Maybe<float> framerate;
+  if (!aSegment.IsEmpty()) {
+    // The number of whole frames, i.e., with known duration.
+    size_t frameCount = 0;
+    RollingMean<TimeDuration, TimeDuration> meanDuration(
+        FRAMERATE_DETECTION_ROLLING_WINDOW);
+    VideoSegment::ConstChunkIterator iter(aSegment);
+    TimeStamp previousChunkTime = iter->mTimeStamp;
+    iter.Next();
+    for (; !iter.IsEnded(); iter.Next(), ++frameCount) {
+      meanDuration.insert(iter->mTimeStamp - previousChunkTime);
+      previousChunkTime = iter->mTimeStamp;
+    }
+    TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Init() frameCount=%zu",
+                                this, frameCount));
+    if (frameCount >= aFrameRateDetectionMinChunks) {
+      if (meanDuration.empty()) {
+        // No whole frames available, use aTime as end time.
+        framerate = Some(1.0f / (aTime - mStartTime).ToSeconds());
+      } else {
+        // We want some frames for estimating the framerate.
+        framerate = Some(1.0f / meanDuration.mean().ToSeconds());
+      }
+    } else if ((aTime - mStartTime).ToSeconds() >
+               FRAMERATE_DETECTION_MAX_DURATION_S) {
+      // Instead of failing init after the fail-timeout, we fallback to a very
+      // low rate.
+      framerate = Some(static_cast<float>(frameCount) /
+                       (aTime - mStartTime).ToSeconds());
+    }
+  }
+
+  if (framerate) {
+    for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
+         iter.Next()) {
+      if (iter->IsNull()) {
+        continue;
+      }
+      gfx::IntSize imgsize = iter->mFrame.GetImage()->GetSize();
+      gfx::IntSize intrinsicSize = iter->mFrame.GetIntrinsicSize();
+      nsresult rv = Init(imgsize.width, imgsize.height, intrinsicSize.width,
+                         intrinsicSize.height, *framerate);
+
+      if (NS_SUCCEEDED(rv)) {
+        TRACK_LOG(LogLevel::Info,
+                  ("[VideoTrackEncoder %p]: Successfully initialized!", this));
+        return;
+      }
+      // Report the failure once; the timeout check below would repeat it.
+      TRACK_LOG(
+          LogLevel::Error,
+          ("[VideoTrackEncoder %p]: Failed to initialize the encoder!", this));
+      OnError();
+      return;
+    }
+  }
+
+  if (((aTime - mStartTime).ToSeconds() > VIDEO_INIT_FAILED_DURATION) &&
+      mInitCounter > 1) {
+    TRACK_LOG(LogLevel::Warning,
+              ("[VideoTrackEncoder %p]: No successful init for %ds.", this,
+               VIDEO_INIT_FAILED_DURATION));
+    OnError();
+    return;
+  }
+}
+
+// Aborts video encoding and drops all buffered frames.
+void VideoTrackEncoder::Cancel() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: Cancel()", this));
+  mEndOfStream = mCanceled = true;  // Canceled implies end of stream here.
+  mOutgoingBuffer.Clear();
+  mIncomingBuffer.Clear();
+  mLastChunk.SetNull(0);
+  mEncodedDataQueue.Finish();
+}
+
+// Flushes the last frame up to the current time and encodes the remainder.
+void VideoTrackEncoder::NotifyEndOfStream() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  if (mCanceled || mEndOfStream) {
+    // Canceled, or we have already been notified.
+    return;
+  }
+
+  mEndOfStream = true;
+  TRACK_LOG(LogLevel::Info,
+            ("[VideoTrackEncoder %p]: NotifyEndOfStream()", this));
+
+  if (!mLastChunk.IsNull()) {
+    RefPtr<layers::Image> lastImage = mLastChunk.mFrame.GetImage();
+    const TimeStamp now = TimeStamp::Now();
+    TimeStamp currentTime = mSuspended ? mSuspendTime : mCurrentTime;
+    currentTime = mDriftCompensator->GetVideoTime(now, currentTime);
+    TimeDuration absoluteEndTime = currentTime - mStartTime;
+    CheckedInt64 duration =
+        UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) -
+        mEncodedTicks;
+    if (duration.isValid() && duration.value() > 0) {
+      mEncodedTicks += duration.value();
+      TRACK_LOG(LogLevel::Debug,
+                ("[VideoTrackEncoder %p]: Appending last video frame %p at pos "
+                 "%.3fs, "
+                 "track-end=%.3fs",
+                 this, lastImage.get(),
+                 (mLastChunk.mTimeStamp - mStartTime).ToSeconds(),
+                 absoluteEndTime.ToSeconds()));
+      mOutgoingBuffer.AppendFrame(
+          lastImage.forget(), mLastChunk.mFrame.GetIntrinsicSize(),
+          PRINCIPAL_HANDLE_NONE, mLastChunk.mFrame.GetForceBlack() || !mEnabled,
+          mLastChunk.mTimeStamp);
+      mOutgoingBuffer.ExtendLastFrameBy(duration.value());
+    }
+
+    if (!mInitialized) {
+      // Try to init without waiting for an accurate framerate.
+      Init(mOutgoingBuffer, currentTime, 0);
+    }
+  }
+
+  if (mCanceled) {
+    // Previous Init failed and we got canceled. Nothing to do here.
+    return;
+  }
+
+  mIncomingBuffer.Clear();
+  mLastChunk.SetNull(0);
+
+  if (NS_WARN_IF(!mInitialized)) {
+    // Still not initialized; probably no real frame arrived (e.g. muting).
+    // Use the defaults, and report a failing Init instead of ignoring it.
+    nsresult rv = Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT,
+                       DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT,
+                       DEFAULT_FRAME_RATE);
+    if (NS_FAILED(rv)) {
+      OnError();
+      return;
+    }
+  }
+
+  if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+    OnError();
+  }
+  MOZ_ASSERT(mOutgoingBuffer.IsEmpty());
+}
+
+// Anchors both the track start and the current time at aStartOffset.
+void VideoTrackEncoder::SetStartOffset(const TimeStamp& aStartOffset) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  MOZ_ASSERT(mCurrentTime.IsNull());
+  TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: SetStartOffset()", this));
+  mStartTime = mCurrentTime = aStartOffset;
+}
+
+void VideoTrackEncoder::AdvanceCurrentTime(const TimeStamp& aTime) {
+  AUTO_PROFILER_LABEL("VideoTrackEncoder::AdvanceCurrentTime", OTHER);
+  // Moves buffered frames that start before aTime on to the encoder.
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  MOZ_ASSERT(!mStartTime.IsNull());
+  MOZ_ASSERT(!mCurrentTime.IsNull());
+
+  if (mCanceled) {
+    return;
+  }
+
+  if (mEndOfStream) {
+    return;
+  }
+
+  if (mSuspended) {
+    TRACK_LOG(
+        LogLevel::Verbose,
+        ("[VideoTrackEncoder %p]: AdvanceCurrentTime() suspended at %.3fs",
+         this, (mCurrentTime - mStartTime).ToSeconds()));
+    mCurrentTime = aTime;
+    mIncomingBuffer.ForgetUpToTime(mCurrentTime);
+    return;
+  }
+
+  TRACK_LOG(LogLevel::Verbose,
+            ("[VideoTrackEncoder %p]: AdvanceCurrentTime() to %.3fs", this,
+             (aTime - mStartTime).ToSeconds()));
+
+  // Collect into tempSegment the buffered frames that start before aTime.
+  VideoSegment tempSegment;
+  {
+    VideoChunk* previousChunk = &mLastChunk;
+    auto appendDupes = [&](const TimeStamp& aUpTo) {
+      while ((aUpTo - previousChunk->mTimeStamp).ToSeconds() > 1.0) {
+        // We encode at least one frame per second, even if there are none
+        // flowing.
+        previousChunk->mTimeStamp += TimeDuration::FromSeconds(1.0);
+        tempSegment.AppendFrame(
+            do_AddRef(previousChunk->mFrame.GetImage()),
+            previousChunk->mFrame.GetIntrinsicSize(),
+            previousChunk->mFrame.GetPrincipalHandle(),
+            previousChunk->mFrame.GetForceBlack() || !mEnabled,
+            previousChunk->mTimeStamp);
+        TRACK_LOG(
+            LogLevel::Verbose,
+            ("[VideoTrackEncoder %p]: Duplicating video frame (%p) at pos %.3f",
+             this, previousChunk->mFrame.GetImage(),
+             (previousChunk->mTimeStamp - mStartTime).ToSeconds()));
+      }
+    };
+    for (VideoSegment::ChunkIterator iter(mIncomingBuffer); !iter.IsEnded();
+         iter.Next()) {
+      MOZ_ASSERT(!iter->IsNull());
+      if (!previousChunk->IsNull() &&
+          iter->mTimeStamp <= previousChunk->mTimeStamp) {
+        // This frame starts no later than previousChunk. Skip.
+        continue;
+      }
+      if (iter->mTimeStamp >= aTime) {
+        // This frame starts at or after aTime, i.e., in the future. Stop.
+        break;
+      }
+      if (!previousChunk->IsNull()) {
+        appendDupes(iter->mTimeStamp);
+      }
+      tempSegment.AppendFrame(
+          do_AddRef(iter->mFrame.GetImage()), iter->mFrame.GetIntrinsicSize(),
+          iter->mFrame.GetPrincipalHandle(),
+          iter->mFrame.GetForceBlack() || !mEnabled, iter->mTimeStamp);
+      TRACK_LOG(LogLevel::Verbose,
+                ("[VideoTrackEncoder %p]: Taking video frame (%p) at pos %.3f",
+                 this, iter->mFrame.GetImage(),
+                 (iter->mTimeStamp - mStartTime).ToSeconds()));
+      previousChunk = &*iter;
+    }
+    if (!previousChunk->IsNull()) {
+      appendDupes(aTime);
+    }
+  }
+  mCurrentTime = aTime;
+  mIncomingBuffer.ForgetUpToTime(mCurrentTime);
+
+  // Convert tempSegment timestamps to durations and add chunks with known
+  // duration to mOutgoingBuffer.
+  const TimeStamp now = TimeStamp::Now();
+  for (VideoSegment::ConstChunkIterator iter(tempSegment); !iter.IsEnded();
+       iter.Next()) {
+    VideoChunk chunk = *iter;
+
+    if (mLastChunk.mTimeStamp.IsNull()) {
+      // This is the first real chunk in the track. Make it start at the
+      // beginning of the track.
+      MOZ_ASSERT(!iter->mTimeStamp.IsNull());
+
+      TRACK_LOG(
+          LogLevel::Verbose,
+          ("[VideoTrackEncoder %p]: Got the first video frame (%p) at pos %.3f "
+           "(moving it to beginning)",
+           this, iter->mFrame.GetImage(),
+           (iter->mTimeStamp - mStartTime).ToSeconds()));
+
+      mLastChunk = *iter;
+      mLastChunk.mTimeStamp = mStartTime;
+      continue;
+    }
+
+    MOZ_ASSERT(!mLastChunk.IsNull());
+    MOZ_ASSERT(!chunk.IsNull());
+
+    TimeDuration absoluteEndTime =
+        mDriftCompensator->GetVideoTime(now, chunk.mTimeStamp) - mStartTime;
+    TRACK_LOG(LogLevel::Verbose,
+              ("[VideoTrackEncoder %p]: Appending video frame %p, at pos %.3fs "
+               "until %.3fs",
+               this, mLastChunk.mFrame.GetImage(),
+               (mDriftCompensator->GetVideoTime(now, mLastChunk.mTimeStamp) -
+                mStartTime)
+                   .ToSeconds(),
+               absoluteEndTime.ToSeconds()));
+    CheckedInt64 duration =
+        UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) -
+        mEncodedTicks;
+    if (!duration.isValid()) {
+      NS_ERROR("Duration overflow");
+      return;
+    }
+
+    if (duration.value() <= 0) {
+      // A frame either started before the last frame (can happen when
+      // multiple frames are added before SetStartOffset), or
+      // two frames were so close together that they ended up at the same
+      // position. We handle both cases by ignoring the previous frame.
+
+      TRACK_LOG(LogLevel::Verbose,
+                ("[VideoTrackEncoder %p]: Duration from frame %p to frame %p "
+                 "is %" PRId64 ". Ignoring %p",
+                 this, mLastChunk.mFrame.GetImage(), iter->mFrame.GetImage(),
+                 duration.value(), mLastChunk.mFrame.GetImage()));
+
+      TimeStamp t = mLastChunk.mTimeStamp;
+      mLastChunk = *iter;
+      mLastChunk.mTimeStamp = t;
+      continue;
+    }
+
+    mEncodedTicks += duration.value();
+    mOutgoingBuffer.AppendFrame(
+        do_AddRef(mLastChunk.mFrame.GetImage()),
+        mLastChunk.mFrame.GetIntrinsicSize(), PRINCIPAL_HANDLE_NONE,
+        mLastChunk.mFrame.GetForceBlack() || !mEnabled, mLastChunk.mTimeStamp);
+    mOutgoingBuffer.ExtendLastFrameBy(duration.value());
+    mLastChunk = chunk;
+  }
+
+  if (mOutgoingBuffer.IsEmpty()) {
+    return;
+  }
+
+  Init(mOutgoingBuffer, mCurrentTime, FRAMERATE_DETECTION_MIN_CHUNKS);
+
+  if (!mInitialized) {
+    return;  // Nothing is encoded yet; mOutgoingBuffer is left as it is.
+  }
+
+  if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+    OnError();
+    return;
+  }
+
+  MOZ_ASSERT(mOutgoingBuffer.IsEmpty());
+}
+
+// Sums the measured sizes of the incoming and outgoing video buffers.
+size_t VideoTrackEncoder::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  const size_t incoming = mIncomingBuffer.SizeOfExcludingThis(aMallocSizeOf);
+  return incoming + mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+}  // namespace mozilla
+
+#undef TRACK_LOG
diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h
new file mode 100644
index 0000000000..ede0ee5d0a
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.h
@@ -0,0 +1,501 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackEncoder_h_
+#define TrackEncoder_h_
+
+#include "AudioSegment.h"
+#include "EncodedFrame.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "TrackMetadataBase.h"
+#include "VideoSegment.h"
+
+namespace mozilla {
+
+class AbstractThread;
+class DriftCompensator;
+class TrackEncoder;
+
+class TrackEncoderListener {  // Receives events from a TrackEncoder.
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener)
+
+  /**
+   * Called when the TrackEncoder has received its first real data.
+   */
+  virtual void Started(TrackEncoder* aEncoder) = 0;
+
+  /**
+   * Called when the TrackEncoder's underlying encoder has been successfully
+   * initialized and there's non-null data ready to be encoded.
+   */
+  virtual void Initialized(TrackEncoder* aEncoder) = 0;
+
+  /**
+   * Called after the TrackEncoder hit an unexpected error, causing it to
+   * abort operation.
+   */
+  virtual void Error(TrackEncoder* aEncoder) = 0;
+
+ protected:
+  virtual ~TrackEncoderListener() = default;
+};
+
+/**
+ * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by
+ * MediaEncoder. All methods are to be called only on the worker thread.
+ *
+ * The control APIs are all called by MediaEncoder on its dedicated thread. Data
+ * is encoded as soon as it has been appended (and time has advanced past its
+ * end in case of video) and pushed to mEncodedDataQueue.
+ */
+class TrackEncoder {
+ public:
+  TrackEncoder(TrackRate aTrackRate,
+               MediaQueue<EncodedFrame>& aEncodedDataQueue);
+
+  /**
+   * Called by MediaEncoder to cancel the encoding.
+   */
+  virtual void Cancel() = 0;
+
+  /**
+   * Notifies us that we have reached the end of the stream and no more data
+   * will be appended.
+   */
+  virtual void NotifyEndOfStream() = 0;
+
+  /**
+   * Creates and sets up meta data for a specific codec, called on the worker
+   * thread.
+   */
+  virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0;
+
+  /**
+   * MediaQueue containing encoded data, that is pushed as soon as it's ready.
+   */
+  MediaQueue<EncodedFrame>& EncodedDataQueue() { return mEncodedDataQueue; }
+
+  /**
+   * Returns true once this TrackEncoder is initialized.
+   */
+  bool IsInitialized();
+
+  /**
+   * Returns true once this TrackEncoder has received some data.
+   */
+  bool IsStarted();
+
+  /**
+   * True if the track encoder has encoded all source segments coming from
+   * MediaTrackGraph. Call on the worker thread.
+   */
+  bool IsEncodingComplete() const;
+
+  /**
+   * Registers a listener to events from this TrackEncoder.
+   * We hold a strong reference to the listener.
+   */
+  void RegisterListener(TrackEncoderListener* aListener);
+
+  /**
+   * Unregisters a listener from events from this TrackEncoder.
+   * The listener will stop receiving events synchronously.
+   */
+  bool UnregisterListener(TrackEncoderListener* aListener);
+
+  virtual void SetBitrate(const uint32_t aBitrate) = 0;
+
+  /**
+   * It's optional to set the worker thread, but if you do we'll assert that
+   * we are in the worker thread in every method that gets called.
+   */
+  void SetWorkerThread(AbstractThread* aWorkerThread);
+
+  /**
+   * Measure size of internal buffers.
+   */
+  virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0;
+
+ protected:
+  virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); }
+
+  /**
+   * If this TrackEncoder was not already initialized, it is set to initialized
+   * and listeners are notified.
+   */
+  void SetInitialized();
+
+  /**
+   * If this TrackEncoder was not already marked started, its started state is
+   * set and listeners are notified.
+   */
+  void SetStarted();
+
+  /**
+   * Called after an error. Cancels the encoding and notifies listeners.
+   */
+  void OnError();
+
+  /**
+   * True if the track encoder has been initialized successfully.
+   */
+  bool mInitialized;
+
+  /**
+   * True if the track encoder has received data.
+   */
+  bool mStarted;
+
+  /**
+   * True once all data until the end of the input track has been received.
+   */
+  bool mEndOfStream;
+
+  /**
+   * True once this encoding has been cancelled.
+   */
+  bool mCanceled;
+
+  // How many times we have tried to initialize the encoder.
+  uint32_t mInitCounter;
+
+  /**
+   * True if this TrackEncoder is currently suspended.
+   */
+  bool mSuspended;
+
+  /**
+   * The track rate of source media.
+   */
+  const TrackRate mTrackRate;
+
+  /**
+   * If set we assert that all methods are called on this thread.
+   */
+  RefPtr<AbstractThread> mWorkerThread;
+
+  /**
+   * MediaQueue where encoded data ends up. Note that metadata goes out of band.
+   */
+  MediaQueue<EncodedFrame>& mEncodedDataQueue;
+
+  nsTArray<RefPtr<TrackEncoderListener>> mListeners;  // Held strongly.
+};
+
+class AudioTrackEncoder : public TrackEncoder {
+ public:
+  AudioTrackEncoder(TrackRate aTrackRate,
+                    MediaQueue<EncodedFrame>& aEncodedDataQueue)
+      : TrackEncoder(aTrackRate, aEncodedDataQueue),
+        mChannels(0),
+        mNotInitDuration(0),
+        mAudioBitrate(0) {}
+
+  /**
+   * Suspends encoding from now, i.e., all future audio data received through
+   * AppendAudioSegment() until the next Resume() will be dropped.
+   */
+  void Suspend();
+
+  /**
+   * Resumes encoding starting now, i.e., data from the next
+   * AppendAudioSegment() will get encoded.
+   */
+  void Resume();
+
+  /**
+   * Appends and consumes track data from aSegment.
+   */
+  void AppendAudioSegment(AudioSegment&& aSegment);
+
+  template <typename T>
+  static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration,
+                                  uint32_t aOutputChannels,
+                                  AudioDataValue* aOutput, float aVolume) {
+    if (aInput.Length() < aOutputChannels) {
+      // Up-mix. aInput might end up with more than aOutputChannels entries.
+      AudioChannelsUpMix(&aInput, aOutputChannels,
+                         SilentChannel::ZeroChannel<T>());
+    }
+
+    if (aInput.Length() > aOutputChannels) {
+      DownmixAndInterleave<T>(aInput, aDuration, aVolume, aOutputChannels,
+                              aOutput);
+    } else {
+      InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume,
+                                 aOutputChannels, aOutput);
+    }
+  }
+
+  /**
+   * Interleaves the track data and stores the result into aOutput. Might need
+   * to up-mix or down-mix the channel data if the channels number of this chunk
+   * is different from aOutputChannels. The channel data from aChunk might be
+   * modified by up-mixing.
+   */
+  static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration,
+                                  uint32_t aOutputChannels,
+                                  AudioDataValue* aOutput);
+
+  /**
+   * De-interleaves the aInput data and stores the result into aOutput.
+   * No up-mix or down-mix operations inside.
+   */
+  static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration,
+                                    int32_t aChannels, AudioDataValue* aOutput);
+
+  /**
+   * Measure size of internal buffers.
+   */
+  size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
+
+  void SetBitrate(const uint32_t aBitrate) override {
+    mAudioBitrate = aBitrate;
+  }
+
+  /**
+   * Tries to initiate the AudioEncoder based on data in aSegment.
+   * This can be re-called often, as it will exit early should we already be
+   * initiated. mInitialized is only set if there was enough data in aSegment
+   * to infer metadata. If mInitialized gets set, listeners are notified.
+   *
+   * Not having enough data in aSegment to initiate the encoder for an
+   * accumulated aDuration of one second will make us initiate with a default
+   * number of channels.
+   *
+   * If we attempt to initiate the underlying encoder but fail, we Cancel() and
+   * notify listeners.
+   */
+  void TryInit(const AudioSegment& aSegment, TrackTime aDuration);
+
+  void Cancel() override;
+
+  /**
+   * Dispatched from MediaTrackGraph when we have finished feeding data to
+   * mOutgoingBuffer.
+   */
+  void NotifyEndOfStream() override;
+
+ protected:
+  /**
+   * Number of samples per channel in a pcm buffer. This is also the value of
+   * frame size required by audio encoder, and listeners will be notified when
+   * at least this much data has been added to mOutgoingBuffer.
+   */
+  virtual int NumInputFramesPerPacket() const { return 0; }
+
+  /**
+   * Initializes the audio encoder. The call of this method is delayed until we
+   * have received the first valid track from MediaTrackGraph.
+   */
+  virtual nsresult Init(int aChannels) = 0;
+
+  /**
+   * Encodes buffered data and pushes it to mEncodedDataQueue.
+   */
+  virtual nsresult Encode(AudioSegment* aSegment) = 0;
+
+  /**
+   * The number of channels used for processing PCM data in the audio
+   * encoder. This value comes from the first valid audio chunk. If encoder
+   * can't support the channels in the chunk, downmix PCM stream can be
+   * performed. This value is also used to initialize the audio encoder.
+   */
+  int mChannels;
+
+  /**
+   * A segment queue of outgoing audio track data to the encoder.
+   * The contents of mOutgoingBuffer will always be what has been appended on
+   * the encoder thread but not yet consumed by the encoder sub class.
+   */
+  AudioSegment mOutgoingBuffer;
+  // NOTE(review): presumably the duration received before init; confirm.
+  TrackTime mNotInitDuration;
+
+  uint32_t mAudioBitrate;
+};
+
+enum class FrameDroppingMode {  // Passed to the VideoTrackEncoder constructor.
+  ALLOW,     // Allowed to drop frames to keep up under load
+  DISALLOW,  // Must not drop any frames, even if it means we will OOM
+};
+
+class VideoTrackEncoder : public TrackEncoder {
+ public:
+  VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+                    TrackRate aTrackRate,
+                    MediaQueue<EncodedFrame>& aEncodedDataQueue,
+                    FrameDroppingMode aFrameDroppingMode);
+
+  /**
+   * Suspends encoding from aTime, i.e., all video frame with a timestamp
+   * between aTime and the timestamp of the next Resume() will be dropped.
+   */
+  void Suspend(const TimeStamp& aTime);
+
+  /**
+   * Resumes encoding starting at aTime.
+   */
+  void Resume(const TimeStamp& aTime);
+
+  /**
+   * Makes the video black from aTime.
+   */
+  void Disable(const TimeStamp& aTime);
+
+  /**
+   * Makes the video non-black from aTime.
+   *
+   * NB that it could still be forced black for other reasons, like principals.
+   */
+  void Enable(const TimeStamp& aTime);
+
+  /**
+   * Appends the non-null chunks of aSegment to mIncomingBuffer and clears
+   * aSegment. A null chunk, or a chunk older than the last buffered one,
+   * clears mIncomingBuffer first. Asserts the worker thread, if one is set.
+   */
+  void AppendVideoSegment(VideoSegment&& aSegment);
+
+  /**
+   * Measure size of internal buffers.
+   */
+  size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
+
+  void SetBitrate(const uint32_t aBitrate) override {
+    mVideoBitrate = aBitrate;
+  }
+
+  /**
+   * Tries to initiate the VideoEncoder based on data in aSegment.
+   * This can be re-called often, as it will exit early should we already be
+   * initiated. mInitialized is only set if there was enough data in aSegment
+   * to infer metadata. If mInitialized gets set, listeners are notified.
+   * The amount of chunks needed can be controlled by
+   * aFrameRateDetectionMinChunks which denotes the minimum number of chunks
+   * needed to infer the framerate.
+   *
+   * Failing to initialize for more than VIDEO_INIT_FAILED_DURATION seconds
+   * after mStartTime, over more than one attempt, cancels the encoding.
+   */
+  void Init(const VideoSegment& aSegment, const TimeStamp& aTime,
+            size_t aFrameRateDetectionMinChunks);
+  // Converts aS seconds to track ticks at mTrackRate.
+  TrackTime SecondsToMediaTime(double aS) const {
+    NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX,
+                 "Bad seconds");
+    return mTrackRate * aS;
+  }
+
+  /**
+   * MediaTrackGraph notifies us about the time of the track's start.
+   * This gets called on the MediaEncoder thread after a dispatch.
+   */
+  void SetStartOffset(const TimeStamp& aStartOffset);
+
+  void Cancel() override;
+
+  /**
+   * Notifies us that we have reached the end of the stream and no more data
+   * will be appended to mIncomingBuffer.
+   */
+  void NotifyEndOfStream() override;
+
+  /**
+   * Dispatched from MediaTrackGraph when it has run an iteration so we can
+   * hand more data to the encoder.
+   */
+  void AdvanceCurrentTime(const TimeStamp& aTime);
+
+ protected:
+  /**
+   * Initialize the video encoder. In order to collect the value of width and
+   * height of source frames, this initialization is delayed until we have
+   * received the first valid video frame from MediaTrackGraph.
+   * Listeners will be notified after it has been successfully initialized.
+   */
+  virtual nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
+                        int32_t aDisplayHeight, float aEstimatedFrameRate) = 0;
+
+  /**
+   * Encodes data in the outgoing buffer and pushes it to mEncodedDataQueue.
+   */
+  virtual nsresult Encode(VideoSegment* aSegment) = 0;
+
+  /**
+   * Drift compensator for re-clocking incoming video frame wall-clock
+   * timestamps to audio time.
+   */
+  const RefPtr<DriftCompensator> mDriftCompensator;
+
+  /**
+   * The last unique frame and duration so far handled by
+   * AdvanceCurrentTime. When a new frame is detected, mLastChunk is added
+   * to mOutgoingBuffer.
+   */
+  VideoChunk mLastChunk;
+
+  /**
+   * A segment queue of incoming video track data, from listeners.
+   * The duration of mIncomingBuffer is irrelevant as we only look at TimeStamps
+   * of frames. Consumed data is replaced by null data.
+   */
+  VideoSegment mIncomingBuffer;
+
+  /**
+   * A segment queue of outgoing video track data to the encoder.
+   * The contents of mOutgoingBuffer will always be what has been consumed from
+   * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder
+   * sub class. There won't be any null data at the beginning of mOutgoingBuffer
+   * unless explicitly pushed by the producer.
+   */
+  VideoSegment mOutgoingBuffer;
+
+  /**
+   * The number of mTrackRate ticks we have passed to mOutgoingBuffer.
+   */
+  TrackTime mEncodedTicks;
+
+  /**
+   * The time up to which we have forwarded data from mIncomingBuffer to
+   * mOutgoingBuffer.
+   */
+  TimeStamp mCurrentTime;
+
+  /**
+   * The time the video track started, so the start of the video track can be
+   * synced to the start of the audio track.
+   *
+   * Note that this time will progress during suspension, to make sure the
+   * incoming frames stay in sync with the output.
+   */
+  TimeStamp mStartTime;
+
+  /**
+   * The time Suspend was called on the MediaRecorder, so we can calculate the
+   * duration on the next Resume().
+   */
+  TimeStamp mSuspendTime;
+
+  uint32_t mVideoBitrate;
+
+  /**
+   * ALLOW to drop frames under load.
+   * DISALLOW to encode all frames, mainly for testing.
+   */
+  FrameDroppingMode mFrameDroppingMode;
+
+  /**
+   * True if the video track this VideoTrackEncoder is attached to is
+   * currently enabled. While false, we encode all frames as black.
+   */
+  bool mEnabled;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h
new file mode 100644
index 0000000000..503b52e5ec
--- /dev/null
+++ b/dom/media/encoder/TrackMetadataBase.h
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackMetadataBase_h_
+#define TrackMetadataBase_h_
+
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+namespace mozilla {
+
+// Base class for the metadata of a single encoded track. Each object describes
+// exactly one track; subclasses report the codec they describe through
+// GetKind().
+class TrackMetadataBase {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase)
+  // The codec a metadata object belongs to.
+  enum MetadataKind {
+    METADATA_OPUS,  // Represents the Opus metadata
+    METADATA_VP8,
+    METADATA_VORBIS,
+    METADATA_AVC,
+    METADATA_AAC,
+    METADATA_AMR,
+    METADATA_EVRC,
+    METADATA_UNKNOWN  // Metadata kind not set
+  };
+  // Returns the specific metadata kind of this object.
+  virtual MetadataKind GetKind() const = 0;
+
+ protected:
+  // Protected destructor, to discourage deletion outside of Release():
+  virtual ~TrackMetadataBase() = default;
+};
+
+// The base class for audio metadata. All accessors are pure virtual and must
+// be provided by the codec-specific subclass.
+class AudioTrackMetadata : public TrackMetadataBase {
+ public:
+  // The duration of each sample set generated by the encoder, counted in
+  // samples. If the duration varies, this should return 0.
+  virtual uint32_t GetAudioFrameDuration() = 0;
+
+  // The size of each sample set generated by the encoder, counted in bytes.
+  // If the size varies, this should return 0.
+  virtual uint32_t GetAudioFrameSize() = 0;
+
+  // AudioSampleRate is the number of audio samples per second.
+  virtual uint32_t GetAudioSampleRate() = 0;
+
+  // Channel count of the audio track (inferred from the name; the original
+  // declaration is undocumented).
+  virtual uint32_t GetAudioChannels() = 0;
+};
+
+// The base class for video metadata. All accessors are pure virtual and must
+// be provided by the codec-specific subclass.
+class VideoTrackMetadata : public TrackMetadataBase {
+ public:
+  // VideoHeight and VideoWidth are the frame size of the elementary stream.
+  virtual uint32_t GetVideoHeight() = 0;
+  virtual uint32_t GetVideoWidth() = 0;
+
+  // VideoDisplayHeight and VideoDisplayWidth are the display frame size.
+  virtual uint32_t GetVideoDisplayHeight() = 0;
+  virtual uint32_t GetVideoDisplayWidth() = 0;
+
+  // VideoClockRate is the number of timestamp ticks per second used by the
+  // video frames' timestamps.
+  // For example, if VideoClockRate is 90 kHz and VideoFrameRate is 30 fps,
+  // each frame lasts 3000 ticks.
+  virtual uint32_t GetVideoClockRate() = 0;
+
+  // VideoFrameRate is the number of frames per second.
+  virtual uint32_t GetVideoFrameRate() = 0;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp
new file mode 100644
index 0000000000..0c7f3de1f4
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.cpp
@@ -0,0 +1,721 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "VP8TrackEncoder.h"
+
+#include <vpx/vp8cx.h>
+#include <vpx/vpx_encoder.h>
+
+#include "DriftCompensation.h"
+#include "ImageToI420.h"
+#include "mozilla/gfx/2D.h"
+#include "prsystem.h"
+#include "VideoSegment.h"
+#include "VideoUtils.h"
+#include "WebMWriter.h"
+#include "mozilla/media/MediaUtils.h"
+#include "mozilla/dom/ImageUtils.h"
+#include "mozilla/dom/ImageBitmapBinding.h"
+#include "mozilla/ProfilerLabels.h"
+
+namespace mozilla {
+
+// Log module for this file. VP8LOG(level, fmt, ...) forwards a printf-style
+// message to it through MOZ_LOG.
+LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder");
+#define VP8LOG(level, msg, ...) \
+  MOZ_LOG(gVP8TrackEncoderLog, level, (msg, ##__VA_ARGS__))
+
+// Bitrate, in bits per second, used by CreateEncoderConfig() when the caller
+// passes a bitrate of 0.
+constexpr int DEFAULT_BITRATE_BPS = 2500000;
+// Initial value of mKeyFrameInterval, in milliseconds.
+constexpr int DEFAULT_KEYFRAME_INTERVAL_MS = 10000;
+// ExtractEncodedData() re-evaluates kf_max_dist each time the extracted
+// duration crosses a multiple of this many seconds.
+constexpr int DYNAMIC_MAXKFDIST_CHECK_INTERVAL = 5;
+// kf_max_dist is only updated when the newly calculated value differs from the
+// current one by more than this fraction of the current one.
+constexpr float DYNAMIC_MAXKFDIST_DIFFACTOR = 0.4;
+// Value of mKeyFrameIntervalFactor when the constructor is given Nothing().
+constexpr float DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR = 0.75;
+// Alignment passed to I420Size() and vpx_img_wrap() for the rows of the I420
+// buffer handed to libvpx.
+constexpr int I420_STRIDE_ALIGN = 16;
+
+using namespace mozilla::gfx;
+using namespace mozilla::layers;
+using namespace mozilla::media;
+using namespace mozilla::dom;
+
+namespace {
+
+/**
+ * Rounds aValue up to the next multiple of N. Any value smaller than N
+ * (including zero and negative values) yields N.
+ */
+template <int N>
+static int Aligned(int aValue) {
+  if (aValue < N) {
+    return N;
+  }
+
+  // The `- 1` avoids overreaching when `aValue % N == 0`.
+  return (((aValue - 1) / N) + 1) * N;
+}
+
+/**
+ * Returns the number of bytes needed to hold an I420 image of
+ * aWidth x aHeight pixels: one full-resolution Y plane followed by a U and a
+ * V plane that are halved (rounding up) in both dimensions. Every row stride
+ * is rounded up to a multiple of Alignment.
+ *
+ * The plane sizes are multiplied in size_t. Multiplying the int operands
+ * directly would overflow int for large frames before the result is widened.
+ */
+template <int Alignment>
+size_t I420Size(int aWidth, int aHeight) {
+  const size_t yStride = static_cast<size_t>(Aligned<Alignment>(aWidth));
+  const size_t yHeight = static_cast<size_t>(aHeight);
+  const size_t yPlaneSize = yStride * yHeight;
+
+  const size_t uvStride =
+      static_cast<size_t>(Aligned<Alignment>((aWidth + 1) / 2));
+  const size_t uvHeight = static_cast<size_t>((aHeight + 1) / 2);
+  const size_t uvPlaneSize = uvStride * uvHeight;
+
+  return yPlaneSize + uvPlaneSize * 2;
+}
+
+/**
+ * Populates *config with the libvpx VP8 encoder settings used by this file.
+ *
+ * Starts from libvpx's defaults and then overrides the frame size, target
+ * bitrate, timebase, thread count, rate control and keyframe placement.
+ *
+ * @param aWidth, aHeight       Frame size, stored in g_w / g_h.
+ * @param aVideoBitrate         Target bitrate in bits per second; 0 selects
+ *                              DEFAULT_BITRATE_BPS.
+ * @param aTrackRate            Denominator of the codec timebase (the
+ *                              numerator is 1).
+ * @param aMaxKeyFrameDistance  Value stored in kf_max_dist.
+ * @param config                Out-param; zeroed and then filled in.
+ * @return NS_OK, or NS_ERROR_FAILURE if libvpx cannot provide its defaults.
+ */
+nsresult CreateEncoderConfig(int32_t aWidth, int32_t aHeight,
+                             uint32_t aVideoBitrate, TrackRate aTrackRate,
+                             int32_t aMaxKeyFrameDistance,
+                             vpx_codec_enc_cfg_t* config) {
+  // Encoder configuration structure.
+  memset(config, 0, sizeof(vpx_codec_enc_cfg_t));
+  if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), config, 0)) {
+    VP8LOG(LogLevel::Error, "Failed to get default configuration");
+    return NS_ERROR_FAILURE;
+  }
+
+  config->g_w = aWidth;
+  config->g_h = aHeight;
+  // TODO: Maybe we should have various aFrameRate bitrate pair for each
+  // devices? or for different platform
+
+  // rc_target_bitrate needs kbit/s
+  config->rc_target_bitrate = std::max(
+      1U, (aVideoBitrate != 0 ? aVideoBitrate : DEFAULT_BITRATE_BPS) / 1000);
+
+  // Setting the time base of the codec
+  config->g_timebase.num = 1;
+  config->g_timebase.den = aTrackRate;
+
+  // No error resilience as this is not intended for UDP transports
+  config->g_error_resilient = 0;
+
+  // Allow some frame lagging for large timeslices (when low latency is not
+  // needed)
+  /*std::min(10U, mKeyFrameInterval / 200)*/
+  config->g_lag_in_frames = 0;
+
+  // Pick the number of encoder threads from the frame area and the number of
+  // available cores. The area is computed in 64 bits because the product of
+  // two int32_t values can overflow int32_t.
+  const int64_t pixelCount = static_cast<int64_t>(aWidth) * aHeight;
+  const int32_t number_of_cores = PR_GetNumberOfProcessors();
+  if (pixelCount > 1920 * 1080 && number_of_cores >= 8) {
+    config->g_threads = 4;  // 4 threads for > 1080p.
+  } else if (pixelCount > 1280 * 960 && number_of_cores >= 6) {
+    config->g_threads = 3;  // 3 threads for > 1280x960, e.g. 1080p.
+  } else if (pixelCount > 640 * 480 && number_of_cores >= 3) {
+    config->g_threads = 2;  // 2 threads for > VGA, e.g. qHD/HD.
+  } else {
+    config->g_threads = 1;  // 1 thread for VGA or less, or too few cores.
+  }
+
+  // rate control settings
+
+  // No frame dropping
+  config->rc_dropframe_thresh = 0;
+  // Variable bitrate
+  config->rc_end_usage = VPX_VBR;
+  // Single pass encoding
+  config->g_pass = VPX_RC_ONE_PASS;
+  // ffmpeg doesn't currently support streams that use resize.
+  // Therefore, for safety, we should turn it off until it does.
+  config->rc_resize_allowed = 0;
+  // Allows 100% under target bitrate to compensate for prior overshoot
+  config->rc_undershoot_pct = 100;
+  // Allows 15% over target bitrate to compensate for prior undershoot
+  config->rc_overshoot_pct = 15;
+  // Tells the decoding application to buffer 500ms before beginning playback
+  config->rc_buf_initial_sz = 500;
+  // The decoding application will try to keep 600ms of buffer during playback
+  config->rc_buf_optimal_sz = 600;
+  // The decoding application may buffer 1000ms worth of encoded data
+  config->rc_buf_sz = 1000;
+
+  // We set key frame interval to automatic and try to set kf_max_dist so that
+  // the encoder chooses to put keyframes slightly more often than
+  // mKeyFrameInterval (which will encode with VPX_EFLAG_FORCE_KF when reached).
+  config->kf_mode = VPX_KF_AUTO;
+  config->kf_max_dist = aMaxKeyFrameDistance;
+
+  return NS_OK;
+}
+}  // namespace
+
+/**
+ * Constructs the encoder. The libvpx context is not created here; that
+ * happens in Init() via InitInternal().
+ *
+ * aKeyFrameIntervalFactor scales the keyframe interval when kf_max_dist is
+ * calculated; Nothing() selects DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR. The other
+ * arguments are forwarded to VideoTrackEncoder.
+ */
+VP8TrackEncoder::VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+                                 TrackRate aTrackRate,
+                                 MediaQueue<EncodedFrame>& aEncodedDataQueue,
+                                 FrameDroppingMode aFrameDroppingMode,
+                                 Maybe<float> aKeyFrameIntervalFactor)
+    : VideoTrackEncoder(std::move(aDriftCompensator), aTrackRate,
+                        aEncodedDataQueue, aFrameDroppingMode),
+      mKeyFrameInterval(
+          TimeDuration::FromMilliseconds(DEFAULT_KEYFRAME_INTERVAL_MS)),
+      mKeyFrameIntervalFactor(aKeyFrameIntervalFactor.valueOr(
+          DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR)) {
+  MOZ_COUNT_CTOR(VP8TrackEncoder);
+
+  // Record an initial max keyframe distance if one can already be calculated.
+  const Maybe<int32_t> initialDistance = CalculateMaxKeyFrameDistance();
+  if (initialDistance.isSome()) {
+    SetMaxKeyFrameDistance(*initialDistance);
+  }
+}
+
+// Releases the libvpx context (if one is live) before the object goes away.
+VP8TrackEncoder::~VP8TrackEncoder() {
+  Destroy();
+  MOZ_COUNT_DTOR(VP8TrackEncoder);
+}
+
+// Destroys the libvpx context if the encoder is initialized and marks the
+// encoder as uninitialized. Does nothing when it is not initialized.
+void VP8TrackEncoder::Destroy() {
+  if (!mInitialized) {
+    return;
+  }
+
+  vpx_codec_destroy(&mVPXContext);
+  mInitialized = false;
+}
+
+/**
+ * Calculates a kf_max_dist that should keep libvpx from running into the
+ * forced keyframe (VPX_EFLAG_FORCE_KF) at mKeyFrameInterval.
+ *
+ * The frame rate is aEstimatedFrameRate when given, otherwise the inverse of
+ * the mean frame duration seen so far. Returns Nothing() when neither is
+ * available; otherwise a distance of at least 1.
+ */
+Maybe<int32_t> VP8TrackEncoder::CalculateMaxKeyFrameDistance(
+    Maybe<float> aEstimatedFrameRate /* = Nothing() */) const {
+  float frameRate;
+  if (aEstimatedFrameRate) {
+    frameRate = *aEstimatedFrameRate;
+  } else if (!mMeanFrameDuration.empty()) {
+    // Estimate the current frame rate from the mean frame duration.
+    frameRate =
+        static_cast<float>(1.0f / mMeanFrameDuration.mean().ToSeconds());
+  } else {
+    // Not enough data to make a new calculation.
+    return Nothing();
+  }
+
+  const int32_t distance = static_cast<int32_t>(
+      frameRate * mKeyFrameIntervalFactor * mKeyFrameInterval.ToSeconds());
+  return Some(std::max(1, distance));
+}
+
+/**
+ * Applies a new max keyframe distance. Before initialization the value is
+ * only stored in mMaxKeyFrameDistance; afterwards the encoder is reconfigured
+ * with it at the current frame size.
+ */
+void VP8TrackEncoder::SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance) {
+  if (!mInitialized) {
+    VP8LOG(LogLevel::Debug, "%p SetMaxKeyFrameDistance() distance=%d", this,
+           aMaxKeyFrameDistance);
+    mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+    return;
+  }
+
+  VP8LOG(
+      LogLevel::Debug,
+      "%p SetMaxKeyFrameDistance() set kf_max_dist to %d based on estimated "
+      "framerate %.2ffps keyframe-factor %.2f and keyframe-interval %.2fs",
+      this, aMaxKeyFrameDistance, 1 / mMeanFrameDuration.mean().ToSeconds(),
+      mKeyFrameIntervalFactor, mKeyFrameInterval.ToSeconds());
+
+  // The frame size is unchanged, so only the keyframe distance differs.
+  DebugOnly<nsresult> rv =
+      Reconfigure(mFrameWidth, mFrameHeight, aMaxKeyFrameDistance);
+  MOZ_ASSERT(
+      NS_SUCCEEDED(rv),
+      "Reconfig for new key frame distance with proven size should succeed");
+}
+
+/**
+ * Initializes the encoder for frames of aWidth x aHeight.
+ *
+ * Creates the libvpx context through InitInternal(), allocates the I420
+ * buffer that mVPXImageWrapper wraps, and, if no metadata exists yet, creates
+ * the track metadata and marks the encoder as initialized.
+ *
+ * @param aWidth, aHeight                Encoded frame size.
+ * @param aDisplayWidth, aDisplayHeight  Display size stored in the metadata;
+ *                                       both must be at least 1.
+ * @param aEstimatedFrameRate            Frame rate estimate used to derive the
+ *                                       initial kf_max_dist; must be > 0.
+ * @return NS_OK on success. NS_ERROR_FAILURE on invalid arguments or when the
+ *         I420 buffer cannot be allocated; failures of InitInternal() are
+ *         passed through.
+ */
+nsresult VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight,
+                               int32_t aDisplayWidth, int32_t aDisplayHeight,
+                               float aEstimatedFrameRate) {
+  if (aDisplayWidth < 1 || aDisplayHeight < 1) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (aEstimatedFrameRate <= 0) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // A frame rate is supplied, so the returned Maybe always holds a value.
+  int32_t maxKeyFrameDistance =
+      *CalculateMaxKeyFrameDistance(Some(aEstimatedFrameRate));
+
+  nsresult rv = InitInternal(aWidth, aHeight, maxKeyFrameDistance);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  MOZ_ASSERT(!mI420Frame);
+  MOZ_ASSERT(mI420FrameSize == 0);
+  const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight);
+  mI420Frame.reset(new (fallible) uint8_t[neededSize]);
+  mI420FrameSize = mI420Frame ? neededSize : 0;
+  if (!mI420Frame) {
+    VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed",
+           neededSize);
+    // InitInternal() has created the codec context, but mInitialized is still
+    // false on this path (InitInternal() rejects initialized encoders), so
+    // Destroy() would skip vpx_codec_destroy() and the context would leak.
+    // Release it here.
+    vpx_codec_destroy(&mVPXContext);
+    return NS_ERROR_FAILURE;
+  }
+  vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight,
+               I420_STRIDE_ALIGN, mI420Frame.get());
+
+  if (!mMetadata) {
+    mMetadata = MakeAndAddRef<VP8Metadata>();
+    mMetadata->mWidth = aWidth;
+    mMetadata->mHeight = aHeight;
+    mMetadata->mDisplayWidth = aDisplayWidth;
+    mMetadata->mDisplayHeight = aDisplayHeight;
+
+    VP8LOG(LogLevel::Info,
+           "%p Init() created metadata. width=%d, height=%d, displayWidth=%d, "
+           "displayHeight=%d, framerate=%.2f",
+           this, mMetadata->mWidth, mMetadata->mHeight,
+           mMetadata->mDisplayWidth, mMetadata->mDisplayHeight,
+           aEstimatedFrameRate);
+
+    SetInitialized();
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Creates the libvpx VP8 encoder context for the given frame size and max
+ * keyframe distance, and records both in the corresponding members.
+ *
+ * Must only be called while the encoder is not initialized. Returns
+ * NS_ERROR_FAILURE for non-positive dimensions, for an already initialized
+ * encoder, or when libvpx rejects the configuration.
+ */
+nsresult VP8TrackEncoder::InitInternal(int32_t aWidth, int32_t aHeight,
+                                       int32_t aMaxKeyFrameDistance) {
+  if (aWidth <= 0 || aHeight <= 0) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (mInitialized) {
+    MOZ_ASSERT(false);
+    return NS_ERROR_FAILURE;
+  }
+
+  VP8LOG(LogLevel::Debug,
+         "%p InitInternal(). width=%d, height=%d, kf_max_dist=%d", this, aWidth,
+         aHeight, aMaxKeyFrameDistance);
+
+  // Build the encoder configuration.
+  vpx_codec_enc_cfg_t config;
+  nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate,
+                                    aMaxKeyFrameDistance, &config);
+  NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+
+  // Ask libvpx to hand out the encoded frame in partitions.
+  const vpx_codec_flags_t flags = VPX_CODEC_USE_OUTPUT_PARTITION;
+  if (vpx_codec_enc_init(&mVPXContext, vpx_codec_vp8_cx(), &config, flags) !=
+      VPX_CODEC_OK) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Encoder tuning; the return values are not checked.
+  vpx_codec_control(&mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1);
+  vpx_codec_control(&mVPXContext, VP8E_SET_CPUUSED, 15);
+  vpx_codec_control(&mVPXContext, VP8E_SET_TOKEN_PARTITIONS,
+                    VP8_TWO_TOKENPARTITION);
+
+  mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+  mFrameHeight = aHeight;
+  mFrameWidth = aWidth;
+
+  return NS_OK;
+}
+
+/**
+ * Adapts an initialized encoder to a new frame size and/or max keyframe
+ * distance.
+ *
+ * The codec context is destroyed and re-created when the keyframe distance
+ * changes or when the new frame size needs a larger I420 buffer than the one
+ * currently allocated. Otherwise the new configuration is applied to the
+ * existing context with vpx_codec_enc_config_set().
+ *
+ * Returns NS_ERROR_FAILURE for non-positive dimensions, when called on an
+ * uninitialized encoder, on allocation failure, or when libvpx rejects the
+ * configuration.
+ */
+nsresult VP8TrackEncoder::Reconfigure(int32_t aWidth, int32_t aHeight,
+                                      int32_t aMaxKeyFrameDistance) {
+  if (aWidth <= 0 || aHeight <= 0) {
+    MOZ_ASSERT(false);
+    return NS_ERROR_FAILURE;
+  }
+
+  if (!mInitialized) {
+    MOZ_ASSERT(false);
+    return NS_ERROR_FAILURE;
+  }
+
+  // A changed keyframe distance always goes through a full re-init.
+  bool needsReInit = aMaxKeyFrameDistance != *mMaxKeyFrameDistance;
+
+  if (aWidth != mFrameWidth || aHeight != mFrameHeight) {
+    VP8LOG(LogLevel::Info, "Dynamic resolution change (%dx%d -> %dx%d).",
+           mFrameWidth, mFrameHeight, aWidth, aHeight);
+    const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight);
+    // The buffer only ever grows; a smaller frame reuses the existing one.
+    // NOTE(review): a frame that grows in one dimension but still fits the
+    // existing buffer takes the config_set path below - confirm libvpx accepts
+    // dimensions larger than the ones the context was created with.
+    if (neededSize > mI420FrameSize) {
+      needsReInit = true;
+      mI420Frame.reset(new (fallible) uint8_t[neededSize]);
+      mI420FrameSize = mI420Frame ? neededSize : 0;
+    }
+    if (!mI420Frame) {
+      VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed",
+             neededSize);
+      return NS_ERROR_FAILURE;
+    }
+    // Re-wrap the buffer so the plane pointers and strides match the new size.
+    vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight,
+                 I420_STRIDE_ALIGN, mI420Frame.get());
+  }
+
+  if (needsReInit) {
+    // Destroy() clears mInitialized, which InitInternal() requires; it is set
+    // again only once the new context has been created.
+    Destroy();
+    mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+    nsresult rv = InitInternal(aWidth, aHeight, aMaxKeyFrameDistance);
+    NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+    mInitialized = true;
+    return NS_OK;
+  }
+
+  // Encoder configuration structure.
+  vpx_codec_enc_cfg_t config;
+  nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate,
+                                    aMaxKeyFrameDistance, &config);
+  NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+  // Set new configuration
+  if (vpx_codec_enc_config_set(&mVPXContext, &config) != VPX_CODEC_OK) {
+    VP8LOG(LogLevel::Error, "Failed to set new configuration");
+    return NS_ERROR_FAILURE;
+  }
+
+  mFrameWidth = aWidth;
+  mFrameHeight = aHeight;
+
+  return NS_OK;
+}
+
+// Returns a new reference to the metadata created by Init(), or nullptr if
+// the encoder has not been initialized.
+already_AddRefed<TrackMetadataBase> VP8TrackEncoder::GetMetadata() {
+  AUTO_PROFILER_LABEL("VP8TrackEncoder::GetMetadata", OTHER);
+
+  MOZ_ASSERT(mInitialized);
+
+  if (mInitialized) {
+    MOZ_ASSERT(mMetadata);
+    return do_AddRef(mMetadata);
+  }
+
+  return nullptr;
+}
+
+/**
+ * Pulls the packets of one encoded frame out of libvpx and wraps them in an
+ * EncodedFrame.
+ *
+ * Packet payloads are concatenated until a packet without
+ * VPX_FRAME_IS_FRAGMENT is seen. The function also maintains the extracted
+ * duration counters and re-evaluates kf_max_dist whenever the extracted
+ * duration crosses a multiple of DYNAMIC_MAXKFDIST_CHECK_INTERVAL seconds.
+ *
+ * @return The encoded frame; a null RefPtr if libvpx had no data to hand out;
+ *         or NS_ERROR_DOM_MEDIA_OVERFLOW_ERR if a time computation overflowed.
+ */
+Result<RefPtr<EncodedFrame>, nsresult> VP8TrackEncoder::ExtractEncodedData() {
+  vpx_codec_iter_t iter = nullptr;
+  EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME;
+  auto frameData = MakeRefPtr<EncodedFrame::FrameData>();
+  const vpx_codec_cx_pkt_t* pkt = nullptr;
+  while ((pkt = vpx_codec_get_cx_data(&mVPXContext, &iter)) != nullptr) {
+    switch (pkt->kind) {
+      case VPX_CODEC_CX_FRAME_PKT: {
+        // Copy the encoded data from libvpx to frameData
+        frameData->AppendElements((uint8_t*)pkt->data.frame.buf,
+                                  pkt->data.frame.sz);
+        break;
+      }
+      default: {
+        break;
+      }
+    }
+    // End of frame
+    // NOTE(review): data.frame is read here for every packet kind, not only
+    // VPX_CODEC_CX_FRAME_PKT - confirm no other kinds are emitted with this
+    // encoder configuration.
+    if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
+      if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+        frameType = EncodedFrame::VP8_I_FRAME;
+      }
+      break;
+    }
+  }
+
+  // Nothing was appended: the encoder had no frame data for us.
+  if (frameData->IsEmpty()) {
+    return RefPtr<EncodedFrame>(nullptr);
+  }
+
+  if (!pkt) {
+    // This check silences a coverity warning about accessing a null pkt below.
+    return RefPtr<EncodedFrame>(nullptr);
+  }
+
+  if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+    // Update the since-last-keyframe counter, and account for this frame's
+    // time.
+    TrackTime frameTime = pkt->data.frame.pts;
+    DebugOnly<TrackTime> frameDuration = pkt->data.frame.duration;
+    MOZ_ASSERT(frameTime + frameDuration <= mEncodedTimestamp);
+    mDurationSinceLastKeyframe =
+        std::min(mDurationSinceLastKeyframe, mEncodedTimestamp - frameTime);
+  }
+
+  // Convert the timestamp and duration to Usecs.
+  media::TimeUnit timestamp = media::TimeUnit(pkt->data.frame.pts, mTrackRate);
+  if (!timestamp.IsValid()) {
+    NS_ERROR("Microsecond timestamp overflow");
+    return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+  }
+
+  // mExtractedDuration accumulates the total extracted duration in track
+  // ticks.
+  mExtractedDuration += pkt->data.frame.duration;
+  if (!mExtractedDuration.isValid()) {
+    NS_ERROR("Duration overflow");
+    return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+  }
+
+  media::TimeUnit totalDuration =
+      media::TimeUnit(mExtractedDuration.value(), mTrackRate);
+  if (!totalDuration.IsValid()) {
+    NS_ERROR("Duration overflow");
+    return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+  }
+
+  // This frame's duration is the difference between the new and the previous
+  // cumulative totals (presumably so that rounding errors do not accumulate).
+  media::TimeUnit duration = totalDuration - mExtractedDurationUs;
+  if (!duration.IsValid()) {
+    NS_ERROR("Duration overflow");
+    return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+  }
+
+  mExtractedDurationUs = totalDuration;
+
+  VP8LOG(LogLevel::Verbose,
+         "ExtractEncodedData TimeStamp %.2f, Duration %.2f, FrameType %d",
+         timestamp.ToSeconds(), duration.ToSeconds(), frameType);
+
+  // Integer division buckets both durations into check intervals; a larger
+  // bucket for the new total means an interval boundary has been crossed.
+  if (static_cast<int>(totalDuration.ToSeconds()) /
+          DYNAMIC_MAXKFDIST_CHECK_INTERVAL >
+      static_cast<int>(mLastKeyFrameDistanceUpdate.ToSeconds()) /
+          DYNAMIC_MAXKFDIST_CHECK_INTERVAL) {
+    // The interval has passed since the last keyframe update. Update again.
+    mLastKeyFrameDistanceUpdate = totalDuration;
+    const int32_t maxKfDistance =
+        CalculateMaxKeyFrameDistance().valueOr(*mMaxKeyFrameDistance);
+    const float diffFactor =
+        static_cast<float>(maxKfDistance) / *mMaxKeyFrameDistance;
+    VP8LOG(LogLevel::Debug, "maxKfDistance: %d, factor: %.2f", maxKfDistance,
+           diffFactor);
+    // Only reconfigure when the change is large enough. While initialized,
+    // SetMaxKeyFrameDistance() goes through Reconfigure(), which may re-create
+    // the codec context; pkt is not used past this point.
+    if (std::abs(1.0 - diffFactor) > DYNAMIC_MAXKFDIST_DIFFACTOR) {
+      SetMaxKeyFrameDistance(maxKfDistance);
+    }
+  }
+
+  return MakeRefPtr<EncodedFrame>(timestamp, duration.ToMicroseconds(),
+                                  PR_USEC_PER_SEC, frameType,
+                                  std::move(frameData));
+}
+
+/**
+ * Encoding flow in Encode():
+ * 1: Assert valid state.
+ * 2: Encode the video chunks in aSegment in a for-loop.
+ * 2.1: The duration is taken straight from the video chunk's duration.
+ * 2.2: Setup the video chunk with mVPXImageWrapper by PrepareRawFrame().
+ * 2.3: Pass frame to vp8 encoder by vpx_codec_encode().
+ * 2.4: Extract the encoded frame from encoder by ExtractEncodedData().
+ * 2.5: Set the nextEncodeOperation for the next frame.
+ * 2.6: If we are not skipping the next frame, add the encoded frame to
+ *      mEncodedDataQueue. If we are skipping the next frame, extend the encoded
+ *      frame's duration in the next run of the loop.
+ * 3: Clear aSegment.
+ * 4: If mEndOfStream is set, flush the encoder and finish mEncodedDataQueue.
+ *
+ * Returns NS_OK on success, NS_ERROR_FAILURE if preparing, encoding or
+ * extracting a frame fails, and NS_ERROR_DOM_MEDIA_OVERFLOW_ERR if the
+ * accumulated duration of skipped frames overflows.
+ */
+nsresult VP8TrackEncoder::Encode(VideoSegment* aSegment) {
+  MOZ_ASSERT(mInitialized);
+  MOZ_ASSERT(!IsEncodingComplete());
+
+  AUTO_PROFILER_LABEL("VP8TrackEncoder::Encode", OTHER);
+
+  EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME;
+
+  // The frame that has been encoded but not yet pushed to mEncodedDataQueue.
+  // It stays set across iterations only while following chunks are skipped.
+  RefPtr<EncodedFrame> encodedFrame;
+  for (VideoSegment::ChunkIterator iter(*aSegment); !iter.IsEnded();
+       iter.Next()) {
+    VideoChunk& chunk = *iter;
+
+    VP8LOG(LogLevel::Verbose,
+           "nextEncodeOperation is %d for frame of duration %" PRId64,
+           nextEncodeOperation, chunk.GetDuration());
+
+    // Start of the wall-clock measurement for this chunk.
+    TimeStamp timebase = TimeStamp::Now();
+
+    // Encode frame.
+    if (nextEncodeOperation != SKIP_FRAME) {
+      MOZ_ASSERT(!encodedFrame);
+      nsresult rv = PrepareRawFrame(chunk);
+      NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+
+      // Encode the data with VP8 encoder
+      int flags = 0;
+      if (nextEncodeOperation == ENCODE_I_FRAME) {
+        VP8LOG(LogLevel::Warning,
+               "MediaRecorder lagging behind. Encoding keyframe.");
+        flags |= VPX_EFLAG_FORCE_KF;
+      }
+
+      // Sum duration of non-key frames and force keyframe if exceeded the
+      // given keyframe interval
+      if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) {
+        if (media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate)
+                .ToTimeDuration() >= mKeyFrameInterval) {
+          VP8LOG(LogLevel::Warning,
+                 "Reached mKeyFrameInterval without seeing a keyframe. Forcing "
+                 "one. time: %.2f, interval: %.2f",
+                 media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate)
+                     .ToSeconds(),
+                 mKeyFrameInterval.ToSeconds());
+          mDurationSinceLastKeyframe = 0;
+          flags |= VPX_EFLAG_FORCE_KF;
+        }
+        mDurationSinceLastKeyframe += chunk.GetDuration();
+      }
+
+      if (vpx_codec_encode(&mVPXContext, &mVPXImageWrapper, mEncodedTimestamp,
+                           (unsigned long)chunk.GetDuration(), flags,
+                           VPX_DL_REALTIME)) {
+        VP8LOG(LogLevel::Error, "vpx_codec_encode failed to encode the frame.");
+        return NS_ERROR_FAILURE;
+      }
+
+      // Move forward the mEncodedTimestamp.
+      mEncodedTimestamp += chunk.GetDuration();
+
+      // Extract the encoded data from the underlying encoder and push it to
+      // mEncodedDataQueue.
+      auto result = ExtractEncodedData();
+      if (result.isErr()) {
+        VP8LOG(LogLevel::Error, "ExtractEncodedData failed.");
+        return NS_ERROR_FAILURE;
+      }
+
+      MOZ_ASSERT(result.inspect(),
+                 "We expected a frame here. EOS is handled explicitly later");
+      encodedFrame = result.unwrap();
+    } else {
+      // SKIP_FRAME
+
+      MOZ_DIAGNOSTIC_ASSERT(encodedFrame);
+
+      if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) {
+        mDurationSinceLastKeyframe += chunk.GetDuration();
+      }
+
+      // Move forward the mEncodedTimestamp.
+      mEncodedTimestamp += chunk.GetDuration();
+
+      // Extend the duration of the pending encoded frame (it has not been
+      // pushed to mEncodedDataQueue yet) because this frame will be skipped.
+      VP8LOG(LogLevel::Warning,
+             "MediaRecorder lagging behind. Skipping a frame.");
+
+      mExtractedDuration += chunk.mDuration;
+      if (!mExtractedDuration.isValid()) {
+        NS_ERROR("skipped duration overflow");
+        return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+      }
+
+      // As in ExtractEncodedData(), the added duration is the difference
+      // between the new and the previous cumulative totals.
+      media::TimeUnit totalDuration =
+          media::TimeUnit(mExtractedDuration.value(), mTrackRate);
+      media::TimeUnit skippedDuration = totalDuration - mExtractedDurationUs;
+      mExtractedDurationUs = totalDuration;
+      if (!skippedDuration.IsValid()) {
+        NS_ERROR("skipped duration overflow");
+        return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+      }
+
+      // Replace the pending frame with a copy that has the longer duration.
+      encodedFrame = MakeRefPtr<EncodedFrame>(
+          encodedFrame->mTime,
+          encodedFrame->mDuration + skippedDuration.ToMicroseconds(),
+          encodedFrame->mDurationBase, encodedFrame->mFrameType,
+          encodedFrame->mFrameData);
+    }
+
+    // Feed the rolling means that GetNextEncodeOperation() compares: time
+    // spent on this chunk versus the chunk's media duration.
+    mMeanFrameEncodeDuration.insert(TimeStamp::Now() - timebase);
+    mMeanFrameDuration.insert(
+        media::TimeUnit(chunk.GetDuration(), mTrackRate).ToTimeDuration());
+    nextEncodeOperation = GetNextEncodeOperation(
+        mMeanFrameEncodeDuration.mean(), mMeanFrameDuration.mean());
+
+    if (nextEncodeOperation != SKIP_FRAME) {
+      // Note that the next operation might be SKIP_FRAME even if there is no
+      // next frame.
+      mEncodedDataQueue.Push(encodedFrame.forget());
+    }
+  }
+
+  if (encodedFrame) {
+    // Push now if we ended on a SKIP_FRAME before.
+    mEncodedDataQueue.Push(encodedFrame.forget());
+  }
+
+  // Remove the chunks we have processed.
+  aSegment->Clear();
+
+  if (mEndOfStream) {
+    // EOS: Extract the remaining frames from the underlying encoder.
+    VP8LOG(LogLevel::Debug, "mEndOfStream is true");
+    // No more frames will be encoded. Clearing temporary frames saves some
+    // memory.
+    if (mI420Frame) {
+      mI420Frame = nullptr;
+      mI420FrameSize = 0;
+    }
+    // mMuteFrame must be released before gfx shutdown. We do it now since it
+    // may be too late when this VP8TrackEncoder gets destroyed.
+    mMuteFrame = nullptr;
+    // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data
+    // until vpx_codec_get_cx_data return null.
+    while (true) {
+      // Passing a null image flushes the encoder.
+      if (vpx_codec_encode(&mVPXContext, nullptr, mEncodedTimestamp, 0, 0,
+                           VPX_DL_REALTIME)) {
+        return NS_ERROR_FAILURE;
+      }
+      auto result = ExtractEncodedData();
+      if (result.isErr()) {
+        return NS_ERROR_FAILURE;
+      }
+      if (!result.inspect()) {
+        // Null means end-of-stream.
+        break;
+      }
+      mEncodedDataQueue.Push(result.unwrap().forget());
+    }
+    mEncodedDataQueue.Finish();
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Converts the image of aChunk to I420 into the buffer wrapped by
+ * mVPXImageWrapper, reconfiguring the encoder first if the image size differs
+ * from the current frame size.
+ *
+ * Chunks that are null or forced black are encoded from a cached black image
+ * of the chunk's intrinsic size. If that image cannot be created, a warning is
+ * logged and NS_OK is returned without touching the buffer.
+ */
+nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk& aChunk) {
+  RefPtr<Image> img;
+  const bool useBlackFrame = aChunk.mFrame.GetForceBlack() || aChunk.IsNull();
+  if (!useBlackFrame) {
+    img = aChunk.mFrame.GetImage();
+  } else {
+    const gfx::IntSize intrinsicSize = aChunk.mFrame.GetIntrinsicSize();
+    // (Re)create the cached black image when missing or of the wrong size.
+    if (!mMuteFrame || mMuteFrame->GetSize() != intrinsicSize) {
+      mMuteFrame = mozilla::VideoFrame::CreateBlackImage(intrinsicSize);
+    }
+    if (!mMuteFrame) {
+      VP8LOG(LogLevel::Warning, "Failed to allocate black image of size %dx%d",
+             intrinsicSize.width, intrinsicSize.height);
+      return NS_OK;
+    }
+    img = mMuteFrame;
+  }
+
+  // Follow resolution changes of the source.
+  gfx::IntSize imgSize = img->GetSize();
+  if (imgSize != IntSize(mFrameWidth, mFrameHeight)) {
+    nsresult rv =
+        Reconfigure(imgSize.width, imgSize.height, *mMaxKeyFrameDistance);
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  MOZ_ASSERT(mFrameWidth == imgSize.width);
+  MOZ_ASSERT(mFrameHeight == imgSize.height);
+
+  uint8_t* const yPlane = mVPXImageWrapper.planes[VPX_PLANE_Y];
+  uint8_t* const uPlane = mVPXImageWrapper.planes[VPX_PLANE_U];
+  uint8_t* const vPlane = mVPXImageWrapper.planes[VPX_PLANE_V];
+  nsresult rv =
+      ConvertToI420(img, yPlane, mVPXImageWrapper.stride[VPX_PLANE_Y], uPlane,
+                    mVPXImageWrapper.stride[VPX_PLANE_U], vPlane,
+                    mVPXImageWrapper.stride[VPX_PLANE_V]);
+  if (NS_FAILED(rv)) {
+    VP8LOG(LogLevel::Error, "Converting to I420 failed");
+    return rv;
+  }
+
+  return NS_OK;
+}
+
+// Thresholds used by GetNextEncodeOperation(), expressed as the fraction of
+// aProcessedDuration that aTimeElapsed may reach before the encoder reacts.
+// I_FRAME_RATIO is effectively disabled (the original note cites perceived
+// quality): being equal to SKIP_FRAME_RATIO, its check is only reached after
+// the SKIP_FRAME check with the same threshold has already failed.
+#define I_FRAME_RATIO (0.85)
+#define SKIP_FRAME_RATIO (0.85)
+
+/**
+ * Decides how the next frame should be handled by comparing the time spent
+ * encoding (aTimeElapsed) with the media duration that was encoded
+ * (aProcessedDuration).
+ *
+ * Always returns ENCODE_NORMAL_FRAME when frame dropping is disallowed.
+ */
+VP8TrackEncoder::EncodeOperation VP8TrackEncoder::GetNextEncodeOperation(
+    TimeDuration aTimeElapsed, TimeDuration aProcessedDuration) {
+  if (mFrameDroppingMode == FrameDroppingMode::DISALLOW) {
+    return ENCODE_NORMAL_FRAME;
+  }
+
+  const double elapsedSeconds = aTimeElapsed.ToSeconds();
+  const double processedSeconds = aProcessedDuration.ToSeconds();
+
+  // The encoder is too slow: skip the next frame to catch up.
+  if (elapsedSeconds > processedSeconds * SKIP_FRAME_RATIO) {
+    return SKIP_FRAME;
+  }
+
+  // The encoder is a little slow: force an I-frame to accelerate.
+  if (elapsedSeconds > processedSeconds * I_FRAME_RATIO) {
+    return ENCODE_I_FRAME;
+  }
+
+  return ENCODE_NORMAL_FRAME;
+}
+
+}  // namespace mozilla
+
+#undef VP8LOG
diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h
new file mode 100644
index 0000000000..0f8b2deb57
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.h
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VP8TrackEncoder_h_
+#define VP8TrackEncoder_h_
+
+#include "TrackEncoder.h"
+
+#include <vpx/vpx_codec.h>
+
+#include "mozilla/RollingMean.h"
+#include "TimeUnits.h"
+
+namespace mozilla {
+
+// Aliases giving the vpx structs their *_t names inside namespace mozilla.
+// vpx_codec_ctx_t and vpx_image_t are held by value in VP8TrackEncoder.
+// NOTE(review): presumably these name the libvpx structs made visible through
+// <vpx/vpx_codec.h> -- confirm which vpx header declares each of them.
+typedef struct vpx_codec_ctx vpx_codec_ctx_t;
+typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t;
+typedef struct vpx_image vpx_image_t;
+
+// Forward declaration; VP8TrackEncoder only holds it through a RefPtr.
+class VP8Metadata;
+
+/**
+ * VP8TrackEncoder implements VideoTrackEncoder by using the libvpx library.
+ * We implement a realtime and variable frame rate encoder. In order to achieve
+ * that, there is a frame-drop encoding policy implemented in Encode().
+ */
+class VP8TrackEncoder : public VideoTrackEncoder {
+  // The result of GetNextEncodeOperation(), describing how the next target
+  // frame is to be handled.
+  enum EncodeOperation {
+    ENCODE_NORMAL_FRAME,  // VP8 track encoder works normally.
+    ENCODE_I_FRAME,       // The next frame will be encoded as I-Frame.
+    SKIP_FRAME,           // Skip the next frame.
+  };
+
+ public:
+  // aKeyFrameIntervalFactor is optional and defaults to Nothing().
+  // NOTE(review): presumably it overrides the default value of
+  // mKeyFrameIntervalFactor -- confirm against the constructor definition.
+  VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+                  TrackRate aTrackRate,
+                  MediaQueue<EncodedFrame>& aEncodedDataQueue,
+                  FrameDroppingMode aFrameDroppingMode,
+                  Maybe<float> aKeyFrameIntervalFactor = Nothing());
+  virtual ~VP8TrackEncoder();
+
+  // Returns the metadata describing this track.
+  // NOTE(review): presumably this hands out mMetadata -- confirm.
+  already_AddRefed<TrackMetadataBase> GetMetadata() final;
+
+ protected:
+  // Initializes the encoder for the given frame and display sizes and the
+  // estimated frame rate.
+  nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
+                int32_t aDisplayHeight, float aEstimatedFrameRate) final;
+
+ private:
+  // Initiates the underlying vpx encoder.
+  nsresult InitInternal(int32_t aWidth, int32_t aHeight,
+                        int32_t aMaxKeyFrameDistance);
+
+  // Get the EncodeOperation for next target frame.
+  EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed,
+                                         TimeDuration aProcessedDuration);
+
+  // Extracts the encoded data from the underlying encoder and returns it.
+  // Return value: An EncodedFrame if a frame was extracted.
+  //               nullptr if we reached end-of-stream or nothing was available
+  //                       from the underlying encoder.
+  //               An error nsresult otherwise.
+  Result<RefPtr<EncodedFrame>, nsresult> ExtractEncodedData();
+
+  // Takes the data in aSegment, encodes it, extracts it, and pushes it to
+  // mEncodedDataQueue.
+  nsresult Encode(VideoSegment* aSegment) final;
+
+  // Prepare the input data to the mVPXImageWrapper for encoding.
+  nsresult PrepareRawFrame(VideoChunk& aChunk);
+
+  // Re-configures an existing encoder with a new frame size.
+  nsresult Reconfigure(int32_t aWidth, int32_t aHeight,
+                       int32_t aMaxKeyFrameDistance);
+
+  // Destroys the context and image wrapper. Does not de-allocate the structs.
+  void Destroy();
+
+  // Helper that calculates the desired max keyframe distance (vp8 config's
+  // kf_max_dist) based on configured key frame interval and recent framerate.
+  // Returns Nothing if not enough input data is available.
+  Maybe<int32_t> CalculateMaxKeyFrameDistance(
+      Maybe<float> aEstimatedFrameRate = Nothing()) const;
+
+  // Sets the max keyframe distance to aMaxKeyFrameDistance.
+  // NOTE(review): presumably this updates both mMaxKeyFrameDistance and the
+  // underlying encoder's kf_max_dist -- confirm against the definition.
+  void SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance);
+
+  // VP8 Metadata, set on successful Init and never modified again.
+  RefPtr<VP8Metadata> mMetadata;
+
+  // The width the encoder is currently configured with. The input frames to the
+  // underlying encoder must match this width, i.e., the underlying encoder will
+  // not do any resampling.
+  int mFrameWidth = 0;
+
+  // The height the encoder is currently configured with. The input frames to
+  // the underlying encoder must match this height, i.e., the underlying encoder
+  // will not do any resampling.
+  int mFrameHeight = 0;
+
+  // Encoded timestamp.
+  TrackTime mEncodedTimestamp = 0;
+
+  // Total duration in mTrackRate extracted from the underlying encoder.
+  CheckedInt64 mExtractedDuration;
+
+  // Total duration extracted from the underlying encoder, as a TimeUnit.
+  media::TimeUnit mExtractedDurationUs;
+
+  // Muted frame, we only create it once.
+  RefPtr<layers::Image> mMuteFrame;
+
+  // I420 frame buffer, used when converting to I420, and its size.
+  UniquePtr<uint8_t[]> mI420Frame;
+  size_t mI420FrameSize = 0;
+
+  /**
+   * A duration of non-key frames in mTrackRate.
+   */
+  TrackTime mDurationSinceLastKeyframe = 0;
+
+  /**
+   * The max interval at which a keyframe gets forced (causing video quality
+   * degradation). The encoder is configured to encode keyframes more often than
+   * this, though it can vary based on frame rate.
+   */
+  const TimeDuration mKeyFrameInterval;
+
+  /**
+   * A factor used to multiply the estimated key-frame-interval based on
+   * mKeyFrameInterval (ms) with when configuring kf_max_dist in the encoder.
+   * The goal is to set it a bit below 1.0 to avoid falling back to forcing
+   * keyframes.
+   * NB that for purposes of testing the mKeyFrameInterval fallback this may be
+   *    set to values higher than 1.0.
+   */
+  float mKeyFrameIntervalFactor;
+
+  /**
+   * Time when we last updated the key-frame-distance.
+   */
+  media::TimeUnit mLastKeyFrameDistanceUpdate;
+
+  /**
+   * The max key frame distance value last used to configure kf_max_dist.
+   * Nothing if no value is stored.
+   */
+  Maybe<int32_t> mMaxKeyFrameDistance;
+
+  /**
+   * The mean duration of recent frames.
+   */
+  RollingMean<TimeDuration, TimeDuration> mMeanFrameDuration{30};
+
+  /**
+   * The mean wall-clock time it took to encode recent frames.
+   */
+  RollingMean<TimeDuration, TimeDuration> mMeanFrameEncodeDuration{30};
+
+  // VP8 related members.
+  // Codec context structure.
+  vpx_codec_ctx_t mVPXContext;
+  // Image Descriptor.
+  vpx_image_t mVPXImageWrapper;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/moz.build b/dom/media/encoder/moz.build
new file mode 100644
index 0000000000..f995ecdc1c
--- /dev/null
+++ b/dom/media/encoder/moz.build
@@ -0,0 +1,42 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Bugzilla product/component associated with every file in this directory.
+with Files("*"):
+    BUG_COMPONENT = ("Core", "Audio/Video: Recording")
+
+# Public headers of this directory, made available to code elsewhere in the
+# tree.
+EXPORTS += [
+    "ContainerWriter.h",
+    "EncodedFrame.h",
+    "MediaEncoder.h",
+    "OpusTrackEncoder.h",
+    "TrackEncoder.h",
+    "TrackMetadataBase.h",
+    "VP8TrackEncoder.h",
+]
+
+# Source files built from this directory, eligible for unified compilation.
+UNIFIED_SOURCES += [
+    "MediaEncoder.cpp",
+    "Muxer.cpp",
+    "OpusTrackEncoder.cpp",
+    "TrackEncoder.cpp",
+    "VP8TrackEncoder.cpp",
+]
+
+# The objects built here are linked into the "xul" library.
+FINAL_LIBRARY = "xul"
+
+# Additional include search paths used when compiling the sources above.
+LOCAL_INCLUDES += [
+    "/dom/media",
+    "/ipc/chromium/src",
+    "/media/libyuv/libyuv/include",
+]
+
+# Shared build configuration for code that uses the /ipc/chromium sources.
+include("/ipc/chromium/chromium-config.mozbuild")
+
+# Suppress some GCC warnings being treated as errors:
+#  - about attributes on forward declarations for types that are already
+#    defined, which complains about an important MOZ_EXPORT for android::AString
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += ["-Wno-error=attributes"]