From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- dom/media/platforms/EncoderConfig.cpp | 27 + dom/media/platforms/EncoderConfig.h | 190 +++++++ dom/media/platforms/PlatformEncoderModule.cpp | 33 +- dom/media/platforms/PlatformEncoderModule.h | 193 +------ .../platforms/agnostic/AgnosticDecoderModule.cpp | 12 +- dom/media/platforms/agnostic/bytestreams/H264.cpp | 3 +- dom/media/platforms/agnostic/bytestreams/H264.h | 36 +- dom/media/platforms/apple/AppleDecoderModule.cpp | 49 +- dom/media/platforms/apple/AppleDecoderModule.h | 1 + dom/media/platforms/apple/AppleVTDecoder.cpp | 32 +- dom/media/platforms/apple/AppleVTDecoder.h | 2 +- dom/media/platforms/apple/AppleVTEncoder.cpp | 56 +- dom/media/platforms/apple/AppleVTEncoder.h | 5 +- dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp | 458 +++++++++++++++++ dom/media/platforms/ffmpeg/FFmpegAudioEncoder.h | 70 +++ dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp | 20 +- dom/media/platforms/ffmpeg/FFmpegDataEncoder.cpp | 495 ++++++++++++++++++ dom/media/platforms/ffmpeg/FFmpegDataEncoder.h | 107 ++++ dom/media/platforms/ffmpeg/FFmpegEncoderModule.cpp | 18 + dom/media/platforms/ffmpeg/FFmpegEncoderModule.h | 4 + dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp | 3 + dom/media/platforms/ffmpeg/FFmpegLibWrapper.h | 5 + dom/media/platforms/ffmpeg/FFmpegLog.h | 11 +- dom/media/platforms/ffmpeg/FFmpegUtils.cpp | 23 + dom/media/platforms/ffmpeg/FFmpegUtils.h | 56 ++ dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp | 8 +- dom/media/platforms/ffmpeg/FFmpegVideoEncoder.cpp | 571 ++------------------- dom/media/platforms/ffmpeg/FFmpegVideoEncoder.h | 73 +-- dom/media/platforms/ffmpeg/ffmpeg57/moz.build | 2 + dom/media/platforms/ffmpeg/ffmpeg58/moz.build | 2 + dom/media/platforms/ffmpeg/ffmpeg59/moz.build | 2 + dom/media/platforms/ffmpeg/ffmpeg60/moz.build | 2 + dom/media/platforms/ffmpeg/ffvpx/moz.build | 3 + dom/media/platforms/ffmpeg/libav53/moz.build | 2 + dom/media/platforms/ffmpeg/libav54/moz.build | 2 + dom/media/platforms/ffmpeg/libav55/moz.build | 2 + dom/media/platforms/ffmpeg/moz.build | 4 +- dom/media/platforms/moz.build | 2 + dom/media/platforms/wmf/DXVA2Manager.cpp | 73 ++- dom/media/platforms/wmf/MFCDMSession.cpp | 3 +- dom/media/platforms/wmf/MFMediaEngineStream.cpp | 9 + dom/media/platforms/wmf/WMFDataEncoderUtils.h | 1 - dom/media/platforms/wmf/WMFEncoderModule.cpp | 7 + dom/media/platforms/wmf/WMFUtils.cpp | 4 +- .../platforms/wrappers/MediaChangeMonitor.cpp | 7 + dom/media/platforms/wrappers/MediaChangeMonitor.h | 30 +- 46 files changed, 1847 insertions(+), 871 deletions(-) create mode 100644 dom/media/platforms/EncoderConfig.cpp create mode 100644 dom/media/platforms/EncoderConfig.h create mode 100644 dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp create mode 100644 dom/media/platforms/ffmpeg/FFmpegAudioEncoder.h create mode 100644 dom/media/platforms/ffmpeg/FFmpegDataEncoder.cpp create mode 100644 dom/media/platforms/ffmpeg/FFmpegDataEncoder.h create mode 100644 dom/media/platforms/ffmpeg/FFmpegUtils.cpp create mode 100644 dom/media/platforms/ffmpeg/FFmpegUtils.h (limited to 'dom/media/platforms') diff --git a/dom/media/platforms/EncoderConfig.cpp b/dom/media/platforms/EncoderConfig.cpp new file mode 100644 index 0000000000..ed780b947c --- /dev/null +++ b/dom/media/platforms/EncoderConfig.cpp @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; 
c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "EncoderConfig.h" +#include "MP4Decoder.h" +#include "VPXDecoder.h" + +namespace mozilla { + +CodecType EncoderConfig::CodecTypeForMime(const nsACString& aMimeType) { + if (MP4Decoder::IsH264(aMimeType)) { + return CodecType::H264; + } + if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP8)) { + return CodecType::VP8; + } + if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP9)) { + return CodecType::VP9; + } + MOZ_ASSERT_UNREACHABLE("Unsupported Mimetype"); + return CodecType::Unknown; +} + +} // namespace mozilla diff --git a/dom/media/platforms/EncoderConfig.h b/dom/media/platforms/EncoderConfig.h new file mode 100644 index 0000000000..e0da1709d6 --- /dev/null +++ b/dom/media/platforms/EncoderConfig.h @@ -0,0 +1,190 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_EncoderConfig_h_ +#define mozilla_EncoderConfig_h_ + +#include "mozilla/dom/ImageBitmapBinding.h" +#include "H264.h" + +namespace mozilla { + +enum class CodecType { + _BeginVideo_, + H264, + VP8, + VP9, + AV1, + _EndVideo_, + _BeginAudio_ = _EndVideo_, + Opus, + Vorbis, + Flac, + AAC, + PCM, + G722, + _EndAudio_, + Unknown, +}; + +enum class Usage { + Realtime, // Low latency prefered + Record +}; +enum class BitrateMode { Constant, Variable }; +// Scalable Video Coding (SVC) settings for WebCodecs: +// https://www.w3.org/TR/webrtc-svc/ +enum class ScalabilityMode { None, L1T2, L1T3 }; + +enum class HardwarePreference { RequireHardware, RequireSoftware, None }; + +// TODO: Automatically generate this (Bug 1865896) +const char* GetCodecTypeString(const CodecType& aCodecType); + +enum class H264BitStreamFormat { AVC, ANNEXB }; + +struct H264Specific final { + const H264_PROFILE mProfile; + const H264_LEVEL mLevel; + const H264BitStreamFormat mFormat; + + H264Specific(H264_PROFILE aProfile, H264_LEVEL aLevel, + H264BitStreamFormat aFormat) + : mProfile(aProfile), mLevel(aLevel), mFormat(aFormat) {} +}; + +enum class OpusBitstreamFormat { Opus, OGG }; + +// The default values come from the Web Codecs specification. +struct OpusSpecific final { + enum class Application { Unspecified, Voip, Audio, RestricedLowDelay }; + Application mApplication = Application::Unspecified; + uint64_t mFrameDuration = 20000; // microseconds + uint8_t mComplexity = 10; // 0-10 + OpusBitstreamFormat mFormat = OpusBitstreamFormat::Opus; + uint64_t mPacketLossPerc = 0; // 0-100 + bool mUseInBandFEC = false; + bool mUseDTX = false; +}; + +enum class VPXComplexity { Normal, High, Higher, Max }; +struct VP8Specific { + VP8Specific() = default; + // Ignore webrtc::VideoCodecVP8::errorConcealmentOn, + // for it's always false in the codebase (except libwebrtc test cases). 
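  // Illustrative only (not part of this patch): a realtime caller might
  // request two temporal layers with denoising and frame dropping enabled,
  // e.g.
  //   VP8Specific vp8(VPXComplexity::Normal, /* resilience */ false,
  //                   /* temporal layers */ 2, /* denoising */ true,
  //                   /* auto resize */ false, /* frame dropping */ true);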
+ VP8Specific(const VPXComplexity aComplexity, const bool aResilience, + const uint8_t aNumTemporalLayers, const bool aDenoising, + const bool aAutoResize, const bool aFrameDropping) + : mComplexity(aComplexity), + mResilience(aResilience), + mNumTemporalLayers(aNumTemporalLayers), + mDenoising(aDenoising), + mAutoResize(aAutoResize), + mFrameDropping(aFrameDropping) {} + const VPXComplexity mComplexity{VPXComplexity::Normal}; + const bool mResilience{true}; + const uint8_t mNumTemporalLayers{1}; + const bool mDenoising{true}; + const bool mAutoResize{false}; + const bool mFrameDropping{false}; +}; + +struct VP9Specific : public VP8Specific { + VP9Specific() = default; + VP9Specific(const VPXComplexity aComplexity, const bool aResilience, + const uint8_t aNumTemporalLayers, const bool aDenoising, + const bool aAutoResize, const bool aFrameDropping, + const bool aAdaptiveQp, const uint8_t aNumSpatialLayers, + const bool aFlexible) + : VP8Specific(aComplexity, aResilience, aNumTemporalLayers, aDenoising, + aAutoResize, aFrameDropping), + mAdaptiveQp(aAdaptiveQp), + mNumSpatialLayers(aNumSpatialLayers), + mFlexible(aFlexible) {} + const bool mAdaptiveQp{true}; + const uint8_t mNumSpatialLayers{1}; + const bool mFlexible{false}; +}; + +// A class that holds the intial configuration of an encoder. For simplicity, +// this is used for both audio and video encoding. Members irrelevant to the +// instance are to be ignored, and are set at their default value. +class EncoderConfig final { + public: + using PixelFormat = dom::ImageBitmapFormat; + using CodecSpecific = + Variant; + + EncoderConfig(const EncoderConfig& aConfig) = default; + + // This constructor is used for video encoders + EncoderConfig(const CodecType aCodecType, gfx::IntSize aSize, + const Usage aUsage, const PixelFormat aPixelFormat, + const PixelFormat aSourcePixelFormat, const uint8_t aFramerate, + const size_t aKeyframeInterval, const uint32_t aBitrate, + const BitrateMode aBitrateMode, + const HardwarePreference aHardwarePreference, + const ScalabilityMode aScalabilityMode, + const Maybe& aCodecSpecific) + : mCodec(aCodecType), + mSize(aSize), + mBitrateMode(aBitrateMode), + mBitrate(aBitrate), + mUsage(aUsage), + mHardwarePreference(aHardwarePreference), + mPixelFormat(aPixelFormat), + mSourcePixelFormat(aSourcePixelFormat), + mScalabilityMode(aScalabilityMode), + mFramerate(aFramerate), + mKeyframeInterval(aKeyframeInterval), + mCodecSpecific(aCodecSpecific) { + MOZ_ASSERT(IsVideo()); + } + + // This constructor is used for audio encoders + EncoderConfig(const CodecType aCodecType, uint32_t aNumberOfChannels, + const BitrateMode aBitrateMode, uint32_t aSampleRate, + uint32_t aBitrate, const Maybe& aCodecSpecific) + : mCodec(aCodecType), + mBitrateMode(aBitrateMode), + mBitrate(aBitrate), + mNumberOfChannels(aNumberOfChannels), + mSampleRate(aSampleRate), + mCodecSpecific(aCodecSpecific) { + MOZ_ASSERT(IsAudio()); + } + + static CodecType CodecTypeForMime(const nsACString& aMimeType); + + bool IsVideo() const { + return mCodec > CodecType::_BeginVideo_ && mCodec < CodecType::_EndVideo_; + } + + bool IsAudio() const { + return mCodec > CodecType::_BeginAudio_ && mCodec < CodecType::_EndAudio_; + } + + CodecType mCodec{}; + gfx::IntSize mSize{}; + BitrateMode mBitrateMode{}; + uint32_t mBitrate{}; + Usage mUsage{}; + // Video-only + HardwarePreference mHardwarePreference{}; + PixelFormat mPixelFormat{}; + PixelFormat mSourcePixelFormat{}; + ScalabilityMode mScalabilityMode{}; + uint8_t mFramerate{}; + size_t 
mKeyframeInterval{}; + // Audio-only + uint32_t mNumberOfChannels{}; + uint32_t mSampleRate{}; + Maybe mCodecSpecific{}; +}; + +} // namespace mozilla + +#endif // mozilla_EncoderConfig_h_ diff --git a/dom/media/platforms/PlatformEncoderModule.cpp b/dom/media/platforms/PlatformEncoderModule.cpp index 3eb4abd511..525729e756 100644 --- a/dom/media/platforms/PlatformEncoderModule.cpp +++ b/dom/media/platforms/PlatformEncoderModule.cpp @@ -32,6 +32,15 @@ const char* GetCodecTypeString(const CodecType& aCodecType) { return "_EndVideo_/_BeginAudio_"; case CodecType::Opus: return "Opus"; + case CodecType::Vorbis: + return "Vorbis"; + case CodecType::Flac: + return "Flac"; + case CodecType::AAC: + return "AAC"; + case CodecType::PCM: + return "PCM"; + break; case CodecType::G722: return "G722"; case CodecType::_EndAudio_: @@ -100,22 +109,28 @@ struct ConfigurationChangeToString { return nsPrintfCString("Framerate: %lfHz", aFramerateChange.get().value()); } nsCString operator()(const BitrateModeChange& aBitrateModeChange) { - return nsPrintfCString( - "Bitrate mode: %s", - aBitrateModeChange.get() == MediaDataEncoder::BitrateMode::Constant - ? "Constant" - : "Variable"); + return nsPrintfCString("Bitrate mode: %s", + aBitrateModeChange.get() == BitrateMode::Constant + ? "Constant" + : "Variable"); } nsCString operator()(const UsageChange& aUsageChange) { return nsPrintfCString( "Usage mode: %s", - aUsageChange.get() == MediaDataEncoder::Usage::Realtime ? "Realtime" - : "Recoding"); + aUsageChange.get() == Usage::Realtime ? "Realtime" : "Recoding"); } nsCString operator()(const ContentHintChange& aContentHintChange) { return nsPrintfCString("Content hint: %s", MaybeToString(aContentHintChange.get()).get()); } + nsCString operator()(const SampleRateChange& aSampleRateChange) { + return nsPrintfCString("Sample rate %" PRIu32 "Hz", + aSampleRateChange.get()); + } + nsCString operator()(const NumberOfChannelsChange& aNumberOfChannelsChange) { + return nsPrintfCString("Channels: %" PRIu32 "Hz", + aNumberOfChannelsChange.get()); + } }; nsString EncoderConfigurationChangeList::ToString() const { @@ -132,7 +147,9 @@ bool CanLikelyEncode(const EncoderConfig& aConfig) { if (aConfig.mCodec == CodecType::H264) { if (!aConfig.mCodecSpecific || !aConfig.mCodecSpecific->is()) { - LOGD("Error: asking for support codec for h264 without h264 specific config."); + LOGD( + "Error: asking for support codec for h264 without h264 specific " + "config."); return false; } H264Specific specific = aConfig.mCodecSpecific->as(); diff --git a/dom/media/platforms/PlatformEncoderModule.h b/dom/media/platforms/PlatformEncoderModule.h index 72dad430e6..222a9bb48c 100644 --- a/dom/media/platforms/PlatformEncoderModule.h +++ b/dom/media/platforms/PlatformEncoderModule.h @@ -8,11 +8,8 @@ # define PlatformEncoderModule_h_ # include "MP4Decoder.h" -# include "MediaData.h" -# include "MediaInfo.h" # include "MediaResult.h" # include "VPXDecoder.h" -# include "mozilla/Attributes.h" # include "mozilla/Maybe.h" # include "mozilla/MozPromise.h" # include "mozilla/RefPtr.h" @@ -20,93 +17,14 @@ # include "mozilla/dom/ImageBitmapBinding.h" # include "nsISupportsImpl.h" # include "VideoUtils.h" +# include "EncoderConfig.h" namespace mozilla { class MediaDataEncoder; -class EncoderConfig; +class MediaData; struct EncoderConfigurationChangeList; -enum class CodecType { - _BeginVideo_, - H264, - VP8, - VP9, - AV1, - _EndVideo_, - _BeginAudio_ = _EndVideo_, - Opus, - G722, - _EndAudio_, - Unknown, -}; - -// TODO: Automatically generate this (Bug 
1865896) -const char* GetCodecTypeString(const CodecType& aCodecType); - -enum class H264BitStreamFormat { AVC, ANNEXB }; - -struct H264Specific final { - const H264_PROFILE mProfile; - const H264_LEVEL mLevel; - const H264BitStreamFormat mFormat; - - H264Specific(H264_PROFILE aProfile, H264_LEVEL aLevel, - H264BitStreamFormat aFormat) - : mProfile(aProfile), mLevel(aLevel), mFormat(aFormat) {} -}; - -struct OpusSpecific final { - enum class Application { Voip, Audio, RestricedLowDelay }; - - const Application mApplication; - const uint8_t mComplexity; // from 0-10 - - OpusSpecific(const Application aApplication, const uint8_t aComplexity) - : mApplication(aApplication), mComplexity(aComplexity) { - MOZ_ASSERT(mComplexity <= 10); - } -}; - -enum class VPXComplexity { Normal, High, Higher, Max }; -struct VP8Specific { - VP8Specific() = default; - // Ignore webrtc::VideoCodecVP8::errorConcealmentOn, - // for it's always false in the codebase (except libwebrtc test cases). - VP8Specific(const VPXComplexity aComplexity, const bool aResilience, - const uint8_t aNumTemporalLayers, const bool aDenoising, - const bool aAutoResize, const bool aFrameDropping) - : mComplexity(aComplexity), - mResilience(aResilience), - mNumTemporalLayers(aNumTemporalLayers), - mDenoising(aDenoising), - mAutoResize(aAutoResize), - mFrameDropping(aFrameDropping) {} - const VPXComplexity mComplexity{VPXComplexity::Normal}; - const bool mResilience{true}; - const uint8_t mNumTemporalLayers{1}; - const bool mDenoising{true}; - const bool mAutoResize{false}; - const bool mFrameDropping{false}; -}; - -struct VP9Specific : public VP8Specific { - VP9Specific() = default; - VP9Specific(const VPXComplexity aComplexity, const bool aResilience, - const uint8_t aNumTemporalLayers, const bool aDenoising, - const bool aAutoResize, const bool aFrameDropping, - const bool aAdaptiveQp, const uint8_t aNumSpatialLayers, - const bool aFlexible) - : VP8Specific(aComplexity, aResilience, aNumTemporalLayers, aDenoising, - aAutoResize, aFrameDropping), - mAdaptiveQp(aAdaptiveQp), - mNumSpatialLayers(aNumSpatialLayers), - mFlexible(aFlexible) {} - const bool mAdaptiveQp{true}; - const uint8_t mNumSpatialLayers{1}; - const bool mFlexible{false}; -}; - class PlatformEncoderModule { public: NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PlatformEncoderModule) @@ -144,18 +62,6 @@ class MediaDataEncoder { public: NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaDataEncoder) - enum class Usage { - Realtime, // Low latency prefered - Record - }; - using PixelFormat = dom::ImageBitmapFormat; - enum class BitrateMode { Constant, Variable }; - // Scalable Video Coding (SVC) settings for WebCodecs: - // https://www.w3.org/TR/webrtc-svc/ - enum class ScalabilityMode { None, L1T2, L1T3 }; - - enum class HardwarePreference { RequireHardware, RequireSoftware, None }; - static bool IsVideo(const CodecType aCodec) { return aCodec > CodecType::_BeginVideo_ && aCodec < CodecType::_EndVideo_; } @@ -163,8 +69,7 @@ class MediaDataEncoder { return aCodec > CodecType::_BeginAudio_ && aCodec < CodecType::_EndAudio_; } - using InitPromise = - MozPromise; + using InitPromise = MozPromise; using EncodedData = nsTArray>; using EncodePromise = MozPromise; @@ -229,85 +134,6 @@ class MediaDataEncoder { virtual ~MediaDataEncoder() = default; }; -class EncoderConfig final { - public: - using CodecSpecific = - Variant; - - EncoderConfig(const EncoderConfig& aConfig) - : mCodec(aConfig.mCodec), - mSize(aConfig.mSize), - mUsage(aConfig.mUsage), - 
mHardwarePreference(aConfig.mHardwarePreference), - mPixelFormat(aConfig.mPixelFormat), - mSourcePixelFormat(aConfig.mSourcePixelFormat), - mScalabilityMode(aConfig.mScalabilityMode), - mFramerate(aConfig.mFramerate), - mKeyframeInterval(aConfig.mKeyframeInterval), - mBitrate(aConfig.mBitrate), - mBitrateMode(aConfig.mBitrateMode), - mCodecSpecific(aConfig.mCodecSpecific) {} - - template - EncoderConfig(const CodecType aCodecType, gfx::IntSize aSize, - const MediaDataEncoder::Usage aUsage, - const MediaDataEncoder::PixelFormat aPixelFormat, - const MediaDataEncoder::PixelFormat aSourcePixelFormat, - const uint8_t aFramerate, const size_t aKeyframeInterval, - const uint32_t aBitrate, - const MediaDataEncoder::BitrateMode aBitrateMode, - const MediaDataEncoder::HardwarePreference aHardwarePreference, - const MediaDataEncoder::ScalabilityMode aScalabilityMode, - const Maybe& aCodecSpecific) - : mCodec(aCodecType), - mSize(aSize), - mUsage(aUsage), - mHardwarePreference(aHardwarePreference), - mPixelFormat(aPixelFormat), - mSourcePixelFormat(aSourcePixelFormat), - mScalabilityMode(aScalabilityMode), - mFramerate(aFramerate), - mKeyframeInterval(aKeyframeInterval), - mBitrate(aBitrate), - mBitrateMode(aBitrateMode), - mCodecSpecific(aCodecSpecific) {} - - static CodecType CodecTypeForMime(const nsACString& aMimeType) { - if (MP4Decoder::IsH264(aMimeType)) { - return CodecType::H264; - } - if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP8)) { - return CodecType::VP8; - } - if (VPXDecoder::IsVPX(aMimeType, VPXDecoder::VP9)) { - return CodecType::VP9; - } - MOZ_ASSERT_UNREACHABLE("Unsupported Mimetype"); - return CodecType::Unknown; - } - - bool IsVideo() const { - return mCodec > CodecType::_BeginVideo_ && mCodec < CodecType::_EndVideo_; - } - - bool IsAudio() const { - return mCodec > CodecType::_BeginAudio_ && mCodec < CodecType::_EndAudio_; - } - - CodecType mCodec; - gfx::IntSize mSize; - MediaDataEncoder::Usage mUsage; - MediaDataEncoder::HardwarePreference mHardwarePreference; - MediaDataEncoder::PixelFormat mPixelFormat; - MediaDataEncoder::PixelFormat mSourcePixelFormat; - MediaDataEncoder::ScalabilityMode mScalabilityMode; - uint8_t mFramerate{}; - size_t mKeyframeInterval{}; - uint32_t mBitrate{}; - MediaDataEncoder::BitrateMode mBitrateMode{}; - Maybe mCodecSpecific; -}; - // Wrap a type to make it unique. This allows using ergonomically in the Variant // below. Simply aliasing with `using` isn't enough, because typedefs in C++ // don't produce strong types, so two integer variants result in @@ -341,20 +167,25 @@ using FramerateChange = StrongTypedef, struct FramerateChangeType>; // The bitrate mode (variable, constant) of the encoding using BitrateModeChange = - StrongTypedef; + StrongTypedef; // The usage for the encoded stream, this influence latency, ordering, etc. -using UsageChange = - StrongTypedef; +using UsageChange = StrongTypedef; // If present, the expected content of the video frames (screen, movie, etc.). // The value the string can have isn't decided just yet. When absent, the // encoder uses generic settings. using ContentHintChange = StrongTypedef, struct ContentHintTypeType>; +// If present, the new sample-rate of the audio +using SampleRateChange = StrongTypedef; +// If present, the new sample-rate of the audio +using NumberOfChannelsChange = + StrongTypedef; // A change to a parameter of an encoder instance. 
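// Illustrative only (not part of this patch): with the wrappers above, a
// caller could describe an on-the-fly audio reconfiguration roughly as
//   auto changes = MakeRefPtr<EncoderConfigurationChangeList>();
//   changes->Push(SampleRateChange(48000));
//   changes->Push(NumberOfChannelsChange(2));
//   encoder->Reconfigure(changes);
// assuming the Push() helper on EncoderConfigurationChangeList declared
// further down in this header.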
using EncoderConfigurationItem = Variant; + BitrateChange, FramerateChange, UsageChange, ContentHintChange, + SampleRateChange, NumberOfChannelsChange>; // A list of changes to an encoder configuration, that _might_ be able to change // on the fly. Not all encoder modules can adjust their configuration on the diff --git a/dom/media/platforms/agnostic/AgnosticDecoderModule.cpp b/dom/media/platforms/agnostic/AgnosticDecoderModule.cpp index 7bdc30b432..753dee0238 100644 --- a/dom/media/platforms/agnostic/AgnosticDecoderModule.cpp +++ b/dom/media/platforms/agnostic/AgnosticDecoderModule.cpp @@ -36,8 +36,9 @@ static bool IsAvailableInDefault(DecoderType type) { case DecoderType::AV1: return StaticPrefs::media_av1_enabled(); #endif - case DecoderType::Opus: case DecoderType::Theora: + return StaticPrefs::media_theora_enabled(); + case DecoderType::Opus: case DecoderType::Vorbis: case DecoderType::VPX: case DecoderType::Wave: @@ -56,7 +57,8 @@ static bool IsAvailableInRdd(DecoderType type) { case DecoderType::Opus: return StaticPrefs::media_rdd_opus_enabled(); case DecoderType::Theora: - return StaticPrefs::media_rdd_theora_enabled(); + return StaticPrefs::media_rdd_theora_enabled() && + StaticPrefs::media_theora_enabled(); case DecoderType::Vorbis: #if defined(__MINGW32__) // If this is a MinGW build we need to force AgnosticDecoderModule to @@ -129,7 +131,8 @@ media::DecodeSupportSet AgnosticDecoderModule::Supports( (AOMDecoder::IsAV1(mimeType) && IsAvailable(DecoderType::AV1)) || #endif (VPXDecoder::IsVPX(mimeType) && IsAvailable(DecoderType::VPX)) || - (TheoraDecoder::IsTheora(mimeType) && IsAvailable(DecoderType::Theora)); + (TheoraDecoder::IsTheora(mimeType) && IsAvailable(DecoderType::Theora) && + StaticPrefs::media_theora_enabled()); MOZ_LOG(sPDMLog, LogLevel::Debug, ("Agnostic decoder %s requested type '%s'", supports ? "supports" : "rejects", mimeType.BeginReading())); @@ -164,7 +167,8 @@ already_AddRefed AgnosticDecoderModule::CreateVideoDecoder( } } #endif - else if (TheoraDecoder::IsTheora(aParams.mConfig.mMimeType)) { + else if (TheoraDecoder::IsTheora(aParams.mConfig.mMimeType) && + StaticPrefs::media_theora_enabled()) { m = new TheoraDecoder(aParams); } diff --git a/dom/media/platforms/agnostic/bytestreams/H264.cpp b/dom/media/platforms/agnostic/bytestreams/H264.cpp index 113be67d0e..ba8d15dc40 100644 --- a/dom/media/platforms/agnostic/bytestreams/H264.cpp +++ b/dom/media/platforms/agnostic/bytestreams/H264.cpp @@ -3,16 +3,17 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #include "H264.h" -#include #include "AnnexB.h" #include "BitReader.h" #include "BitWriter.h" #include "BufferReader.h" #include "ByteStreamsUtils.h" #include "ByteWriter.h" +#include "MediaInfo.h" #include "mozilla/PodOperations.h" #include "mozilla/ResultExtensions.h" #include "mozilla/Try.h" +#include #define READSE(var, min, max) \ { \ diff --git a/dom/media/platforms/agnostic/bytestreams/H264.h b/dom/media/platforms/agnostic/bytestreams/H264.h index c3651d1a0f..6207a26113 100644 --- a/dom/media/platforms/agnostic/bytestreams/H264.h +++ b/dom/media/platforms/agnostic/bytestreams/H264.h @@ -6,11 +6,45 @@ #define MP4_DEMUXER_H264_H_ #include -#include "DecoderData.h" +#include "ErrorList.h" +#include "mozilla/AlreadyAddRefed.h" +#include "mozilla/Result.h" +#include "mozilla/Span.h" +#include "mozilla/gfx/Point.h" #include "mozilla/gfx/Types.h" namespace mozilla { class BitReader; +class MediaByteBuffer; +class MediaRawData; + +enum H264_PROFILE { + H264_PROFILE_UNKNOWN = 0, + H264_PROFILE_BASE = 0x42, + H264_PROFILE_MAIN = 0x4D, + H264_PROFILE_EXTENDED = 0x58, + H264_PROFILE_HIGH = 0x64, +}; + +enum H264_LEVEL { + H264_LEVEL_1 = 10, + H264_LEVEL_1_b = 11, + H264_LEVEL_1_1 = 11, + H264_LEVEL_1_2 = 12, + H264_LEVEL_1_3 = 13, + H264_LEVEL_2 = 20, + H264_LEVEL_2_1 = 21, + H264_LEVEL_2_2 = 22, + H264_LEVEL_3 = 30, + H264_LEVEL_3_1 = 31, + H264_LEVEL_3_2 = 32, + H264_LEVEL_4 = 40, + H264_LEVEL_4_1 = 41, + H264_LEVEL_4_2 = 42, + H264_LEVEL_5 = 50, + H264_LEVEL_5_1 = 51, + H264_LEVEL_5_2 = 52 +}; // Spec 7.4.2.1 #define MAX_SPS_COUNT 32 diff --git a/dom/media/platforms/apple/AppleDecoderModule.cpp b/dom/media/platforms/apple/AppleDecoderModule.cpp index c54593a495..b92369601c 100644 --- a/dom/media/platforms/apple/AppleDecoderModule.cpp +++ b/dom/media/platforms/apple/AppleDecoderModule.cpp @@ -13,6 +13,7 @@ #include "MP4Decoder.h" #include "VideoUtils.h" #include "VPXDecoder.h" +#include "AOMDecoder.h" #include "mozilla/Logging.h" #include "mozilla/StaticPrefs_media.h" #include "mozilla/gfx/gfxVars.h" @@ -34,6 +35,7 @@ using media::MediaCodec; bool AppleDecoderModule::sInitialized = false; bool AppleDecoderModule::sCanUseVP9Decoder = false; +bool AppleDecoderModule::sCanUseAV1Decoder = false; /* static */ void AppleDecoderModule::Init() { @@ -45,6 +47,7 @@ void AppleDecoderModule::Init() { if (RegisterSupplementalVP9Decoder()) { sCanUseVP9Decoder = CanCreateHWDecoder(MediaCodec::VP9); } + sCanUseAV1Decoder = CanCreateHWDecoder(MediaCodec::AV1); } nsresult AppleDecoderModule::Startup() { @@ -83,7 +86,8 @@ DecodeSupportSet AppleDecoderModule::SupportsMimeType( const nsACString& aMimeType, DecoderDoctorDiagnostics* aDiagnostics) const { bool checkSupport = aMimeType.EqualsLiteral("audio/mp4a-latm") || MP4Decoder::IsH264(aMimeType) || - VPXDecoder::IsVP9(aMimeType); + VPXDecoder::IsVP9(aMimeType) || + AOMDecoder::IsAV1(aMimeType); DecodeSupportSet supportType{}; if (checkSupport) { @@ -142,6 +146,35 @@ bool AppleDecoderModule::IsVideoSupported( if (MP4Decoder::IsH264(aConfig.mMimeType)) { return true; } + if (AOMDecoder::IsAV1(aConfig.mMimeType)) { + if (!sCanUseAV1Decoder || + aOptions.contains( + CreateDecoderParams::Option::HardwareDecoderNotAllowed)) { + return false; + } + + // HW AV1 decoder only supports 8 or 10 bit color. 
+ if (aConfig.mColorDepth != gfx::ColorDepth::COLOR_8 && + aConfig.mColorDepth != gfx::ColorDepth::COLOR_10) { + return false; + } + + if (aConfig.mColorSpace.isSome()) { + if (*aConfig.mColorSpace == gfx::YUVColorSpace::Identity) { + // HW AV1 decoder doesn't support RGB + return false; + } + } + + if (aConfig.mExtraData && aConfig.mExtraData->Length() < 2) { + return true; // Assume it's okay. + } + // top 3 bits are the profile. + int profile = aConfig.mExtraData->ElementAt(1) >> 5; + // 0 is main profile + return profile == 0; + } + if (!VPXDecoder::IsVP9(aConfig.mMimeType) || !sCanUseVP9Decoder || aOptions.contains( CreateDecoderParams::Option::HardwareDecoderNotAllowed)) { @@ -187,6 +220,20 @@ bool AppleDecoderModule::CanCreateHWDecoder(MediaCodec aCodec) { return false; } switch (aCodec) { + case MediaCodec::AV1: { + info.mMimeType = "video/av1"; + + // Build up a fake CBox + bool hasSeqHdr; + AOMDecoder::AV1SequenceInfo seqInfo; + AOMDecoder::OperatingPoint op; + seqInfo.mOperatingPoints.AppendElement(op); + seqInfo.mImage = {1920, 1080}; + AOMDecoder::WriteAV1CBox(seqInfo, info.mExtraData, hasSeqHdr); + + vtReportsSupport = VTIsHardwareDecodeSupported(kCMVideoCodecType_AV1); + break; + } case MediaCodec::VP9: info.mMimeType = "video/vp9"; VPXDecoder::GetVPCCBox(info.mExtraData, VPXDecoder::VPXStreamInfo()); diff --git a/dom/media/platforms/apple/AppleDecoderModule.h b/dom/media/platforms/apple/AppleDecoderModule.h index f869243a5c..46b0223d75 100644 --- a/dom/media/platforms/apple/AppleDecoderModule.h +++ b/dom/media/platforms/apple/AppleDecoderModule.h @@ -39,6 +39,7 @@ class AppleDecoderModule : public PlatformDecoderModule { static void Init(); static bool sCanUseVP9Decoder; + static bool sCanUseAV1Decoder; static constexpr int kCMVideoCodecType_H264{'avc1'}; static constexpr int kCMVideoCodecType_VP9{'vp09'}; diff --git a/dom/media/platforms/apple/AppleVTDecoder.cpp b/dom/media/platforms/apple/AppleVTDecoder.cpp index ae34c2d142..6a70ed19d5 100644 --- a/dom/media/platforms/apple/AppleVTDecoder.cpp +++ b/dom/media/platforms/apple/AppleVTDecoder.cpp @@ -18,6 +18,7 @@ #include "MacIOSurfaceImage.h" #include "MediaData.h" #include "VPXDecoder.h" +#include "AOMDecoder.h" #include "VideoUtils.h" #include "gfxMacUtils.h" #include "mozilla/ArrayUtils.h" @@ -55,6 +56,7 @@ AppleVTDecoder::AppleVTDecoder(const VideoInfo& aConfig, mColorDepth(aConfig.mColorDepth), mStreamType(MP4Decoder::IsH264(aConfig.mMimeType) ? StreamType::H264 : VPXDecoder::IsVP9(aConfig.mMimeType) ? StreamType::VP9 + : AOMDecoder::IsAV1(aConfig.mMimeType) ? StreamType::AV1 : StreamType::Unknown), mTaskQueue(TaskQueue::Create( GetMediaThreadPool(MediaThreadType::PLATFORM_DECODER), @@ -89,7 +91,10 @@ AppleVTDecoder::AppleVTDecoder(const VideoInfo& aConfig, MOZ_ASSERT(mStreamType != StreamType::Unknown); // TODO: Verify aConfig.mime_type. LOG("Creating AppleVTDecoder for %dx%d %s video", mDisplayWidth, - mDisplayHeight, mStreamType == StreamType::H264 ? "H.264" : "VP9"); + mDisplayHeight, + mStreamType == StreamType::H264 ? "H.264" + : mStreamType == StreamType::VP9 ? 
"VP9" + : "AV1"); } AppleVTDecoder::~AppleVTDecoder() { MOZ_COUNT_DTOR(AppleVTDecoder); } @@ -177,6 +182,9 @@ void AppleVTDecoder::ProcessDecode(MediaRawData* aSample) { case StreamType::VP9: flag |= MediaInfoFlag::VIDEO_VP9; break; + case StreamType::AV1: + flag |= MediaInfoFlag::VIDEO_AV1; + break; default: break; } @@ -377,6 +385,8 @@ nsCString AppleVTDecoder::GetCodecName() const { return "h264"_ns; case StreamType::VP9: return "vp9"_ns; + case StreamType::AV1: + return "av1"_ns; default: return "unknown"_ns; } @@ -598,13 +608,17 @@ MediaResult AppleVTDecoder::InitializeSession() { OSStatus rv; AutoCFRelease extensions = CreateDecoderExtensions(); + CMVideoCodecType streamType; + if (mStreamType == StreamType::H264) { + streamType = kCMVideoCodecType_H264; + } else if (mStreamType == StreamType::VP9) { + streamType = CMVideoCodecType(AppleDecoderModule::kCMVideoCodecType_VP9); + } else { + streamType = kCMVideoCodecType_AV1; + } rv = CMVideoFormatDescriptionCreate( - kCFAllocatorDefault, - mStreamType == StreamType::H264 - ? kCMVideoCodecType_H264 - : CMVideoCodecType(AppleDecoderModule::kCMVideoCodecType_VP9), - AssertedCast(mPictureWidth), + kCFAllocatorDefault, streamType, AssertedCast(mPictureWidth), AssertedCast(mPictureHeight), extensions, &mFormat); if (rv != noErr) { return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, @@ -626,6 +640,7 @@ MediaResult AppleVTDecoder::InitializeSession() { &cb, &mSession); if (rv != noErr) { + LOG("AppleVTDecoder: VTDecompressionSessionCreate failed: %d", rv); return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, RESULT_DETAIL("Couldn't create decompression session!")); } @@ -656,7 +671,10 @@ CFDictionaryRef AppleVTDecoder::CreateDecoderExtensions() { AssertedCast(mExtraData->Length())); const void* atomsKey[1]; - atomsKey[0] = mStreamType == StreamType::H264 ? CFSTR("avcC") : CFSTR("vpcC"); + atomsKey[0] = mStreamType == StreamType::H264 ? CFSTR("avcC") + : mStreamType == StreamType::VP9 ? 
CFSTR("vpcC") + : CFSTR("av1C"); + ; const void* atomsValue[] = {data}; static_assert(ArrayLength(atomsKey) == ArrayLength(atomsValue), "Non matching keys/values array size"); diff --git a/dom/media/platforms/apple/AppleVTDecoder.h b/dom/media/platforms/apple/AppleVTDecoder.h index 5b8f02b86f..a32bec112e 100644 --- a/dom/media/platforms/apple/AppleVTDecoder.h +++ b/dom/media/platforms/apple/AppleVTDecoder.h @@ -111,7 +111,7 @@ class AppleVTDecoder final : public MediaDataDecoder, CFDictionaryRef CreateDecoderSpecification(); CFDictionaryRef CreateDecoderExtensions(); - enum class StreamType { Unknown, H264, VP9 }; + enum class StreamType { Unknown, H264, VP9, AV1 }; const StreamType mStreamType; const RefPtr mTaskQueue; const uint32_t mMaxRefFrames; diff --git a/dom/media/platforms/apple/AppleVTEncoder.cpp b/dom/media/platforms/apple/AppleVTEncoder.cpp index 5ec9abebe2..c464ddd6f3 100644 --- a/dom/media/platforms/apple/AppleVTEncoder.cpp +++ b/dom/media/platforms/apple/AppleVTEncoder.cpp @@ -80,9 +80,8 @@ static bool SetConstantBitrate(VTCompressionSessionRef& aSession, } static bool SetBitrateAndMode(VTCompressionSessionRef& aSession, - MediaDataEncoder::BitrateMode aBitrateMode, - uint32_t aBitsPerSec) { - if (aBitrateMode == MediaDataEncoder::BitrateMode::Variable) { + BitrateMode aBitrateMode, uint32_t aBitsPerSec) { + if (aBitrateMode == BitrateMode::Variable) { return SetAverageBitrate(aSession, aBitsPerSec); } return SetConstantBitrate(aSession, aBitsPerSec); @@ -177,9 +176,8 @@ RefPtr AppleVTEncoder::Init() { if (mConfig.mBitrate) { if (!SetBitrateAndMode(mSession, mConfig.mBitrateMode, mConfig.mBitrate)) { LOGE("failed to set bitrate to %d and mode to %s", mConfig.mBitrate, - mConfig.mBitrateMode == MediaDataEncoder::BitrateMode::Constant - ? "constant" - : "variable"); + mConfig.mBitrateMode == BitrateMode::Constant ? 
"constant" + : "variable"); return InitPromise::CreateAndReject( MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "fail to configurate bitrate"), @@ -228,26 +226,25 @@ RefPtr AppleVTEncoder::Init() { } mError = NS_OK; - return InitPromise::CreateAndResolve(TrackInfo::TrackType::kVideoTrack, - __func__); + return InitPromise::CreateAndResolve(true, __func__); } -static Maybe MapPixelFormat(MediaDataEncoder::PixelFormat aFormat) { +static Maybe MapPixelFormat(dom::ImageBitmapFormat aFormat) { switch (aFormat) { - case MediaDataEncoder::PixelFormat::RGBA32: - case MediaDataEncoder::PixelFormat::BGRA32: + case dom::ImageBitmapFormat::RGBA32: + case dom::ImageBitmapFormat::BGRA32: return Some(kCVPixelFormatType_32BGRA); - case MediaDataEncoder::PixelFormat::RGB24: + case dom::ImageBitmapFormat::RGB24: return Some(kCVPixelFormatType_24RGB); - case MediaDataEncoder::PixelFormat::BGR24: + case dom::ImageBitmapFormat::BGR24: return Some(kCVPixelFormatType_24BGR); - case MediaDataEncoder::PixelFormat::GRAY8: + case dom::ImageBitmapFormat::GRAY8: return Some(kCVPixelFormatType_OneComponent8); - case MediaDataEncoder::PixelFormat::YUV444P: + case dom::ImageBitmapFormat::YUV444P: return Some(kCVPixelFormatType_444YpCbCr8); - case MediaDataEncoder::PixelFormat::YUV420P: + case dom::ImageBitmapFormat::YUV420P: return Some(kCVPixelFormatType_420YpCbCr8PlanarFullRange); - case MediaDataEncoder::PixelFormat::YUV420SP_NV12: + case dom::ImageBitmapFormat::YUV420SP_NV12: return Some(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); default: return Nothing(); @@ -459,11 +456,10 @@ void AppleVTEncoder::OutputFrame(CMSampleBufferRef aBuffer) { LOGD("::OutputFrame"); RefPtr output(new MediaRawData()); - bool forceAvcc = false; if (mConfig.mCodecSpecific->is()) { forceAvcc = mConfig.mCodecSpecific->as().mFormat == - H264BitStreamFormat::AVC; + H264BitStreamFormat::AVC; } bool asAnnexB = mConfig.mUsage == Usage::Realtime && !forceAvcc; bool succeeded = WriteExtraData(output, aBuffer, asAnnexB) && @@ -590,7 +586,9 @@ AppleVTEncoder::ProcessReconfigure( mConfig.mUsage = aChange.get(); return SetRealtime(mSession, aChange.get() == Usage::Realtime); }, - [&](const ContentHintChange& aChange) -> bool { return false; }); + [&](const ContentHintChange& aChange) -> bool { return false; }, + [&](const SampleRateChange& aChange) -> bool { return false; }, + [&](const NumberOfChannelsChange& aChange) -> bool { return false; }); }; using P = MediaDataEncoder::ReconfigurationPromise; if (ok) { @@ -599,18 +597,18 @@ AppleVTEncoder::ProcessReconfigure( return P::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__); } -static size_t NumberOfPlanes(MediaDataEncoder::PixelFormat aPixelFormat) { +static size_t NumberOfPlanes(dom::ImageBitmapFormat aPixelFormat) { switch (aPixelFormat) { - case MediaDataEncoder::PixelFormat::RGBA32: - case MediaDataEncoder::PixelFormat::BGRA32: - case MediaDataEncoder::PixelFormat::RGB24: - case MediaDataEncoder::PixelFormat::BGR24: - case MediaDataEncoder::PixelFormat::GRAY8: + case dom::ImageBitmapFormat::RGBA32: + case dom::ImageBitmapFormat::BGRA32: + case dom::ImageBitmapFormat::RGB24: + case dom::ImageBitmapFormat::BGR24: + case dom::ImageBitmapFormat::GRAY8: return 1; - case MediaDataEncoder::PixelFormat::YUV444P: - case MediaDataEncoder::PixelFormat::YUV420P: + case dom::ImageBitmapFormat::YUV444P: + case dom::ImageBitmapFormat::YUV420P: return 3; - case MediaDataEncoder::PixelFormat::YUV420SP_NV12: + case dom::ImageBitmapFormat::YUV420SP_NV12: return 2; default: LOGE("Unsupported input pixel 
format"); diff --git a/dom/media/platforms/apple/AppleVTEncoder.h b/dom/media/platforms/apple/AppleVTEncoder.h index eded46c8c8..c7985a454c 100644 --- a/dom/media/platforms/apple/AppleVTEncoder.h +++ b/dom/media/platforms/apple/AppleVTEncoder.h @@ -24,9 +24,8 @@ class AppleVTEncoder final : public MediaDataEncoder { const RefPtr& aTaskQueue) : mConfig(aConfig), mTaskQueue(aTaskQueue), - mHardwareNotAllowed( - aConfig.mHardwarePreference == - MediaDataEncoder::HardwarePreference::RequireSoftware), + mHardwareNotAllowed(aConfig.mHardwarePreference == + HardwarePreference::RequireSoftware), mFramesCompleted(false), mError(NS_OK), mSession(nullptr) { diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp new file mode 100644 index 0000000000..28db667732 --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.cpp @@ -0,0 +1,458 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FFmpegAudioEncoder.h" + +#include "FFmpegRuntimeLinker.h" +#include "FFmpegLog.h" +#include "FFmpegUtils.h" +#include "MediaData.h" + +#include "AudioSegment.h" + +namespace mozilla { + +FFmpegAudioEncoder::FFmpegAudioEncoder( + const FFmpegLibWrapper* aLib, AVCodecID aCodecID, + const RefPtr& aTaskQueue, const EncoderConfig& aConfig) + : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {} + +nsCString FFmpegAudioEncoder::GetDescriptionName() const { +#ifdef USING_MOZFFVPX + return "ffvpx audio encoder"_ns; +#else + const char* lib = +# if defined(MOZ_FFMPEG) + FFmpegRuntimeLinker::LinkStatusLibraryName(); +# else + "no library: ffmpeg disabled during build"; +# endif + return nsPrintfCString("ffmpeg audio encoder (%s)", lib); +#endif +} + +void FFmpegAudioEncoder::ResamplerDestroy::operator()( + SpeexResamplerState* aResampler) { + speex_resampler_destroy(aResampler); +} + +nsresult FFmpegAudioEncoder::InitSpecific() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("FFmpegAudioEncoder::InitInternal"); + + // Initialize the common members of the encoder instance + AVCodec* codec = FFmpegDataEncoder::InitCommon(); + if (!codec) { + FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed"); + return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR; + } + + // Find a compatible input rate for the codec, update the encoder config, and + // note the rate at which this instance was configured. + mInputSampleRate = AssertedCast(mConfig.mSampleRate); + if (codec->supported_samplerates) { + // Ensure the sample-rate list is sorted, iterate and either find that the + // sample rate is supported, or pick the same rate just above the audio + // input sample-rate (as to not lose information). If the audio is higher + // than the highest supported sample-rate, down-sample to the highest + // sample-rate supported by the codec. This is the case when encoding high + // samplerate audio to opus. 
+ AutoTArray supportedSampleRates; + IterateZeroTerminated(codec->supported_samplerates, + [&supportedSampleRates](int aRate) mutable { + supportedSampleRates.AppendElement(aRate); + }); + supportedSampleRates.Sort(); + + for (const auto& rate : supportedSampleRates) { + if (mInputSampleRate == rate) { + mConfig.mSampleRate = rate; + break; + } + if (mInputSampleRate < rate) { + // This rate is the smallest supported rate above the content's rate. + mConfig.mSampleRate = rate; + break; + } + if (mInputSampleRate > rate) { + mConfig.mSampleRate = rate; + } + } + } + + if (mConfig.mSampleRate != AssertedCast(mInputSampleRate)) { + // Need to resample to targetRate + int err; + SpeexResamplerState* resampler = speex_resampler_init( + mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate, + SPEEX_RESAMPLER_QUALITY_DEFAULT, &err); + if (!err) { + mResampler.reset(resampler); + } else { + FFMPEG_LOG( + "Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)", + mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels); + } + } + + // And now the audio-specific part + mCodecContext->sample_rate = AssertedCast(mConfig.mSampleRate); + mCodecContext->channels = AssertedCast(mConfig.mNumberOfChannels); + +#if LIBAVCODEC_VERSION_MAJOR >= 60 + // Gecko's ordering intentionnally matches ffmepg's ordering + mLib->av_channel_layout_default(&mCodecContext->ch_layout, + AssertedCast(mCodecContext->channels)); +#endif + + switch (mConfig.mCodec) { + case CodecType::Opus: + // When using libopus, ffmpeg supports interleaved float and s16 input. + mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT; + break; + case CodecType::Vorbis: + // When using libvorbis, ffmpeg only supports planar f32 input. + mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP; + break; + default: + MOZ_ASSERT_UNREACHABLE("Not supported"); + } + + if (mConfig.mCodec == CodecType::Opus) { + // Default is VBR + if (mConfig.mBitrateMode == BitrateMode::Constant) { + mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0); + } + if (mConfig.mCodecSpecific.isSome()) { + MOZ_ASSERT(mConfig.mCodecSpecific->is()); + const OpusSpecific& specific = mConfig.mCodecSpecific->as(); + // This attribute maps directly to complexity + mCodecContext->compression_level = specific.mComplexity; + FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity); + float frameDurationMs = + AssertedCast(specific.mFrameDuration) / 1000.f; + if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration", + frameDurationMs, 0)) { + FFMPEG_LOG("Error setting the frame duration on Opus encoder"); + return NS_ERROR_FAILURE; + } + FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs); + if (specific.mPacketLossPerc) { + if (mLib->av_opt_set_int( + mCodecContext->priv_data, "packet_loss", + AssertedCast(specific.mPacketLossPerc), 0)) { + FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64 + " on Opus encoder", + specific.mPacketLossPerc); + return NS_ERROR_FAILURE; + } + FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder", + AssertedCast(specific.mPacketLossPerc)); + } + if (specific.mUseInBandFEC) { + if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) { + FFMPEG_LOG("Error %s FEC on Opus encoder", + specific.mUseInBandFEC ? "enabling" : "disabling"); + return NS_ERROR_FAILURE; + } + FFMPEG_LOGV("In-band FEC enabled for Opus encoder."); + } + if (specific.mUseDTX) { + if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) { + FFMPEG_LOG("Error %s DTX on Opus encoder", + specific.mUseDTX ? 
"enabling" : "disabling"); + return NS_ERROR_FAILURE; + } + // DTX packets are a TOC byte, and possibly one byte of length, packets + // 3 bytes and larger are to be returned. + mDtxThreshold = 3; + } + // TODO: format + // https://bugzilla.mozilla.org/show_bug.cgi?id=1876066 + } + } + // Override the time base: always the sample-rate the encoder is running at + mCodecContext->time_base = + AVRational{.num = 1, .den = mCodecContext->sample_rate}; + + MediaResult rv = FinishInitCommon(codec); + if (NS_FAILED(rv)) { + FFMPEG_LOG("FFmpeg encode initialization failure."); + return rv.Code(); + } + + return NS_OK; +} + +// avcodec_send_frame and avcodec_receive_packet were introduced in version 58. +#if LIBAVCODEC_VERSION_MAJOR >= 58 + +Result +FFmpegAudioEncoder::EncodeOnePacket(Span aSamples, + media::TimeUnit aPts) { + // Allocate AVFrame. + if (!PrepareFrame()) { + FFMPEG_LOG("failed to allocate frame"); + return Err(NS_ERROR_OUT_OF_MEMORY); + } + + uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels; + + // This method assumes that the audio has been packetized appropriately -- + // packets smaller than the packet size are allowed when draining. + MOZ_ASSERT(AssertedCast(frameCount) <= mCodecContext->frame_size); + + mFrame->channels = AssertedCast(mConfig.mNumberOfChannels); + +# if LIBAVCODEC_VERSION_MAJOR >= 60 + int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout, + &mCodecContext->ch_layout); + if (rv < 0) { + FFMPEG_LOG("channel layout copy error: %s", + MakeErrorString(mLib, rv).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } +# endif + + mFrame->sample_rate = AssertedCast(mConfig.mSampleRate); + // Not a mistake, nb_samples is per channel in ffmpeg + mFrame->nb_samples = AssertedCast(frameCount); + // Audio is converted below if needed + mFrame->format = mCodecContext->sample_fmt; + // Set presentation timestamp and duration of the AVFrame. +# if LIBAVCODEC_VERSION_MAJOR >= 59 + mFrame->time_base = + AVRational{.num = 1, .den = static_cast(mConfig.mSampleRate)}; +# endif + mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate); + mFrame->pkt_duration = frameCount; +# if LIBAVCODEC_VERSION_MAJOR >= 60 + mFrame->duration = frameCount; +# else + // Save duration in the time_base unit. + mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration); +# endif + + if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) { + FFMPEG_LOG("failed to allocate frame data: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_OUT_OF_MEMORY); + } + + // Make sure AVFrame is writable. + if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) { + FFMPEG_LOG("failed to make frame writable: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + // The input is always in f32 interleaved for now + if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) { + PodCopy(reinterpret_cast(mFrame->data[0]), aSamples.data(), + aSamples.Length()); + } else { + MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP); + for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) { + DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples, + mFrame->channels, mFrame->data); + } + } + + // Now send the AVFrame to ffmpeg for encoding, same code for audio and video. 
+ return FFmpegDataEncoder::EncodeWithModernAPIs(); +} + +Result FFmpegAudioEncoder< + LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr aSample) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(mCodecContext); + MOZ_ASSERT(aSample); + + RefPtr sample(aSample->As()); + + FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s", + sample->Frames(), sample->mTime.ToString().get()); + + if ((!mResampler && sample->mRate != mConfig.mSampleRate) || + (mResampler && + sample->mRate != AssertedCast(mInputSampleRate)) || + sample->mChannels != mConfig.mNumberOfChannels) { + FFMPEG_LOG( + "Rate or sample-rate at the inputof the encoder different from what " + "has been configured initially, erroring out"); + return Result( + NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR); + } + + // ffmpeg expects exactly sized input audio packets most of the time. + // Packetization is performed if needed, and audio packets of the correct size + // are fed to ffmpeg, with timestamps extrapolated the timestamp found on + // the input MediaData. + + if (!mPacketizer) { + media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate); + basePts += sample->mTime; + mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels, + basePts.ToTicksAtRate(mConfig.mSampleRate), + mConfig.mSampleRate); + } + + if (!mFirstPacketPts.IsValid()) { + mFirstPacketPts = sample->mTime; + } + + Span audio = sample->Data(); + + if (mResampler) { + // Ensure that all input frames are consumed each time by oversizing the + // output buffer. + int bufferLengthGuess = std::ceil(2. * static_cast(audio.size()) * + mConfig.mSampleRate / mInputSampleRate); + mTempBuffer.SetLength(bufferLengthGuess); + uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels; + uint32_t inputFramesProcessed = inputFrames; + uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels; + DebugOnly rv = speex_resampler_process_interleaved_float( + mResampler.get(), audio.data(), &inputFramesProcessed, + mTempBuffer.Elements(), &outputFrames); + audio = Span(mTempBuffer.Elements(), + outputFrames * mConfig.mNumberOfChannels); + MOZ_ASSERT(inputFrames == inputFramesProcessed, + "increate the buffer to consume all input each time"); + MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS); + } + + EncodedData output; + MediaResult rv = NS_OK; + + mPacketizer->Input(audio.data(), audio.Length() / mConfig.mNumberOfChannels); + + // Dequeue and encode each packet + while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) { + mTempBuffer.SetLength(mCodecContext->frame_size * + mConfig.mNumberOfChannels); + media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements()); + auto audio = Span(mTempBuffer.Elements(), mTempBuffer.Length()); + FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s", + mPacketizer->PacketSize(), pts.ToString().get()); + auto encodeResult = EncodeOnePacket(audio, pts); + if (encodeResult.isOk()) { + output.AppendElements(std::move(encodeResult.unwrap())); + } else { + return encodeResult; + } + pts += media::TimeUnit(mPacketizer->PacketSize(), mConfig.mSampleRate); + } + return Result(std::move(output)); +} + +Result +FFmpegAudioEncoder::DrainWithModernAPIs() { + // If there's no packetizer, or it's empty, we can proceed immediately. 
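  // (Example, for illustration: Opus at 48 kHz uses 960-frame packets for
  //  the default 20 ms duration; if, say, 250 frames are still queued in the
  //  packetizer at drain time, they are encoded below as one final packet
  //  before the underlying ffmpeg encoder is flushed.)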
+ if (!mPacketizer || mPacketizer->FramesAvailable() == 0) { + return FFmpegDataEncoder::DrainWithModernAPIs(); + } + EncodedData output; + MediaResult rv = NS_OK; + // Dequeue and encode each packet + mTempBuffer.SetLength(mCodecContext->frame_size * + mPacketizer->ChannelCount()); + uint32_t written; + media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written); + auto audio = + Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount()); + auto encodeResult = EncodeOnePacket(audio, pts); + if (encodeResult.isOk()) { + auto array = encodeResult.unwrap(); + output.AppendElements(std::move(array)); + } else { + return encodeResult; + } + // Now, drain the encoder + auto drainResult = FFmpegDataEncoder::DrainWithModernAPIs(); + if (drainResult.isOk()) { + auto array = drainResult.unwrap(); + output.AppendElements(std::move(array)); + } else { + return drainResult; + } + return Result(std::move(output)); +} +#endif // if LIBAVCODEC_VERSION_MAJOR >= 58 + +RefPtr FFmpegAudioEncoder::ToMediaRawData( + AVPacket* aPacket) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(aPacket); + + if (aPacket->size < mDtxThreshold) { + FFMPEG_LOG( + "DTX enabled and packet is %d bytes (threshold %d), not returning.", + aPacket->size, mDtxThreshold); + return nullptr; + } + + RefPtr data = ToMediaRawDataCommon(aPacket); + + data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate); + data->mTimecode = data->mTime; + data->mDuration = + media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate); + + // Handle encoder delay + // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite + // specced yet. + if (mFirstPacketPts > data->mTime) { + data->mOriginalPresentationWindow = + Some(media::TimeInterval{data->mTime, data->GetEndTime()}); + // Duration is likely to be ajusted when the above spec issue is fixed. For + // now, leave it as-is + // data->mDuration -= (mFirstPacketPts - data->mTime); + // if (data->mDuration.IsNegative()) { + // data->mDuration = media::TimeUnit::Zero(); + // } + data->mTime = mFirstPacketPts; + } + + if (mPacketsDelivered++ == 0) { + // Attach extradata, and the config (including any channel / samplerate + // modification to fit the encoder requirements), if needed. + if (auto r = GetExtraData(aPacket); r.isOk()) { + data->mExtraData = r.unwrap(); + } + data->mConfig = MakeUnique(mConfig); + } + + if (data->mExtraData) { + FFMPEG_LOG( + "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)", + data->mTime.ToString().get(), data->mDuration.ToString().get(), + data->Size(), data->mExtraData->Length()); + } else { + FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)", + data->mTime.ToString().get(), data->mDuration.ToString().get(), + data->Size()); + } + + return data; +} + +Result, nsresult> +FFmpegAudioEncoder::GetExtraData(AVPacket* /* aPacket */) { + if (!mCodecContext->extradata_size) { + return Err(NS_ERROR_NOT_AVAILABLE); + } + // Create extra data -- they are on the context. 
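  // (For libopus this is typically the "OpusHead" identification header --
  //  channel count, pre-skip and input sample rate -- that ffmpeg fills in
  //  on the context during avcodec_open2(); for libvorbis it is the three
  //  Xiph setup headers. Exact contents depend on the codec in use.)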
+ auto extraData = MakeRefPtr(); + extraData->SetLength(mCodecContext->extradata_size); + MOZ_ASSERT(extraData); + PodCopy(extraData->Elements(), mCodecContext->extradata, + mCodecContext->extradata_size); + return extraData.forget(); +} + +} // namespace mozilla diff --git a/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.h b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.h new file mode 100644 index 0000000000..51b0bfa44e --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegAudioEncoder.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGAUDIOENCODER_H_ +#define DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGAUDIOENCODER_H_ + +#include "FFmpegDataEncoder.h" +#include "FFmpegLibWrapper.h" +#include "PlatformEncoderModule.h" +#include "TimedPacketizer.h" + +// This must be the last header included +#include "FFmpegLibs.h" +#include "speex/speex_resampler.h" + +namespace mozilla { + +template +class FFmpegAudioEncoder : public MediaDataEncoder {}; + +template <> +class FFmpegAudioEncoder : public FFmpegDataEncoder { + public: + FFmpegAudioEncoder(const FFmpegLibWrapper* aLib, AVCodecID aCodecID, + const RefPtr& aTaskQueue, + const EncoderConfig& aConfig); + + nsCString GetDescriptionName() const override; + + protected: + // Methods only called on mTaskQueue. + virtual nsresult InitSpecific() override; +#if LIBAVCODEC_VERSION_MAJOR >= 58 + Result EncodeOnePacket(Span aSamples, + media::TimeUnit aPts); + Result EncodeInputWithModernAPIs( + RefPtr aSample) override; + Result DrainWithModernAPIs() + override; +#endif + virtual RefPtr ToMediaRawData(AVPacket* aPacket) override; + Result, nsresult> GetExtraData( + AVPacket* aPacket) override; + // Most audio codecs (except PCM) require a very specific frame size. + Maybe> mPacketizer; + // A temporary buffer kept around for shuffling audio frames, resampling, + // packetization, etc. + nsTArray mTempBuffer; + // The pts of the first packet this encoder has seen, to be able to properly + // mark encoder delay as such. + media::TimeUnit mFirstPacketPts{media::TimeUnit::Invalid()}; + struct ResamplerDestroy { + void operator()(SpeexResamplerState* aResampler); + }; + // Rate at which this instance has been configured, which might be different + // from the rate the underlying encoder is running at. + int mInputSampleRate = 0; + UniquePtr mResampler; + uint64_t mPacketsDelivered = 0; + // Threshold under which a packet isn't returned to the encoder user, + // because it is known to be silent and DTX is enabled. 
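  // (With Opus DTX, a silent stretch yields packets of only one or two bytes
  //  -- a TOC byte and possibly a length byte -- so InitSpecific() sets this
  //  to 3 and ToMediaRawData() drops anything smaller.)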
+ int mDtxThreshold = 0; +}; + +} // namespace mozilla + +#endif // DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGAUDIOENCODER_H_ diff --git a/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp index 1acfc26a4c..30422987cf 100644 --- a/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp +++ b/dom/media/platforms/ffmpeg/FFmpegDataDecoder.cpp @@ -17,30 +17,14 @@ #include "mozilla/TaskQueue.h" #include "prsystem.h" #include "VideoUtils.h" +#include "FFmpegUtils.h" + #include "FFmpegLibs.h" namespace mozilla { StaticMutex FFmpegDataDecoder::sMutex; -static bool IsVideoCodec(AVCodecID aCodecID) { - switch (aCodecID) { - case AV_CODEC_ID_H264: -#if LIBAVCODEC_VERSION_MAJOR >= 54 - case AV_CODEC_ID_VP8: -#endif -#if LIBAVCODEC_VERSION_MAJOR >= 55 - case AV_CODEC_ID_VP9: -#endif -#if LIBAVCODEC_VERSION_MAJOR >= 59 - case AV_CODEC_ID_AV1: -#endif - return true; - default: - return false; - } -} - FFmpegDataDecoder::FFmpegDataDecoder(FFmpegLibWrapper* aLib, AVCodecID aCodecID) : mLib(aLib), diff --git a/dom/media/platforms/ffmpeg/FFmpegDataEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegDataEncoder.cpp new file mode 100644 index 0000000000..6b97a48156 --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegDataEncoder.cpp @@ -0,0 +1,495 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FFmpegDataEncoder.h" +#include "PlatformEncoderModule.h" + +#include + +#include "FFmpegLog.h" +#include "libavutil/error.h" +#include "mozilla/StaticMutex.h" + +#include "FFmpegUtils.h" + +namespace mozilla { + +// TODO: Remove this function and simply use `avcodec_find_encoder` once +// libopenh264 is supported. +static AVCodec* FindEncoderWithPreference(const FFmpegLibWrapper* aLib, + AVCodecID aCodecId) { + MOZ_ASSERT(aLib); + + AVCodec* codec = nullptr; + + // Prioritize libx264 for now since it's the only h264 codec we tested. + if (aCodecId == AV_CODEC_ID_H264) { + codec = aLib->avcodec_find_encoder_by_name("libx264"); + if (codec) { + FFMPEGV_LOG("Prefer libx264 for h264 codec"); + return codec; + } + FFMPEGV_LOG("Fallback to other h264 library. 
Fingers crossed"); + } + + return aLib->avcodec_find_encoder(aCodecId); +} + +template <> +AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec) { +#if LIBAVCODEC_VERSION_MAJOR >= 58 + if (aCodec == CodecType::VP8) { + return AV_CODEC_ID_VP8; + } + + if (aCodec == CodecType::VP9) { + return AV_CODEC_ID_VP9; + } + +# if !defined(USING_MOZFFVPX) + if (aCodec == CodecType::H264) { + return AV_CODEC_ID_H264; + } +# endif + + if (aCodec == CodecType::AV1) { + return AV_CODEC_ID_AV1; + } + + if (aCodec == CodecType::Opus) { + return AV_CODEC_ID_OPUS; + } + + if (aCodec == CodecType::Vorbis) { + return AV_CODEC_ID_VORBIS; + } +#endif + return AV_CODEC_ID_NONE; +} + +StaticMutex FFmpegDataEncoder::sMutex; + +FFmpegDataEncoder::FFmpegDataEncoder( + const FFmpegLibWrapper* aLib, AVCodecID aCodecID, + const RefPtr& aTaskQueue, const EncoderConfig& aConfig) + : mLib(aLib), + mCodecID(aCodecID), + mTaskQueue(aTaskQueue), + mConfig(aConfig), + mCodecName(EmptyCString()), + mCodecContext(nullptr), + mFrame(nullptr), + mVideoCodec(IsVideoCodec(aCodecID)) { + MOZ_ASSERT(mLib); + MOZ_ASSERT(mTaskQueue); +#if LIBAVCODEC_VERSION_MAJOR < 58 + MOZ_CRASH("FFmpegDataEncoder needs ffmpeg 58 at least."); +#endif +}; + +RefPtr FFmpegDataEncoder::Init() { + FFMPEG_LOG("Init"); + return InvokeAsync(mTaskQueue, this, __func__, + &FFmpegDataEncoder::ProcessInit); +} + +RefPtr FFmpegDataEncoder::Encode( + const MediaData* aSample) { + MOZ_ASSERT(aSample != nullptr); + + FFMPEG_LOG("Encode"); + return InvokeAsync(mTaskQueue, __func__, + [self = RefPtr>(this), + sample = RefPtr(aSample)]() { + return self->ProcessEncode(sample); + }); +} + +RefPtr +FFmpegDataEncoder::Reconfigure( + const RefPtr& aConfigurationChanges) { + return InvokeAsync>( + mTaskQueue, this, __func__, + &FFmpegDataEncoder::ProcessReconfigure, aConfigurationChanges); +} + +RefPtr FFmpegDataEncoder::Drain() { + FFMPEG_LOG("Drain"); + return InvokeAsync(mTaskQueue, this, __func__, + &FFmpegDataEncoder::ProcessDrain); +} + +RefPtr FFmpegDataEncoder::Shutdown() { + FFMPEG_LOG("Shutdown"); + return InvokeAsync(mTaskQueue, this, __func__, + &FFmpegDataEncoder::ProcessShutdown); +} + +RefPtr FFmpegDataEncoder::SetBitrate( + uint32_t aBitrate) { + FFMPEG_LOG("SetBitrate"); + return GenericPromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); +} + +RefPtr +FFmpegDataEncoder::ProcessInit() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ProcessInit"); + nsresult rv = InitSpecific(); + return NS_FAILED(rv) ? InitPromise::CreateAndReject(rv, __func__) + : InitPromise::CreateAndResolve(true, __func__); +} + +RefPtr +FFmpegDataEncoder::ProcessEncode(RefPtr aSample) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ProcessEncode"); + +#if LIBAVCODEC_VERSION_MAJOR < 58 + // TODO(Bug 1868253): implement encode with avcodec_encode_video2(). 
+ MOZ_CRASH("FFmpegDataEncoder needs ffmpeg 58 at least."); + return EncodePromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); +#else + + auto rv = EncodeInputWithModernAPIs(std::move(aSample)); + if (rv.isErr()) { + return EncodePromise::CreateAndReject(rv.inspectErr(), __func__); + } + + return EncodePromise::CreateAndResolve(rv.unwrap(), __func__); +#endif +} + +RefPtr +FFmpegDataEncoder::ProcessReconfigure( + const RefPtr& aConfigurationChanges) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ProcessReconfigure"); + + // Tracked in bug 1869583 -- for now this encoder always reports it cannot be + // reconfigured on the fly + return MediaDataEncoder::ReconfigurationPromise::CreateAndReject( + NS_ERROR_NOT_IMPLEMENTED, __func__); +} + +RefPtr +FFmpegDataEncoder::ProcessDrain() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ProcessDrain"); + +#if LIBAVCODEC_VERSION_MAJOR < 58 + MOZ_CRASH("FFmpegDataEncoder needs ffmpeg 58 at least."); + return EncodePromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); +#else + auto rv = DrainWithModernAPIs(); + if (rv.isErr()) { + return EncodePromise::CreateAndReject(rv.inspectErr(), __func__); + } + return EncodePromise::CreateAndResolve(rv.unwrap(), __func__); +#endif +} + +RefPtr FFmpegDataEncoder::ProcessShutdown() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ProcessShutdown"); + + ShutdownInternal(); + + // Don't shut mTaskQueue down since it's owned by others. + return ShutdownPromise::CreateAndResolve(true, __func__); +} + +AVCodec* FFmpegDataEncoder::InitCommon() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("FFmpegDataEncoder::InitCommon"); + + AVCodec* codec = FindEncoderWithPreference(mLib, mCodecID); + if (!codec) { + FFMPEG_LOG("failed to find ffmpeg encoder for codec id %d", mCodecID); + return nullptr; + } + FFMPEG_LOG("found codec: %s", codec->name); + mCodecName = codec->name; + + ForceEnablingFFmpegDebugLogs(); + + MOZ_ASSERT(!mCodecContext); + if (!(mCodecContext = mLib->avcodec_alloc_context3(codec))) { + FFMPEG_LOG("failed to allocate ffmpeg context for codec %s", codec->name); + return nullptr; + } + + return codec; +} + +MediaResult FFmpegDataEncoder::FinishInitCommon(AVCodec* aCodec) { + mCodecContext->bit_rate = static_cast(mConfig.mBitrate); +#if LIBAVCODEC_VERSION_MAJOR >= 60 + mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION; +#endif + + AVDictionary* options = nullptr; + if (int ret = OpenCodecContext(aCodec, &options); ret < 0) { + FFMPEG_LOG("failed to open %s avcodec: %s", aCodec->name, + MakeErrorString(mLib, ret).get()); + return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("avcodec_open2 error")); + } + mLib->av_dict_free(&options); + + return MediaResult(NS_OK); +} + +void FFmpegDataEncoder::ShutdownInternal() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + FFMPEG_LOG("ShutdownInternal"); + + DestroyFrame(); + + if (mCodecContext) { + CloseCodecContext(); + mLib->av_freep(&mCodecContext); + mCodecContext = nullptr; + } +} + +int FFmpegDataEncoder::OpenCodecContext(const AVCodec* aCodec, + AVDictionary** aOptions) { + MOZ_ASSERT(mCodecContext); + + StaticMutexAutoLock mon(sMutex); + return mLib->avcodec_open2(mCodecContext, aCodec, aOptions); +} + +void FFmpegDataEncoder::CloseCodecContext() { + MOZ_ASSERT(mCodecContext); + + StaticMutexAutoLock mon(sMutex); + mLib->avcodec_close(mCodecContext); +} + +bool FFmpegDataEncoder::PrepareFrame() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + + // TODO: Merge 
the duplicate part with FFmpegDataDecoder's PrepareFrame. +#if LIBAVCODEC_VERSION_MAJOR >= 55 + if (mFrame) { + mLib->av_frame_unref(mFrame); + } else { + mFrame = mLib->av_frame_alloc(); + } +#elif LIBAVCODEC_VERSION_MAJOR == 54 + if (mFrame) { + mLib->avcodec_get_frame_defaults(mFrame); + } else { + mFrame = mLib->avcodec_alloc_frame(); + } +#else + mLib->av_freep(&mFrame); + mFrame = mLib->avcodec_alloc_frame(); +#endif + return !!mFrame; +} + +void FFmpegDataEncoder::DestroyFrame() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + if (mFrame) { +#if LIBAVCODEC_VERSION_MAJOR >= 55 + mLib->av_frame_unref(mFrame); + mLib->av_frame_free(&mFrame); +#elif LIBAVCODEC_VERSION_MAJOR == 54 + mLib->avcodec_free_frame(&mFrame); +#else + mLib->av_freep(&mFrame); +#endif + mFrame = nullptr; + } +} + +// avcodec_send_frame and avcodec_receive_packet were introduced in version 58. +#if LIBAVCODEC_VERSION_MAJOR >= 58 +Result +FFmpegDataEncoder::EncodeWithModernAPIs() { + // Initialize AVPacket. + AVPacket* pkt = mLib->av_packet_alloc(); + + if (!pkt) { + FFMPEG_LOG("failed to allocate packet"); + return Err(NS_ERROR_OUT_OF_MEMORY); + } + + auto freePacket = MakeScopeExit([this, &pkt] { mLib->av_packet_free(&pkt); }); + + // Send frame and receive packets. + if (int ret = mLib->avcodec_send_frame(mCodecContext, mFrame); ret < 0) { + // In theory, avcodec_send_frame could send -EAGAIN to signal its internal + // buffers are full. In practice this can't happen as we only feed one frame + // at a time, and we immediately call avcodec_receive_packet right after. + // TODO: Create a NS_ERROR_DOM_MEDIA_ENCODE_ERR in ErrorList.py? + FFMPEG_LOG("avcodec_send_frame error: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + EncodedData output; + while (true) { + int ret = mLib->avcodec_receive_packet(mCodecContext, pkt); + if (ret == AVERROR(EAGAIN)) { + // The encoder is asking for more inputs. + FFMPEG_LOG("encoder is asking for more input!"); + break; + } + + if (ret < 0) { + // AVERROR_EOF is returned when the encoder has been fully flushed, but it + // shouldn't happen here. + FFMPEG_LOG("avcodec_receive_packet error: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + RefPtr d = ToMediaRawData(pkt); + mLib->av_packet_unref(pkt); + if (!d) { + // This can happen if e.g. DTX is enabled + FFMPEG_LOG("No encoded packet output"); + continue; + } + output.AppendElement(std::move(d)); + } + + FFMPEG_LOG("Got %zu encoded data", output.Length()); + return std::move(output); +} + +Result +FFmpegDataEncoder::DrainWithModernAPIs() { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(mCodecContext); + + // TODO: Create a Result EncodeWithModernAPIs(AVFrame + // *aFrame) to merge the duplicate code below with EncodeWithModernAPIs above. + + // Initialize AVPacket. + AVPacket* pkt = mLib->av_packet_alloc(); + if (!pkt) { + FFMPEG_LOG("failed to allocate packet"); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + auto freePacket = MakeScopeExit([this, &pkt] { mLib->av_packet_free(&pkt); }); + + // Enter draining mode by sending NULL to the avcodec_send_frame(). Note that + // this can leave the encoder in a permanent EOF state after draining. As a + // result, the encoder is unable to continue encoding. A new + // AVCodecContext/encoder creation is required if users need to encode after + // draining.
+ // + // TODO: Use `avcodec_flush_buffers` to drain the pending packets if + // AV_CODEC_CAP_ENCODER_FLUSH is set in mCodecContext->codec->capabilities. + if (int ret = mLib->avcodec_send_frame(mCodecContext, nullptr); ret < 0) { + if (ret == AVERROR_EOF) { + // The encoder has been flushed. Drain can be called multiple time. + FFMPEG_LOG("encoder has been flushed!"); + return EncodedData(); + } + + FFMPEG_LOG("avcodec_send_frame error: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + EncodedData output; + while (true) { + int ret = mLib->avcodec_receive_packet(mCodecContext, pkt); + if (ret == AVERROR_EOF) { + FFMPEG_LOG("encoder has no more output packet!"); + break; + } + + if (ret < 0) { + // avcodec_receive_packet should not result in a -EAGAIN once it's in + // draining mode. + FFMPEG_LOG("avcodec_receive_packet error: %s", + MakeErrorString(mLib, ret).get()); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + + RefPtr d = ToMediaRawData(pkt); + mLib->av_packet_unref(pkt); + if (!d) { + FFMPEG_LOG("failed to create a MediaRawData from the AVPacket"); + return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + output.AppendElement(std::move(d)); + } + + FFMPEG_LOG("Encoding successful, %zu packets", output.Length()); + + // TODO: Evaluate a better solution (Bug 1869466) + // TODO: Only re-create AVCodecContext when avcodec_flush_buffers is + // unavailable. + ShutdownInternal(); + nsresult r = InitSpecific(); + return NS_FAILED(r) ? Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR) + : Result( + std::move(output)); +} +#endif // LIBAVCODEC_VERSION_MAJOR >= 58 + +RefPtr FFmpegDataEncoder::ToMediaRawDataCommon( + AVPacket* aPacket) { + MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + MOZ_ASSERT(aPacket); + + // Copy frame data from AVPacket. + auto data = MakeRefPtr(); + UniquePtr writer(data->CreateWriter()); + if (!writer->Append(aPacket->data, static_cast(aPacket->size))) { + FFMPEG_LOG("fail to allocate MediaRawData buffer"); + return nullptr; // OOM + } + + data->mKeyframe = (aPacket->flags & AV_PKT_FLAG_KEY) != 0; + // TODO(bug 1869560): The unit of pts, dts, and duration is time_base, which + // is recommended to be the reciprocal of the frame rate, but we set it to + // microsecond for now. 
+ data->mTime = media::TimeUnit::FromMicroseconds(aPacket->pts); +#if LIBAVCODEC_VERSION_MAJOR >= 60 + data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration); +#else + int64_t duration; + if (mDurationMap.Find(aPacket->pts, duration)) { + data->mDuration = media::TimeUnit::FromMicroseconds(duration); + } else { + data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration); + } +#endif + data->mTimecode = media::TimeUnit::FromMicroseconds(aPacket->dts); + + if (auto r = GetExtraData(aPacket); r.isOk()) { + data->mExtraData = r.unwrap(); + } + + return data; +} +void FFmpegDataEncoder::ForceEnablingFFmpegDebugLogs() { +#if DEBUG + if (!getenv("MOZ_AV_LOG_LEVEL") && + MOZ_LOG_TEST(sFFmpegVideoLog, LogLevel::Debug)) { + mLib->av_log_set_level(AV_LOG_DEBUG); + } +#endif // DEBUG +} + +} // namespace mozilla diff --git a/dom/media/platforms/ffmpeg/FFmpegDataEncoder.h b/dom/media/platforms/ffmpeg/FFmpegDataEncoder.h new file mode 100644 index 0000000000..de80ed36ca --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegDataEncoder.h @@ -0,0 +1,107 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGDATAENCODER_H_ +#define DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGDATAENCODER_H_ + +#include "FFmpegLibWrapper.h" +#include "PlatformEncoderModule.h" +#include "SimpleMap.h" +#include "mozilla/ThreadSafety.h" + +// This must be the last header included +#include "FFmpegLibs.h" + +namespace mozilla { + +template +AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec); + +template <> +AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec); + +template +class FFmpegDataEncoder : public MediaDataEncoder {}; + +template <> +class FFmpegDataEncoder : public MediaDataEncoder { + using DurationMap = SimpleMap; + + public: + FFmpegDataEncoder(const FFmpegLibWrapper* aLib, AVCodecID aCodecID, + const RefPtr& aTaskQueue, + const EncoderConfig& aConfig); + + /* MediaDataEncoder Methods */ + // All methods run on the task queue, except for GetDescriptionName. + RefPtr Init() override; + RefPtr Encode(const MediaData* aSample) override; + RefPtr Reconfigure( + const RefPtr& aConfigurationChanges) + override; + RefPtr Drain() override; + RefPtr Shutdown() override; + RefPtr SetBitrate(uint32_t aBitRate) override; + + protected: + // Methods only called on mTaskQueue. + RefPtr ProcessInit(); + RefPtr ProcessEncode(RefPtr aSample); + RefPtr ProcessReconfigure( + const RefPtr& + aConfigurationChanges); + RefPtr ProcessDrain(); + RefPtr ProcessShutdown(); + // Initialize the audio or video-specific members of an encoder instance. + virtual nsresult InitSpecific() = 0; + // nullptr in case of failure. This is to be called by the + // audio/video-specific InitInternal methods in the sub-class, and initializes + // the common members. 
+ AVCodec* InitCommon(); + MediaResult FinishInitCommon(AVCodec* aCodec); + void ShutdownInternal(); + int OpenCodecContext(const AVCodec* aCodec, AVDictionary** aOptions) + MOZ_EXCLUDES(sMutex); + void CloseCodecContext() MOZ_EXCLUDES(sMutex); + bool PrepareFrame(); + void DestroyFrame(); +#if LIBAVCODEC_VERSION_MAJOR >= 58 + virtual Result EncodeInputWithModernAPIs( + RefPtr aSample) = 0; + Result EncodeWithModernAPIs(); + virtual Result DrainWithModernAPIs(); +#endif + // Convert an AVPacket to a MediaRawData. This can return nullptr if a packet + // has been processed by the encoder, but is not to be returned to the caller, + // because DTX is enabled. + virtual RefPtr ToMediaRawData(AVPacket* aPacket) = 0; + RefPtr ToMediaRawDataCommon(AVPacket* aPacket); + virtual Result, nsresult> GetExtraData( + AVPacket* aPacket) = 0; + void ForceEnablingFFmpegDebugLogs(); + + // This refers to a static FFmpegLibWrapper, so raw pointer is adequate. + const FFmpegLibWrapper* mLib; + const AVCodecID mCodecID; + const RefPtr mTaskQueue; + + // set in constructor, modified when parameters change + EncoderConfig mConfig; + + // mTaskQueue only. + nsCString mCodecName; + AVCodecContext* mCodecContext; + AVFrame* mFrame; + DurationMap mDurationMap; + + // Provide critical-section for open/close mCodecContext. + static StaticMutex sMutex; + const bool mVideoCodec; +}; + +} // namespace mozilla + +#endif /* DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGDATAENCODER_H_ */ diff --git a/dom/media/platforms/ffmpeg/FFmpegEncoderModule.cpp b/dom/media/platforms/ffmpeg/FFmpegEncoderModule.cpp index 42c54a48ed..b6e734268d 100644 --- a/dom/media/platforms/ffmpeg/FFmpegEncoderModule.cpp +++ b/dom/media/platforms/ffmpeg/FFmpegEncoderModule.cpp @@ -7,6 +7,7 @@ #include "FFmpegEncoderModule.h" #include "FFmpegLog.h" +#include "FFmpegAudioEncoder.h" #include "FFmpegVideoEncoder.h" // This must be the last header included @@ -44,6 +45,23 @@ already_AddRefed FFmpegEncoderModule::CreateVideoEncoder( return encoder.forget(); } +template +already_AddRefed FFmpegEncoderModule::CreateAudioEncoder( + const EncoderConfig& aConfig, const RefPtr& aTaskQueue) const { + AVCodecID codecId = GetFFmpegEncoderCodecId(aConfig.mCodec); + if (codecId == AV_CODEC_ID_NONE) { + FFMPEGV_LOG("No ffmpeg encoder for %s", GetCodecTypeString(aConfig.mCodec)); + return nullptr; + } + + RefPtr encoder = + new FFmpegAudioEncoder(mLib, codecId, aTaskQueue, aConfig); + FFMPEGA_LOG("ffmpeg %s encoder: %s has been created", + GetCodecTypeString(aConfig.mCodec), + encoder->GetDescriptionName().get()); + return encoder.forget(); +} + template class FFmpegEncoderModule; } // namespace mozilla diff --git a/dom/media/platforms/ffmpeg/FFmpegEncoderModule.h b/dom/media/platforms/ffmpeg/FFmpegEncoderModule.h index 1c9e94b78f..6d0e4b1c30 100644 --- a/dom/media/platforms/ffmpeg/FFmpegEncoderModule.h +++ b/dom/media/platforms/ffmpeg/FFmpegEncoderModule.h @@ -30,6 +30,10 @@ class FFmpegEncoderModule final : public PlatformEncoderModule { const EncoderConfig& aConfig, const RefPtr& aTaskQueue) const override; + already_AddRefed CreateAudioEncoder( + const EncoderConfig& aConfig, + const RefPtr& aTaskQueue) const override; + protected: explicit FFmpegEncoderModule(FFmpegLibWrapper* aLib) : mLib(aLib) { MOZ_ASSERT(mLib); diff --git a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp index bfb3105a57..5fd6102a34 100644 --- a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp +++ b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.cpp @@ -200,6 
+200,7 @@ FFmpegLibWrapper::LinkResult FFmpegLibWrapper::Link() { AV_FUNC(av_image_get_buffer_size, AV_FUNC_AVUTIL_ALL) AV_FUNC_OPTION(av_channel_layout_default, AV_FUNC_AVUTIL_60) AV_FUNC_OPTION(av_channel_layout_from_mask, AV_FUNC_AVUTIL_60) + AV_FUNC_OPTION(av_channel_layout_copy, AV_FUNC_AVUTIL_60) AV_FUNC_OPTION(av_buffer_get_opaque, (AV_FUNC_AVUTIL_56 | AV_FUNC_AVUTIL_57 | AV_FUNC_AVUTIL_58 | AV_FUNC_AVUTIL_59 | AV_FUNC_AVUTIL_60)) @@ -218,6 +219,8 @@ FFmpegLibWrapper::LinkResult FFmpegLibWrapper::Link() { AV_FUNC(av_dict_set, AV_FUNC_AVUTIL_ALL) AV_FUNC(av_dict_free, AV_FUNC_AVUTIL_ALL) AV_FUNC(av_opt_set, AV_FUNC_AVUTIL_ALL) + AV_FUNC(av_opt_set_double, AV_FUNC_AVUTIL_ALL) + AV_FUNC(av_opt_set_int, AV_FUNC_AVUTIL_ALL) #ifdef MOZ_WIDGET_GTK AV_FUNC_OPTION_SILENT(avcodec_get_hw_config, diff --git a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h index eacbba286a..226b4fc8cb 100644 --- a/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h +++ b/dom/media/platforms/ffmpeg/FFmpegLibWrapper.h @@ -161,11 +161,16 @@ struct MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS FFmpegLibWrapper { int nb_channels); void (*av_channel_layout_from_mask)(AVChannelLayout* ch_layout, uint64_t mask); + int (*av_channel_layout_copy)(AVChannelLayout* dst, AVChannelLayout* src); int (*av_dict_set)(AVDictionary** pm, const char* key, const char* value, int flags); void (*av_dict_free)(AVDictionary** m); int (*av_opt_set)(void* obj, const char* name, const char* val, int search_flags); + int (*av_opt_set_double)(void* obj, const char* name, double val, + int search_flags); + int (*av_opt_set_int)(void* obj, const char* name, int64_t val, + int search_flags); // libavutil v55 and later only AVFrame* (*av_frame_alloc)(); diff --git a/dom/media/platforms/ffmpeg/FFmpegLog.h b/dom/media/platforms/ffmpeg/FFmpegLog.h index 45ea700936..676c5e4ba1 100644 --- a/dom/media/platforms/ffmpeg/FFmpegLog.h +++ b/dom/media/platforms/ffmpeg/FFmpegLog.h @@ -19,6 +19,9 @@ static mozilla::LazyLogModule sFFmpegAudioLog("FFmpegAudio"); # define FFMPEGV_LOG(str, ...) \ MOZ_LOG(sFFmpegVideoLog, mozilla::LogLevel::Debug, \ ("FFVPX: " str, ##__VA_ARGS__)) +# define FFMPEGA_LOG(str, ...) \ + MOZ_LOG(sFFmpegAudioLog, mozilla::LogLevel::Debug, \ + ("FFVPX: " str, ##__VA_ARGS__)) # define FFMPEGP_LOG(str, ...) \ MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, ("FFVPX: " str, ##__VA_ARGS__)) #else @@ -28,11 +31,15 @@ static mozilla::LazyLogModule sFFmpegAudioLog("FFmpegAudio"); # define FFMPEGV_LOG(str, ...) \ MOZ_LOG(sFFmpegVideoLog, mozilla::LogLevel::Debug, \ ("FFMPEG: " str, ##__VA_ARGS__)) +# define FFMPEGA_LOG(str, ...) \ + MOZ_LOG(sFFmpegAudioLog, mozilla::LogLevel::Debug, \ + ("FFMPEG: " str, ##__VA_ARGS__)) # define FFMPEGP_LOG(str, ...) \ MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, ("FFMPEG: " str, ##__VA_ARGS__)) #endif -#define FFMPEG_LOGV(...) \ - MOZ_LOG(sFFmpegVideoLog, mozilla::LogLevel::Verbose, (__VA_ARGS__)) +#define FFMPEG_LOGV(...) \ + MOZ_LOG(mVideoCodec ? 
sFFmpegVideoLog : sFFmpegAudioLog, \ + mozilla::LogLevel::Verbose, (__VA_ARGS__)) #endif // __FFmpegLog_h__ diff --git a/dom/media/platforms/ffmpeg/FFmpegUtils.cpp b/dom/media/platforms/ffmpeg/FFmpegUtils.cpp new file mode 100644 index 0000000000..e209306133 --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegUtils.cpp @@ -0,0 +1,23 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FFmpegUtils.h" + +#include "FFmpegLibWrapper.h" +#include "mozilla/Assertions.h" +#include "nsString.h" + +namespace mozilla { + +nsCString MakeErrorString(const FFmpegLibWrapper* aLib, int aErrNum) { + MOZ_ASSERT(aLib); + + char errStr[FFmpegErrorMaxStringSize]; + aLib->av_strerror(aErrNum, errStr, FFmpegErrorMaxStringSize); + return nsCString(errStr); +} + +} // namespace mozilla diff --git a/dom/media/platforms/ffmpeg/FFmpegUtils.h b/dom/media/platforms/ffmpeg/FFmpegUtils.h new file mode 100644 index 0000000000..fe588ed14c --- /dev/null +++ b/dom/media/platforms/ffmpeg/FFmpegUtils.h @@ -0,0 +1,56 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGUTILS_H_ +#define DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGUTILS_H_ + +#include +#include "nsStringFwd.h" +#include "FFmpegLibWrapper.h" + +// This must be the last header included +#include "FFmpegLibs.h" + +namespace mozilla { + +#if LIBAVCODEC_VERSION_MAJOR >= 57 +using FFmpegBitRate = int64_t; +constexpr size_t FFmpegErrorMaxStringSize = AV_ERROR_MAX_STRING_SIZE; +#else +using FFmpegBitRate = int; +constexpr size_t FFmpegErrorMaxStringSize = 64; +#endif + +nsCString MakeErrorString(const FFmpegLibWrapper* aLib, int aErrNum); + +template +void IterateZeroTerminated(const T& aList, F&& aLambda) { + for (size_t i = 0; aList[i] != 0; i++) { + aLambda(aList[i]); + } +} + +inline bool IsVideoCodec(AVCodecID aCodecID) { + switch (aCodecID) { + case AV_CODEC_ID_H264: +#if LIBAVCODEC_VERSION_MAJOR >= 54 + case AV_CODEC_ID_VP8: +#endif +#if LIBAVCODEC_VERSION_MAJOR >= 55 + case AV_CODEC_ID_VP9: +#endif +#if LIBAVCODEC_VERSION_MAJOR >= 59 + case AV_CODEC_ID_AV1: +#endif + return true; + default: + return false; + } +} + +} // namespace mozilla + +#endif // DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGUTILS_H_ diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp index 040b2e72a1..3fe46938fd 100644 --- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp +++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp @@ -46,6 +46,7 @@ # define AV_PIX_FMT_YUV444P10LE PIX_FMT_YUV444P10LE # define AV_PIX_FMT_GBRP PIX_FMT_GBRP # define AV_PIX_FMT_NONE PIX_FMT_NONE +# define AV_PIX_FMT_VAAPI_VLD PIX_FMT_VAAPI_VLD #endif #if LIBAVCODEC_VERSION_MAJOR > 58 # define AV_PIX_FMT_VAAPI_VLD AV_PIX_FMT_VAAPI @@ -618,6 +619,9 @@ static gfx::ColorDepth GetColorDepth(const AVPixelFormat& aFormat) { case AV_PIX_FMT_YUV444P12LE: return gfx::ColorDepth::COLOR_12; #endif + case AV_PIX_FMT_VAAPI_VLD: + // Placeholder, it could be deeper colors + 
return gfx::ColorDepth::COLOR_8; default: MOZ_ASSERT_UNREACHABLE("Not supported format?"); return gfx::ColorDepth::COLOR_8; @@ -662,7 +666,7 @@ static int GetVideoBufferWrapper(struct AVCodecContext* aCodecContext, static void ReleaseVideoBufferWrapper(void* opaque, uint8_t* data) { if (opaque) { - FFMPEG_LOGV("ReleaseVideoBufferWrapper: PlanarYCbCrImage=%p", opaque); + FFMPEGV_LOG("ReleaseVideoBufferWrapper: PlanarYCbCrImage=%p", opaque); RefPtr image = static_cast(opaque); image->ReleaseBuffer(); } @@ -1199,6 +1203,8 @@ MediaResult FFmpegVideoDecoder::DoDecode( return Some(DecodeStage::YUV444P); case AV_PIX_FMT_GBRP: return Some(DecodeStage::GBRP); + case AV_PIX_FMT_VAAPI_VLD: + return Some(DecodeStage::VAAPI_SURFACE); default: return Nothing(); } diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.cpp b/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.cpp index a3cfdf1b1d..9d1dbcf80f 100644 --- a/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.cpp +++ b/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.cpp @@ -8,32 +8,21 @@ #include "BufferReader.h" #include "FFmpegLog.h" -#include "FFmpegRuntimeLinker.h" +#include "FFmpegUtils.h" #include "H264.h" #include "ImageContainer.h" #include "libavutil/error.h" #include "libavutil/pixfmt.h" -#include "mozilla/CheckedInt.h" -#include "mozilla/PodOperations.h" -#include "mozilla/StaticMutex.h" -#include "mozilla/dom/ImageBitmapBinding.h" #include "mozilla/dom/ImageUtils.h" #include "nsPrintfCString.h" #include "ImageToI420.h" #include "libyuv.h" +#include "FFmpegRuntimeLinker.h" // The ffmpeg namespace is introduced to avoid the PixelFormat's name conflicts // with MediaDataEncoder::PixelFormat in MediaDataEncoder class scope. namespace ffmpeg { -#if LIBAVCODEC_VERSION_MAJOR >= 57 -using FFmpegBitRate = int64_t; -constexpr size_t FFmpegErrorMaxStringSize = AV_ERROR_MAX_STRING_SIZE; -#else -using FFmpegBitRate = int; -constexpr size_t FFmpegErrorMaxStringSize = 64; -#endif - // TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then // to AV_PIX_FMT_YUVA420P here. #if LIBAVCODEC_VERSION_MAJOR < 54 @@ -166,9 +155,9 @@ struct VPXSVCSetting { nsTArray mTargetBitrates; }; -static Maybe GetVPXSVCSetting( - const MediaDataEncoder::ScalabilityMode& aMode, uint32_t aBitPerSec) { - if (aMode == MediaDataEncoder::ScalabilityMode::None) { +static Maybe GetVPXSVCSetting(const ScalabilityMode& aMode, + uint32_t aBitPerSec) { + if (aMode == ScalabilityMode::None) { return Nothing(); } @@ -183,7 +172,7 @@ static Maybe GetVPXSVCSetting( nsTArray layerIds; nsTArray rateDecimators; nsTArray bitrates; - if (aMode == MediaDataEncoder::ScalabilityMode::L1T2) { + if (aMode == ScalabilityMode::L1T2) { // Two temporal layers. 0-1... // // Frame pattern: @@ -208,7 +197,7 @@ static Maybe GetVPXSVCSetting( bitrates.AppendElement(kbps * 3 / 5); bitrates.AppendElement(kbps); } else { - MOZ_ASSERT(aMode == MediaDataEncoder::ScalabilityMode::L1T3); + MOZ_ASSERT(aMode == ScalabilityMode::L1T3); // Three temporal layers. 0-2-1-2... // // Frame pattern: @@ -245,59 +234,6 @@ static Maybe GetVPXSVCSetting( std::move(rateDecimators), std::move(bitrates)}); } -static nsCString MakeErrorString(const FFmpegLibWrapper* aLib, int aErrNum) { - MOZ_ASSERT(aLib); - - char errStr[ffmpeg::FFmpegErrorMaxStringSize]; - aLib->av_strerror(aErrNum, errStr, ffmpeg::FFmpegErrorMaxStringSize); - return nsCString(errStr); -} - -// TODO: Remove this function and simply use `avcodec_find_encoder` once -// libopenh264 is supported. 
-static AVCodec* FindEncoderWithPreference(const FFmpegLibWrapper* aLib, - AVCodecID aCodecId) { - MOZ_ASSERT(aLib); - - AVCodec* codec = nullptr; - - // Prioritize libx264 for now since it's the only h264 codec we tested. - if (aCodecId == AV_CODEC_ID_H264) { - codec = aLib->avcodec_find_encoder_by_name("libx264"); - if (codec) { - FFMPEGV_LOG("Prefer libx264 for h264 codec"); - return codec; - } - } - - FFMPEGV_LOG("Fallback to other h264 library. Fingers crossed"); - return aLib->avcodec_find_encoder(aCodecId); -} - -template <> -AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec) { -#if LIBAVCODEC_VERSION_MAJOR >= 58 - if (aCodec == CodecType::VP8) { - return AV_CODEC_ID_VP8; - } - - if (aCodec == CodecType::VP9) { - return AV_CODEC_ID_VP9; - } - -# if !defined(USING_MOZFFVPX) - if (aCodec == CodecType::H264) { - return AV_CODEC_ID_H264; - } -# endif - - if (aCodec == CodecType::AV1) { - return AV_CODEC_ID_AV1; - } -#endif - return AV_CODEC_ID_NONE; -} - uint8_t FFmpegVideoEncoder::SVCInfo::UpdateTemporalLayerId() { MOZ_ASSERT(!mTemporalLayerIds.IsEmpty()); @@ -306,70 +242,10 @@ uint8_t FFmpegVideoEncoder::SVCInfo::UpdateTemporalLayerId() { return static_cast(mTemporalLayerIds[currentIndex]); } -StaticMutex FFmpegVideoEncoder::sMutex; - FFmpegVideoEncoder::FFmpegVideoEncoder( const FFmpegLibWrapper* aLib, AVCodecID aCodecID, const RefPtr& aTaskQueue, const EncoderConfig& aConfig) - : mLib(aLib), - mCodecID(aCodecID), - mTaskQueue(aTaskQueue), - mConfig(aConfig), - mCodecName(EmptyCString()), - mCodecContext(nullptr), - mFrame(nullptr), - mSVCInfo(Nothing()) { - MOZ_ASSERT(mLib); - MOZ_ASSERT(mTaskQueue); -#if LIBAVCODEC_VERSION_MAJOR < 58 - MOZ_CRASH("FFmpegVideoEncoder needs ffmpeg 58 at least."); -#endif -}; - -RefPtr FFmpegVideoEncoder::Init() { - FFMPEGV_LOG("Init"); - return InvokeAsync(mTaskQueue, this, __func__, - &FFmpegVideoEncoder::ProcessInit); -} - -RefPtr FFmpegVideoEncoder::Encode( - const MediaData* aSample) { - MOZ_ASSERT(aSample != nullptr); - - FFMPEGV_LOG("Encode"); - return InvokeAsync(mTaskQueue, __func__, - [self = RefPtr>(this), - sample = RefPtr(aSample)]() { - return self->ProcessEncode(std::move(sample)); - }); -} - -RefPtr -FFmpegVideoEncoder::Reconfigure( - const RefPtr& aConfigurationChanges) { - return InvokeAsync>( - mTaskQueue, this, __func__, - &FFmpegVideoEncoder::ProcessReconfigure, - aConfigurationChanges); -} - -RefPtr FFmpegVideoEncoder::Drain() { - FFMPEGV_LOG("Drain"); - return InvokeAsync(mTaskQueue, this, __func__, - &FFmpegVideoEncoder::ProcessDrain); -} - -RefPtr FFmpegVideoEncoder::Shutdown() { - FFMPEGV_LOG("Shutdown"); - return InvokeAsync(mTaskQueue, this, __func__, - &FFmpegVideoEncoder::ProcessShutdown); -} - -RefPtr FFmpegVideoEncoder::SetBitrate( - uint32_t aBitrate) { - FFMPEGV_LOG("SetBitrate"); - return GenericPromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); -} + : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {} nsCString FFmpegVideoEncoder::GetDescriptionName() const { #ifdef USING_MOZFFVPX @@ -385,112 +261,23 @@ nsCString FFmpegVideoEncoder::GetDescriptionName() const { #endif } -RefPtr -FFmpegVideoEncoder::ProcessInit() { +nsresult FFmpegVideoEncoder::InitSpecific() { MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - FFMPEGV_LOG("ProcessInit"); - MediaResult r = InitInternal(); - return NS_FAILED(r) - ? 
InitPromise::CreateAndReject(r, __func__) - : InitPromise::CreateAndResolve(TrackInfo::kVideoTrack, __func__); -} - -RefPtr -FFmpegVideoEncoder::ProcessEncode(RefPtr aSample) { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); + FFMPEGV_LOG("FFmpegVideoEncoder::InitSpecific"); - FFMPEGV_LOG("ProcessEncode"); - -#if LIBAVCODEC_VERSION_MAJOR < 58 - // TODO(Bug 1868253): implement encode with avcodec_encode_video2(). - MOZ_CRASH("FFmpegVideoEncoder needs ffmpeg 58 at least."); - return EncodePromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); -#else - RefPtr sample(aSample->As()); - MOZ_ASSERT(sample); - - return EncodeWithModernAPIs(sample); -#endif -} - -RefPtr -FFmpegVideoEncoder::ProcessReconfigure( - const RefPtr aConfigurationChanges) { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - FFMPEGV_LOG("ProcessReconfigure"); - - // Tracked in bug 1869583 -- for now this encoder always reports it cannot be - // reconfigured on the fly - return MediaDataEncoder::ReconfigurationPromise::CreateAndReject( - NS_ERROR_NOT_IMPLEMENTED, __func__); -} - -RefPtr -FFmpegVideoEncoder::ProcessDrain() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - FFMPEGV_LOG("ProcessDrain"); - -#if LIBAVCODEC_VERSION_MAJOR < 58 - MOZ_CRASH("FFmpegVideoEncoder needs ffmpeg 58 at least."); - return EncodePromise::CreateAndReject(NS_ERROR_NOT_IMPLEMENTED, __func__); -#else - return DrainWithModernAPIs(); -#endif -} - -RefPtr FFmpegVideoEncoder::ProcessShutdown() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - FFMPEGV_LOG("ProcessShutdown"); - - ShutdownInternal(); - - // Don't shut mTaskQueue down since it's owned by others. - return ShutdownPromise::CreateAndResolve(true, __func__); -} - -MediaResult FFmpegVideoEncoder::InitInternal() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - FFMPEGV_LOG("InitInternal"); - - if (mCodecID == AV_CODEC_ID_H264) { - // H264Specific is required to get the format (avcc vs annexb). - if (!mConfig.mCodecSpecific || - !mConfig.mCodecSpecific->is()) { - return MediaResult( - NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("Unable to get H264 necessary encoding info")); - } - } - - AVCodec* codec = FindEncoderWithPreference(mLib, mCodecID); + // Initialize the common members of the encoder instance + AVCodec* codec = FFmpegDataEncoder::InitCommon(); if (!codec) { - FFMPEGV_LOG("failed to find ffmpeg encoder for codec id %d", mCodecID); - return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("Unable to find codec")); + FFMPEGV_LOG("FFmpegDataEncoder::InitCommon failed"); + return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR; } - FFMPEGV_LOG("find codec: %s", codec->name); - mCodecName = codec->name; - ForceEnablingFFmpegDebugLogs(); - - MOZ_ASSERT(!mCodecContext); - if (!(mCodecContext = mLib->avcodec_alloc_context3(codec))) { - FFMPEGV_LOG("failed to allocate ffmpeg context for codec %s", codec->name); - return MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Failed to initialize ffmpeg context")); - } - - // Set up AVCodecContext. + // And now the video-specific part mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P; - mCodecContext->bit_rate = - static_cast(mConfig.mBitrate); mCodecContext->width = static_cast(mConfig.mSize.width); mCodecContext->height = static_cast(mConfig.mSize.height); + mCodecContext->gop_size = static_cast(mConfig.mKeyframeInterval); // TODO(bug 1869560): The recommended time_base is the reciprocal of the frame // rate, but we set it to microsecond for now. 
mCodecContext->time_base = @@ -500,12 +287,13 @@ MediaResult FFmpegVideoEncoder::InitInternal() { mCodecContext->framerate = AVRational{.num = static_cast(mConfig.mFramerate), .den = 1}; #endif + #if LIBAVCODEC_VERSION_MAJOR >= 60 mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION; #endif mCodecContext->gop_size = static_cast(mConfig.mKeyframeInterval); - if (mConfig.mUsage == MediaDataEncoder::Usage::Realtime) { + if (mConfig.mUsage == Usage::Realtime) { mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0); // Explicitly ask encoder do not keep in flight at any one time for // lookahead purposes. @@ -578,14 +366,11 @@ MediaResult FFmpegVideoEncoder::InitInternal() { // encoder. mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - AVDictionary* options = nullptr; - if (int ret = OpenCodecContext(codec, &options); ret < 0) { - FFMPEGV_LOG("failed to open %s avcodec: %s", codec->name, - MakeErrorString(mLib, ret).get()); - return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("Unable to open avcodec")); + MediaResult rv = FinishInitCommon(codec); + if (NS_FAILED(rv)) { + FFMPEGV_LOG("FFmpeg video encoder initialization failure."); + return rv; } - mLib->av_dict_free(&options); FFMPEGV_LOG("%s has been initialized with format: %s, bitrate: %" PRIi64 ", width: %d, height: %d, time_base: %d/%d%s", @@ -595,74 +380,7 @@ MediaResult FFmpegVideoEncoder::InitInternal() { mCodecContext->time_base.num, mCodecContext->time_base.den, h264Log.IsEmpty() ? "" : h264Log.get()); - return MediaResult(NS_OK); -} - -void FFmpegVideoEncoder::ShutdownInternal() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - FFMPEGV_LOG("ShutdownInternal"); - - DestroyFrame(); - - if (mCodecContext) { - CloseCodecContext(); - mLib->av_freep(&mCodecContext); - mCodecContext = nullptr; - } -} - -int FFmpegVideoEncoder::OpenCodecContext(const AVCodec* aCodec, - AVDictionary** aOptions) { - MOZ_ASSERT(mCodecContext); - - StaticMutexAutoLock mon(sMutex); - return mLib->avcodec_open2(mCodecContext, aCodec, aOptions); -} - -void FFmpegVideoEncoder::CloseCodecContext() { - MOZ_ASSERT(mCodecContext); - - StaticMutexAutoLock mon(sMutex); - mLib->avcodec_close(mCodecContext); -} - -bool FFmpegVideoEncoder::PrepareFrame() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - - // TODO: Merge the duplicate part with FFmpegDataDecoder's PrepareFrame. -#if LIBAVCODEC_VERSION_MAJOR >= 55 - if (mFrame) { - mLib->av_frame_unref(mFrame); - } else { - mFrame = mLib->av_frame_alloc(); - } -#elif LIBAVCODEC_VERSION_MAJOR == 54 - if (mFrame) { - mLib->avcodec_get_frame_defaults(mFrame); - } else { - mFrame = mLib->avcodec_alloc_frame(); - } -#else - mLib->av_freep(&mFrame); - mFrame = mLib->avcodec_alloc_frame(); -#endif - return !!mFrame; -} - -void FFmpegVideoEncoder::DestroyFrame() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - if (mFrame) { -#if LIBAVCODEC_VERSION_MAJOR >= 55 - mLib->av_frame_unref(mFrame); - mLib->av_frame_free(&mFrame); -#elif LIBAVCODEC_VERSION_MAJOR == 54 - mLib->avcodec_free_frame(&mFrame); -#else - mLib->av_freep(&mFrame); -#endif - mFrame = nullptr; - } + return NS_OK; } bool FFmpegVideoEncoder::ScaleInputFrame() { @@ -709,71 +427,62 @@ bool FFmpegVideoEncoder::ScaleInputFrame() { // avcodec_send_frame and avcodec_receive_packet were introduced in version 58. 
#if LIBAVCODEC_VERSION_MAJOR >= 58 -RefPtr FFmpegVideoEncoder< - LIBAV_VER>::EncodeWithModernAPIs(RefPtr aSample) { +Result FFmpegVideoEncoder< + LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr aSample) { MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); MOZ_ASSERT(mCodecContext); MOZ_ASSERT(aSample); + RefPtr sample(aSample->As()); + // Validate input. - if (!aSample->mImage) { + if (!sample->mImage) { FFMPEGV_LOG("No image"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_ILLEGAL_INPUT, - RESULT_DETAIL("No image in sample")), - __func__); - } else if (aSample->mImage->GetSize().IsEmpty()) { + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); + } + if (sample->mImage->GetSize().IsEmpty()) { FFMPEGV_LOG("image width or height is invalid"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_ILLEGAL_INPUT, - RESULT_DETAIL("Invalid image size")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } // Allocate AVFrame. if (!PrepareFrame()) { FFMPEGV_LOG("failed to allocate frame"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to allocate frame")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } // Set AVFrame properties for its internal data allocation. For now, we always // convert into ffmpeg's buffer. mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P; - mFrame->width = static_cast(aSample->mImage->GetSize().width); - mFrame->height = static_cast(aSample->mImage->GetSize().height); + mFrame->width = static_cast(sample->mImage->GetSize().width); + mFrame->height = static_cast(sample->mImage->GetSize().height); // Allocate AVFrame data. if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) { FFMPEGV_LOG("failed to allocate frame data: %s", MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to allocate frame data")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } // Make sure AVFrame is writable. if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) { FFMPEGV_LOG("failed to make frame writable: %s", MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_NOT_AVAILABLE, - RESULT_DETAIL("Unable to make frame writable")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } nsresult rv = ConvertToI420( - aSample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1], + sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1], mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2]); if (NS_FAILED(rv)) { FFMPEGV_LOG("Conversion error!"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_ILLEGAL_INPUT, - RESULT_DETAIL("libyuv conversion error")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } // Scale the YUV input frame if needed -- the encoded frame will have the @@ -781,10 +490,8 @@ RefPtr FFmpegVideoEncoder< if (mFrame->width != mConfig.mSize.Width() || mFrame->height != mConfig.mSize.Height()) { if (!ScaleInputFrame()) { - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("libyuv scaling error")), - __func__); + return Result( + NS_ERROR_DOM_MEDIA_FATAL_ERR); } } @@ -805,193 +512,17 @@ RefPtr FFmpegVideoEncoder< # endif mFrame->pkt_duration = aSample->mDuration.ToMicroseconds(); - // Initialize AVPacket. 
- AVPacket* pkt = mLib->av_packet_alloc(); - - if (!pkt) { - FFMPEGV_LOG("failed to allocate packet"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to allocate packet")), - __func__); - } - - auto freePacket = MakeScopeExit([this, &pkt] { mLib->av_packet_free(&pkt); }); - - // Send frame and receive packets. - - if (int ret = mLib->avcodec_send_frame(mCodecContext, mFrame); ret < 0) { - // In theory, avcodec_send_frame could sent -EAGAIN to signal its internal - // buffers is full. In practice this can't happen as we only feed one frame - // at a time, and we immediately call avcodec_receive_packet right after. - // TODO: Create a NS_ERROR_DOM_MEDIA_ENCODE_ERR in ErrorList.py? - FFMPEGV_LOG("avcodec_send_frame error: %s", - MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("avcodec_send_frame error")), - __func__); - } - - EncodedData output; - while (true) { - int ret = mLib->avcodec_receive_packet(mCodecContext, pkt); - if (ret == AVERROR(EAGAIN)) { - // The encoder is asking for more inputs. - FFMPEGV_LOG("encoder is asking for more input!"); - break; - } - - if (ret < 0) { - // AVERROR_EOF is returned when the encoder has been fully flushed, but it - // shouldn't happen here. - FFMPEGV_LOG("avcodec_receive_packet error: %s", - MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("avcodec_receive_packet error")), - __func__); - } - - RefPtr d = ToMediaRawData(pkt); - mLib->av_packet_unref(pkt); - if (!d) { - FFMPEGV_LOG("failed to create a MediaRawData from the AVPacket"); - return EncodePromise::CreateAndReject( - MediaResult( - NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to get MediaRawData from AVPacket")), - __func__); - } - output.AppendElement(std::move(d)); - } - - FFMPEGV_LOG("get %zu encoded data", output.Length()); - return EncodePromise::CreateAndResolve(std::move(output), __func__); + // Now send the AVFrame to ffmpeg for encoding, same code for audio and video. + return FFmpegDataEncoder::EncodeWithModernAPIs(); } - -RefPtr -FFmpegVideoEncoder::DrainWithModernAPIs() { - MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); - MOZ_ASSERT(mCodecContext); - - // TODO: Create a Result EncodeWithModernAPIs(AVFrame - // *aFrame) to merge the duplicate code below with EncodeWithModernAPIs above. - - // Initialize AVPacket. - AVPacket* pkt = mLib->av_packet_alloc(); - if (!pkt) { - FFMPEGV_LOG("failed to allocate packet"); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to allocate packet")), - __func__); - } - auto freePacket = MakeScopeExit([this, &pkt] { mLib->av_packet_free(&pkt); }); - - // Enter draining mode by sending NULL to the avcodec_send_frame(). Note that - // this can leave the encoder in a permanent EOF state after draining. As a - // result, the encoder is unable to continue encoding. A new - // AVCodecContext/encoder creation is required if users need to encode after - // draining. - // - // TODO: Use `avcodec_flush_buffers` to drain the pending packets if - // AV_CODEC_CAP_ENCODER_FLUSH is set in mCodecContext->codec->capabilities. - if (int ret = mLib->avcodec_send_frame(mCodecContext, nullptr); ret < 0) { - if (ret == AVERROR_EOF) { - // The encoder has been flushed. Drain can be called multiple time. 
- FFMPEGV_LOG("encoder has been flushed!"); - return EncodePromise::CreateAndResolve(EncodedData(), __func__); - } - - FFMPEGV_LOG("avcodec_send_frame error: %s", - MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("avcodec_send_frame error")), - __func__); - } - - EncodedData output; - while (true) { - int ret = mLib->avcodec_receive_packet(mCodecContext, pkt); - if (ret == AVERROR_EOF) { - FFMPEGV_LOG("encoder has no more output packet!"); - break; - } - - if (ret < 0) { - // avcodec_receive_packet should not result in a -EAGAIN once it's in - // draining mode. - FFMPEGV_LOG("avcodec_receive_packet error: %s", - MakeErrorString(mLib, ret).get()); - return EncodePromise::CreateAndReject( - MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, - RESULT_DETAIL("avcodec_receive_packet error")), - __func__); - } - - RefPtr d = ToMediaRawData(pkt); - mLib->av_packet_unref(pkt); - if (!d) { - FFMPEGV_LOG("failed to create a MediaRawData from the AVPacket"); - return EncodePromise::CreateAndReject( - MediaResult( - NS_ERROR_OUT_OF_MEMORY, - RESULT_DETAIL("Unable to get MediaRawData from AVPacket")), - __func__); - } - output.AppendElement(std::move(d)); - } - - FFMPEGV_LOG("get %zu encoded data", output.Length()); - - // TODO: Evaluate a better solution (Bug 1869466) - // TODO: Only re-create AVCodecContext when avcodec_flush_buffers is - // unavailable. - ShutdownInternal(); - MediaResult r = InitInternal(); - return NS_FAILED(r) - ? EncodePromise::CreateAndReject(r, __func__) - : EncodePromise::CreateAndResolve(std::move(output), __func__); -} -#endif +#endif // if LIBAVCODEC_VERSION_MAJOR >= 58 RefPtr FFmpegVideoEncoder::ToMediaRawData( AVPacket* aPacket) { MOZ_ASSERT(mTaskQueue->IsOnCurrentThread()); MOZ_ASSERT(aPacket); - // TODO: Do we need to check AV_PKT_FLAG_CORRUPT? - - // Copy frame data from AVPacket. - auto data = MakeRefPtr(); - UniquePtr writer(data->CreateWriter()); - if (!writer->Append(aPacket->data, static_cast(aPacket->size))) { - FFMPEGV_LOG("fail to allocate MediaRawData buffer"); - return nullptr; // OOM - } - - data->mKeyframe = (aPacket->flags & AV_PKT_FLAG_KEY) != 0; - // TODO(bug 1869560): The unit of pts, dts, and duration is time_base, which - // is recommended to be the reciprocal of the frame rate, but we set it to - // microsecond for now. - data->mTime = media::TimeUnit::FromMicroseconds(aPacket->pts); -#if LIBAVCODEC_VERSION_MAJOR >= 60 - data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration); -#else - int64_t duration; - if (mDurationMap.Find(aPacket->pts, duration)) { - data->mDuration = media::TimeUnit::FromMicroseconds(duration); - } else { - data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration); - } -#endif - data->mTimecode = media::TimeUnit::FromMicroseconds(aPacket->dts); - - if (auto r = GetExtraData(aPacket); r.isOk()) { - data->mExtraData = r.unwrap(); - } + RefPtr data = ToMediaRawDataCommon(aPacket); // TODO: Is it possible to retrieve temporal layer id from underlying codec // instead? 
diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.h b/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.h index 07c433ddd7..0ee5f52aec 100644 --- a/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.h +++ b/dom/media/platforms/ffmpeg/FFmpegVideoEncoder.h @@ -7,28 +7,21 @@ #ifndef DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGVIDEOENCODER_H_ #define DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGVIDEOENCODER_H_ +#include "FFmpegDataEncoder.h" #include "FFmpegLibWrapper.h" #include "PlatformEncoderModule.h" #include "SimpleMap.h" -#include "mozilla/ThreadSafety.h" // This must be the last header included #include "FFmpegLibs.h" namespace mozilla { -template -AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec); - -template <> -AVCodecID GetFFmpegEncoderCodecId(CodecType aCodec); - template class FFmpegVideoEncoder : public MediaDataEncoder {}; -// TODO: Bug 1860925: FFmpegDataEncoder template <> -class FFmpegVideoEncoder final : public MediaDataEncoder { +class FFmpegVideoEncoder : public FFmpegDataEncoder { using DurationMap = SimpleMap; public: @@ -36,44 +29,19 @@ class FFmpegVideoEncoder final : public MediaDataEncoder { const RefPtr& aTaskQueue, const EncoderConfig& aConfig); - /* MediaDataEncoder Methods */ - // All methods run on the task queue, except for GetDescriptionName. - RefPtr Init() override; - RefPtr Encode(const MediaData* aSample) override; - RefPtr Reconfigure( - const RefPtr& aConfigurationChanges) - override; - RefPtr Drain() override; - RefPtr Shutdown() override; - RefPtr SetBitrate(uint32_t aBitRate) override; nsCString GetDescriptionName() const override; - private: - ~FFmpegVideoEncoder() = default; - + protected: // Methods only called on mTaskQueue. - RefPtr ProcessInit(); - RefPtr ProcessEncode(RefPtr aSample); - RefPtr ProcessReconfigure( - const RefPtr aConfigurationChanges); - RefPtr ProcessDrain(); - RefPtr ProcessShutdown(); - MediaResult InitInternal(); - void ShutdownInternal(); - // TODO: Share these with FFmpegDataDecoder. - int OpenCodecContext(const AVCodec* aCodec, AVDictionary** aOptions) - MOZ_EXCLUDES(sMutex); - void CloseCodecContext() MOZ_EXCLUDES(sMutex); - bool PrepareFrame(); - void DestroyFrame(); - bool ScaleInputFrame(); + virtual nsresult InitSpecific() override; #if LIBAVCODEC_VERSION_MAJOR >= 58 - RefPtr EncodeWithModernAPIs(RefPtr aSample); - RefPtr DrainWithModernAPIs(); + Result EncodeInputWithModernAPIs( + RefPtr aSample) override; #endif - RefPtr ToMediaRawData(AVPacket* aPacket); + bool ScaleInputFrame(); + virtual RefPtr ToMediaRawData(AVPacket* aPacket) override; Result, nsresult> GetExtraData( - AVPacket* aPacket); + AVPacket* aPacket) override; void ForceEnablingFFmpegDebugLogs(); struct SVCSettings { nsTArray mTemporalLayerIds; @@ -88,21 +56,6 @@ class FFmpegVideoEncoder final : public MediaDataEncoder { nsTArray> mSettingKeyValuePairs; }; H264Settings GetH264Settings(const H264Specific& aH264Specific); - - // This refers to a static FFmpegLibWrapper, so raw pointer is adequate. - const FFmpegLibWrapper* mLib; - const AVCodecID mCodecID; - const RefPtr mTaskQueue; - - // set in constructor, modified when parameters change - EncoderConfig mConfig; - - // mTaskQueue only. - nsCString mCodecName; - AVCodecContext* mCodecContext; - AVFrame* mFrame; - DurationMap mDurationMap; - struct SVCInfo { explicit SVCInfo(nsTArray&& aTemporalLayerIds) : mTemporalLayerIds(std::move(aTemporalLayerIds)), mNextIndex(0) {} @@ -111,13 +64,9 @@ class FFmpegVideoEncoder final : public MediaDataEncoder { // Return the current temporal layer id and update the next. 
uint8_t UpdateTemporalLayerId(); }; - Maybe mSVCInfo; - - // Provide critical-section for open/close mCodecContext. - // TODO: Merge this with FFmpegDataDecoder's one. - static StaticMutex sMutex; + Maybe mSVCInfo{}; }; } // namespace mozilla -#endif /* DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGVIDEOENCODER_H_ */ +#endif // DOM_MEDIA_PLATFORMS_FFMPEG_FFMPEGVIDEOENCODER_H_ diff --git a/dom/media/platforms/ffmpeg/ffmpeg57/moz.build b/dom/media/platforms/ffmpeg/ffmpeg57/moz.build index f26edcdc7f..db48b36f6b 100644 --- a/dom/media/platforms/ffmpeg/ffmpeg57/moz.build +++ b/dom/media/platforms/ffmpeg/ffmpeg57/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ '../FFmpegAudioDecoder.cpp', + '../FFmpegAudioEncoder.cpp', '../FFmpegDataDecoder.cpp', + "../FFmpegDataEncoder.cpp", '../FFmpegDecoderModule.cpp', '../FFmpegEncoderModule.cpp', '../FFmpegVideoDecoder.cpp', diff --git a/dom/media/platforms/ffmpeg/ffmpeg58/moz.build b/dom/media/platforms/ffmpeg/ffmpeg58/moz.build index a22bf98abd..12e48c44f0 100644 --- a/dom/media/platforms/ffmpeg/ffmpeg58/moz.build +++ b/dom/media/platforms/ffmpeg/ffmpeg58/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ '../FFmpegAudioDecoder.cpp', + '../FFmpegAudioEncoder.cpp', '../FFmpegDataDecoder.cpp', + "../FFmpegDataEncoder.cpp", '../FFmpegDecoderModule.cpp', '../FFmpegEncoderModule.cpp', '../FFmpegVideoDecoder.cpp', diff --git a/dom/media/platforms/ffmpeg/ffmpeg59/moz.build b/dom/media/platforms/ffmpeg/ffmpeg59/moz.build index e0c6c10ecd..c4f7b89951 100644 --- a/dom/media/platforms/ffmpeg/ffmpeg59/moz.build +++ b/dom/media/platforms/ffmpeg/ffmpeg59/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ "../FFmpegAudioDecoder.cpp", + '../FFmpegAudioEncoder.cpp', "../FFmpegDataDecoder.cpp", + "../FFmpegDataEncoder.cpp", "../FFmpegDecoderModule.cpp", "../FFmpegEncoderModule.cpp", "../FFmpegVideoDecoder.cpp", diff --git a/dom/media/platforms/ffmpeg/ffmpeg60/moz.build b/dom/media/platforms/ffmpeg/ffmpeg60/moz.build index e0c6c10ecd..c4f7b89951 100644 --- a/dom/media/platforms/ffmpeg/ffmpeg60/moz.build +++ b/dom/media/platforms/ffmpeg/ffmpeg60/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ "../FFmpegAudioDecoder.cpp", + '../FFmpegAudioEncoder.cpp', "../FFmpegDataDecoder.cpp", + "../FFmpegDataEncoder.cpp", "../FFmpegDecoderModule.cpp", "../FFmpegEncoderModule.cpp", "../FFmpegVideoDecoder.cpp", diff --git a/dom/media/platforms/ffmpeg/ffvpx/moz.build b/dom/media/platforms/ffmpeg/ffvpx/moz.build index 97a224b08b..bc72b6d1a7 100644 --- a/dom/media/platforms/ffmpeg/ffvpx/moz.build +++ b/dom/media/platforms/ffmpeg/ffvpx/moz.build @@ -11,9 +11,12 @@ EXPORTS += [ UNIFIED_SOURCES += [ "../FFmpegAudioDecoder.cpp", + "../FFmpegAudioEncoder.cpp", "../FFmpegDataDecoder.cpp", + "../FFmpegDataEncoder.cpp", "../FFmpegDecoderModule.cpp", "../FFmpegEncoderModule.cpp", + "../FFmpegUtils.cpp", "../FFmpegVideoDecoder.cpp", "../FFmpegVideoEncoder.cpp", ] diff --git a/dom/media/platforms/ffmpeg/libav53/moz.build b/dom/media/platforms/ffmpeg/libav53/moz.build index 06b226e1f1..81b8b8dcc6 100644 --- a/dom/media/platforms/ffmpeg/libav53/moz.build +++ b/dom/media/platforms/ffmpeg/libav53/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ '../FFmpegAudioDecoder.cpp', + '../FFmpegAudioEncoder.cpp', '../FFmpegDataDecoder.cpp', + "../FFmpegDataEncoder.cpp", '../FFmpegDecoderModule.cpp', '../FFmpegEncoderModule.cpp', '../FFmpegVideoDecoder.cpp', diff --git a/dom/media/platforms/ffmpeg/libav54/moz.build b/dom/media/platforms/ffmpeg/libav54/moz.build index 06b226e1f1..81b8b8dcc6 100644 --- 
a/dom/media/platforms/ffmpeg/libav54/moz.build +++ b/dom/media/platforms/ffmpeg/libav54/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ '../FFmpegAudioDecoder.cpp', + '../FFmpegAudioEncoder.cpp', '../FFmpegDataDecoder.cpp', + "../FFmpegDataEncoder.cpp", '../FFmpegDecoderModule.cpp', '../FFmpegEncoderModule.cpp', '../FFmpegVideoDecoder.cpp', diff --git a/dom/media/platforms/ffmpeg/libav55/moz.build b/dom/media/platforms/ffmpeg/libav55/moz.build index af2d4f1831..2c3d89b9b3 100644 --- a/dom/media/platforms/ffmpeg/libav55/moz.build +++ b/dom/media/platforms/ffmpeg/libav55/moz.build @@ -6,7 +6,9 @@ UNIFIED_SOURCES += [ '../FFmpegAudioDecoder.cpp', + '../FFmpegAudioEncoder.cpp', '../FFmpegDataDecoder.cpp', + "../FFmpegDataEncoder.cpp", '../FFmpegDecoderModule.cpp', '../FFmpegEncoderModule.cpp', '../FFmpegVideoDecoder.cpp', diff --git a/dom/media/platforms/ffmpeg/moz.build b/dom/media/platforms/ffmpeg/moz.build index f519b30cec..ac78eee289 100644 --- a/dom/media/platforms/ffmpeg/moz.build +++ b/dom/media/platforms/ffmpeg/moz.build @@ -18,9 +18,7 @@ DIRS += [ "ffmpeg60", ] -UNIFIED_SOURCES += [ - "FFmpegRuntimeLinker.cpp", -] +UNIFIED_SOURCES += ["FFmpegRuntimeLinker.cpp"] if CONFIG["MOZ_WIDGET_TOOLKIT"] == "gtk": include("/ipc/chromium/chromium-config.mozbuild") diff --git a/dom/media/platforms/moz.build b/dom/media/platforms/moz.build index 6f71c5cc12..9a4f19aa4b 100644 --- a/dom/media/platforms/moz.build +++ b/dom/media/platforms/moz.build @@ -11,6 +11,7 @@ EXPORTS += [ "agnostic/TheoraDecoder.h", "agnostic/VPXDecoder.h", "AllocationPolicy.h", + "EncoderConfig.h", "MediaCodecsSupport.h", "MediaTelemetryConstants.h", "PDMFactory.h", @@ -32,6 +33,7 @@ UNIFIED_SOURCES += [ "agnostic/TheoraDecoder.cpp", "agnostic/VPXDecoder.cpp", "AllocationPolicy.cpp", + "EncoderConfig.cpp", "MediaCodecsSupport.cpp", "PDMFactory.cpp", "PEMFactory.cpp", diff --git a/dom/media/platforms/wmf/DXVA2Manager.cpp b/dom/media/platforms/wmf/DXVA2Manager.cpp index 36b424ab8e..9efe9dab55 100644 --- a/dom/media/platforms/wmf/DXVA2Manager.cpp +++ b/dom/media/platforms/wmf/DXVA2Manager.cpp @@ -21,6 +21,8 @@ #include "gfxCrashReporterUtils.h" #include "gfxWindowsPlatform.h" #include "mfapi.h" +#include "mozilla/AppShutdown.h" +#include "mozilla/ClearOnShutdown.h" #include "mozilla/StaticMutex.h" #include "mozilla/StaticPrefs_media.h" #include "mozilla/Telemetry.h" @@ -122,6 +124,38 @@ using layers::ImageContainer; using namespace layers; using namespace gfx; +StaticRefPtr sDevice; +StaticMutex sDeviceMutex; + +// We found an issue where the ID3D11VideoDecoder won't release its underlying +// resources properly if the decoder is created from a compositor device by +// ourselves. This problem has been observed with both VP9 and, reportedly, AV1 +// decoders; it does not seem to affect the H264 decoder, and the underlying +// decoder created by MFT does not seem to have this issue. +// Therefore, when checking whether we can use hardware decoding, we should use +// a non-compositor device to create a decoder in order to prevent resource +// leaking that can significantly degrade performance. For the actual +// decoding, we will still use the compositor device if it's available in order +// to avoid video copying.
+static ID3D11Device* GetDeviceForDecoderCheck() { + StaticMutexAutoLock lock(sDeviceMutex); + if (AppShutdown::IsInOrBeyond(ShutdownPhase::XPCOMShutdown)) { + return nullptr; + } + if (!sDevice) { + sDevice = gfx::DeviceManagerDx::Get()->CreateDecoderDevice( + {DeviceManagerDx::DeviceFlag::disableDeviceReuse}); + auto clearOnShutdown = [] { ClearOnShutdown(&sDevice); }; + if (!NS_IsMainThread()) { + Unused << NS_DispatchToMainThread( + NS_NewRunnableFunction(__func__, clearOnShutdown)); + } else { + clearOnShutdown(); + } + } + return sDevice.get(); +} + void GetDXVA2ExtendedFormatFromMFMediaType(IMFMediaType* pType, DXVA2_ExtendedFormat* pFormat) { // Get the interlace mode. @@ -362,10 +396,10 @@ class D3D11DXVA2Manager : public DXVA2Manager { HRESULT CreateOutputSample(RefPtr<IMFSample>& aSample, ID3D11Texture2D* aTexture); + // This is used to check whether hw decoding is possible before using the MFT + // for decoding. bool CanCreateDecoder(const D3D11_VIDEO_DECODER_DESC& aDesc) const; - already_AddRefed<ID3D11VideoDecoder> CreateDecoder( - const D3D11_VIDEO_DECODER_DESC& aDesc) const; void RefreshIMFSampleWrappers(); void ReleaseAllIMFSamples(); @@ -618,10 +652,11 @@ D3D11DXVA2Manager::InitInternal(layers::KnowsCompositor* aKnowsCompositor, mDevice = aDevice; if (!mDevice) { - bool useHardwareWebRender = - aKnowsCompositor && aKnowsCompositor->UsingHardwareWebRender(); - mDevice = - gfx::DeviceManagerDx::Get()->CreateDecoderDevice(useHardwareWebRender); + DeviceManagerDx::DeviceFlagSet flags; + if (aKnowsCompositor && aKnowsCompositor->UsingHardwareWebRender()) { + flags += DeviceManagerDx::DeviceFlag::isHardwareWebRenderInUse; + } + mDevice = gfx::DeviceManagerDx::Get()->CreateDecoderDevice(flags); if (!mDevice) { aFailureReason.AssignLiteral("Failed to create D3D11 device for decoder"); return E_FAIL; @@ -1155,20 +1190,26 @@ D3D11DXVA2Manager::ConfigureForSize(IMFMediaType* aInputType, bool D3D11DXVA2Manager::CanCreateDecoder( const D3D11_VIDEO_DECODER_DESC& aDesc) const { - RefPtr<ID3D11VideoDecoder> decoder = CreateDecoder(aDesc); - return decoder.get() != nullptr; -} + RefPtr<ID3D11Device> device = GetDeviceForDecoderCheck(); + if (!device) { + LOG("Can't create decoder due to lack of ID3D11Device!"); + return false; + } -already_AddRefed<ID3D11VideoDecoder> D3D11DXVA2Manager::CreateDecoder( - const D3D11_VIDEO_DECODER_DESC& aDesc) const { RefPtr<ID3D11VideoDevice> videoDevice; - HRESULT hr = mDevice->QueryInterface( + HRESULT hr = device->QueryInterface( static_cast<ID3D11VideoDevice**>(getter_AddRefs(videoDevice))); - NS_ENSURE_TRUE(SUCCEEDED(hr), nullptr); + if (FAILED(hr)) { + LOG("Failed to query ID3D11VideoDevice!"); + return false; + } UINT configCount = 0; hr = videoDevice->GetVideoDecoderConfigCount(&aDesc, &configCount); - NS_ENSURE_TRUE(SUCCEEDED(hr), nullptr); + if (FAILED(hr)) { + LOG("Failed to get decoder config count!"); + return false; + } for (UINT i = 0; i < configCount; i++) { D3D11_VIDEO_DECODER_CONFIG config; @@ -1177,10 +1218,10 @@ already_AddRefed<ID3D11VideoDecoder> D3D11DXVA2Manager::CreateDecoder( RefPtr<ID3D11VideoDecoder> decoder; hr = videoDevice->CreateVideoDecoder(&aDesc, &config, decoder.StartAssignment()); - return decoder.forget(); + return decoder != nullptr; } } - return nullptr; + return false; } /* static */ diff --git a/dom/media/platforms/wmf/MFCDMSession.cpp b/dom/media/platforms/wmf/MFCDMSession.cpp index b797898abb..0ae4614f3b 100644 --- a/dom/media/platforms/wmf/MFCDMSession.cpp +++ b/dom/media/platforms/wmf/MFCDMSession.cpp @@ -11,6 +11,7 @@ #include "MFMediaEngineUtils.h" #include "GMPUtils.h" // ToHexString #include "mozilla/EMEUtils.h" +#include "mozilla/dom/BindingUtils.h" #include "mozilla/dom/MediaKeyMessageEventBinding.h" #include "mozilla/dom/MediaKeyStatusMapBinding.h" #include "nsThreadUtils.h" @@ -244,7 +245,7 @@ void MFCDMSession::OnSessionKeysChange() { nsAutoCString keyIdString(ToHexString(keyId)); LOG("Append keyid-sz=%u, keyid=%s, status=%s", keyStatus.cbKeyId, keyIdString.get(), - ToMediaKeyStatusStr(ToMediaKeyStatus(keyStatus.eMediaKeyStatus))); + dom::GetEnumString(ToMediaKeyStatus(keyStatus.eMediaKeyStatus)).get()); keyInfos.AppendElement(MFCDMKeyInformation{ std::move(keyId), ToMediaKeyStatus(keyStatus.eMediaKeyStatus)}); } diff --git a/dom/media/platforms/wmf/MFMediaEngineStream.cpp b/dom/media/platforms/wmf/MFMediaEngineStream.cpp index 70ffa50142..5875b5a17c 100644 --- a/dom/media/platforms/wmf/MFMediaEngineStream.cpp +++ b/dom/media/platforms/wmf/MFMediaEngineStream.cpp @@ -11,6 +11,7 @@ #include "TimeUnits.h" #include "mozilla/ProfilerLabels.h" #include "mozilla/ProfilerMarkerTypes.h" +#include "mozilla/ScopeExit.h" #include "WMF.h" #include "WMFUtils.h" @@ -126,6 +127,13 @@ HRESULT MFMediaEngineStream::RuntimeClassInitialize( mTaskQueue = aParentSource->GetTaskQueue(); MOZ_ASSERT(mTaskQueue); mStreamId = aStreamId; + + auto errorExit = MakeScopeExit([&] { + SLOG("Failed to initialize media stream (id=%" PRIu64 ")", aStreamId); + mIsShutdown = true; + Unused << mMediaEventQueue->Shutdown(); + }); + RETURN_IF_FAILED(wmf::MFCreateEventQueue(&mMediaEventQueue)); ComPtr<IMFMediaType> mediaType; @@ -134,6 +142,7 @@ HRESULT MFMediaEngineStream::RuntimeClassInitialize( RETURN_IF_FAILED(GenerateStreamDescriptor(mediaType)); SLOG("Initialized %s (id=%" PRIu64 ", descriptorId=%lu)", GetDescriptionName().get(), aStreamId, mStreamDescriptorId); + errorExit.release(); return S_OK; } diff --git a/dom/media/platforms/wmf/WMFDataEncoderUtils.h b/dom/media/platforms/wmf/WMFDataEncoderUtils.h index 7472827b49..19f04e768f 100644 --- a/dom/media/platforms/wmf/WMFDataEncoderUtils.h +++ b/dom/media/platforms/wmf/WMFDataEncoderUtils.h @@ -32,7 +32,6 @@ static const GUID CodecToSubtype(CodecType aCodec) { case CodecType::VP9: return MFVideoFormat_VP90; default: - MOZ_ASSERT(false, "Unsupported codec"); return GUID_NULL; } } diff --git a/dom/media/platforms/wmf/WMFEncoderModule.cpp b/dom/media/platforms/wmf/WMFEncoderModule.cpp index f9f35db653..7b5af9bf50 100644 --- a/dom/media/platforms/wmf/WMFEncoderModule.cpp +++ b/dom/media/platforms/wmf/WMFEncoderModule.cpp @@ -12,6 +12,10 @@ namespace mozilla { extern LazyLogModule sPEMLog; bool WMFEncoderModule::SupportsCodec(CodecType aCodecType) const { + if (aCodecType > CodecType::_BeginAudio_ && + aCodecType < CodecType::_EndAudio_) { + return false; + } return CanCreateWMFEncoder(aCodecType); } @@ -19,6 +23,9 @@ bool WMFEncoderModule::Supports(const EncoderConfig& aConfig) const { if (!CanLikelyEncode(aConfig)) { return false; } + if (aConfig.IsAudio()) { + return false; + } return SupportsCodec(aConfig.mCodec); } diff --git a/dom/media/platforms/wmf/WMFUtils.cpp b/dom/media/platforms/wmf/WMFUtils.cpp index dda9df808e..bf5b8fe67d 100644 --- a/dom/media/platforms/wmf/WMFUtils.cpp +++ b/dom/media/platforms/wmf/WMFUtils.cpp @@ -333,7 +333,9 @@ GUID VideoMimeTypeToMediaFoundationSubtype(const nsACString& aMimeType) { if (MP4Decoder::IsHEVC(aMimeType)) { return MFVideoFormat_HEVC; } - NS_WARNING("Unsupport video mimetype"); + NS_WARNING(nsAutoCString(nsDependentCString("Unsupported video mimetype ") + + aMimeType) + .get()); return GUID_NULL; } diff --git a/dom/media/platforms/wrappers/MediaChangeMonitor.cpp
b/dom/media/platforms/wrappers/MediaChangeMonitor.cpp index 46989840bf..bb7b015fab 100644 --- a/dom/media/platforms/wrappers/MediaChangeMonitor.cpp +++ b/dom/media/platforms/wrappers/MediaChangeMonitor.cpp @@ -800,6 +800,7 @@ RefPtr<ShutdownPromise> MediaChangeMonitor::ShutdownDecoder() { AssertOnThread(); mConversionRequired.reset(); if (mDecoder) { + MutexAutoLock lock(mMutex); RefPtr<MediaDataDecoder> decoder = std::move(mDecoder); return decoder->Shutdown(); } @@ -847,6 +848,7 @@ MediaChangeMonitor::CreateDecoder() { ->Then( GetCurrentSerialEventTarget(), __func__, [self = RefPtr{this}, this](RefPtr<MediaDataDecoder>&& aDecoder) { + MutexAutoLock lock(mMutex); mDecoder = std::move(aDecoder); DDLINKCHILD("decoder", mDecoder.get()); return CreateDecoderPromise::CreateAndResolve(true, __func__); @@ -1095,6 +1097,11 @@ void MediaChangeMonitor::FlushThenShutdownDecoder( ->Track(mFlushRequest); } +MediaDataDecoder* MediaChangeMonitor::GetDecoderOnNonOwnerThread() const { + MutexAutoLock lock(mMutex); + return mDecoder; +} + #undef LOG } // namespace mozilla diff --git a/dom/media/platforms/wrappers/MediaChangeMonitor.h b/dom/media/platforms/wrappers/MediaChangeMonitor.h index a3ee5b5aa0..ff4f6921f6 100644 --- a/dom/media/platforms/wrappers/MediaChangeMonitor.h +++ b/dom/media/platforms/wrappers/MediaChangeMonitor.h @@ -41,34 +41,34 @@ class MediaChangeMonitor final RefPtr<ShutdownPromise> Shutdown() override; bool IsHardwareAccelerated(nsACString& aFailureReason) const override; nsCString GetDescriptionName() const override { - if (mDecoder) { - return mDecoder->GetDescriptionName(); + if (RefPtr<MediaDataDecoder> decoder = GetDecoderOnNonOwnerThread()) { + return decoder->GetDescriptionName(); } return "MediaChangeMonitor decoder (pending)"_ns; } nsCString GetProcessName() const override { - if (mDecoder) { - return mDecoder->GetProcessName(); + if (RefPtr<MediaDataDecoder> decoder = GetDecoderOnNonOwnerThread()) { + return decoder->GetProcessName(); } return "MediaChangeMonitor"_ns; } nsCString GetCodecName() const override { - if (mDecoder) { - return mDecoder->GetCodecName(); + if (RefPtr<MediaDataDecoder> decoder = GetDecoderOnNonOwnerThread()) { + return decoder->GetCodecName(); } return "MediaChangeMonitor"_ns; } void SetSeekThreshold(const media::TimeUnit& aTime) override; bool SupportDecoderRecycling() const override { - if (mDecoder) { - return mDecoder->SupportDecoderRecycling(); + if (RefPtr<MediaDataDecoder> decoder = GetDecoderOnNonOwnerThread()) { + return decoder->SupportDecoderRecycling(); } return false; } ConversionRequired NeedsConversion() const override { - if (mDecoder) { - return mDecoder->NeedsConversion(); + if (RefPtr<MediaDataDecoder> decoder = GetDecoderOnNonOwnerThread()) { + return decoder->NeedsConversion(); } // Default so no conversion is performed. return ConversionRequired::kNeedNone; @@ -100,6 +100,9 @@ class MediaChangeMonitor final MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread()); } + // This is used for getting decoder debug info on other threads. Thread-safe. + MediaDataDecoder* GetDecoderOnNonOwnerThread() const; + bool CanRecycleDecoder() const; typedef MozPromise const CreateDecoderParamsForAsync mParams; // Keep any seek threshold set for after decoder creation and initialization. Maybe<media::TimeUnit> mPendingSeekThreshold; + + // This lock is used for mDecoder specifically, but it doesn't need to be held + // everywhere mDecoder is accessed, since most accesses happen on the owner + // thread. However, requests for decoder debug info can come from other + // threads, so we need this mutex to avoid data races between + // creating/destroying the decoder and reading its debug info. + mutable Mutex MOZ_ANNOTATED mMutex{"MediaChangeMonitor"}; }; } // namespace mozilla -- cgit v1.2.3
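The GetDeviceForDecoderCheck() helper added to DXVA2Manager.cpp above lazily creates a dedicated non-compositor device for the decoder-capability probe, guards it with a static mutex, refuses to hand it out once XPCOM shutdown has begun, and arranges for ClearOnShutdown to run on the main thread. A minimal standard-C++ sketch of that pattern, with std::shared_ptr, std::mutex and an atomic flag standing in for StaticRefPtr, StaticMutex and the AppShutdown check; every name in the sketch is hypothetical and not part of the patch.

#include <atomic>
#include <memory>
#include <mutex>

// Stand-ins (illustration only): Device ~ ID3D11Device,
// sCheckDevice ~ StaticRefPtr<ID3D11Device>, sCheckMutex ~ StaticMutex,
// sShuttingDown ~ AppShutdown::IsInOrBeyond(ShutdownPhase::XPCOMShutdown).
struct Device {};

static std::shared_ptr<Device> sCheckDevice;
static std::mutex sCheckMutex;
static std::atomic<bool> sShuttingDown{false};

// Lazily create the device used only for the "can we create a hardware
// decoder?" probe; refuse once shutdown has started so the device cannot
// outlive the cleanup phase.
std::shared_ptr<Device> GetDeviceForDecoderCheckSketch() {
  std::lock_guard<std::mutex> lock(sCheckMutex);
  if (sShuttingDown) {
    return nullptr;
  }
  if (!sCheckDevice) {
    sCheckDevice = std::make_shared<Device>();
  }
  return sCheckDevice;
}

// Invoked from the shutdown path, playing the role of ClearOnShutdown(&sDevice).
void ClearDeviceForDecoderCheckSketch() {
  std::lock_guard<std::mutex> lock(sCheckMutex);
  sShuttingDown = true;
  sCheckDevice.reset();
}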
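The MediaChangeMonitor change above routes cross-thread reads of mDecoder through GetDecoderOnNonOwnerThread(), which takes the new mMutex, while owner-thread code locks only around decoder creation and shutdown. A small self-contained sketch of that accessor pattern follows, with std::shared_ptr and std::mutex standing in for RefPtr and Mutex; the class and method names are hypothetical and only illustrate the locking scheme.

#include <memory>
#include <mutex>
#include <string>

// Plays the role of the wrapped MediaDataDecoder.
class DecoderSketch {
 public:
  std::string GetDescriptionName() const { return "sketch decoder"; }
};

class MonitorSketch {
 public:
  // Owner thread: create or drop the decoder under the lock, mirroring the
  // locked assignments in CreateDecoder()/ShutdownDecoder().
  void SetDecoder(std::shared_ptr<DecoderSketch> aDecoder) {
    std::lock_guard<std::mutex> lock(mMutex);
    mDecoder = std::move(aDecoder);
  }

  // Any thread: take a strong reference under the lock, then use it without
  // holding the lock, mirroring GetDecoderOnNonOwnerThread().
  std::string GetDescriptionName() const {
    if (std::shared_ptr<DecoderSketch> decoder = GetDecoderSnapshot()) {
      return decoder->GetDescriptionName();
    }
    return "MonitorSketch decoder (pending)";
  }

 private:
  std::shared_ptr<DecoderSketch> GetDecoderSnapshot() const {
    std::lock_guard<std::mutex> lock(mMutex);
    return mDecoder;
  }

  mutable std::mutex mMutex;
  std::shared_ptr<DecoderSketch> mDecoder;
};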