From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 21:33:14 +0200
Subject: Adding upstream version 115.7.0esr.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 dom/media/platforms/apple/AppleATDecoder.cpp     | 672 ++++++++++++++++++++
 dom/media/platforms/apple/AppleATDecoder.h       |  80 +++
 dom/media/platforms/apple/AppleDecoderModule.cpp | 230 +++++++
 dom/media/platforms/apple/AppleDecoderModule.h   |  62 ++
 dom/media/platforms/apple/AppleEncoderModule.cpp |  25 +
 dom/media/platforms/apple/AppleEncoderModule.h   |  27 +
 dom/media/platforms/apple/AppleUtils.h           |  88 +++
 dom/media/platforms/apple/AppleVTDecoder.cpp     | 761 +++++++++++++++++++++++
 dom/media/platforms/apple/AppleVTDecoder.h       | 145 +++++
 dom/media/platforms/apple/AppleVTEncoder.cpp     | 628 +++++++++++++++++++
 dom/media/platforms/apple/AppleVTEncoder.h       |  85 +++
 11 files changed, 2803 insertions(+)
 create mode 100644 dom/media/platforms/apple/AppleATDecoder.cpp
 create mode 100644 dom/media/platforms/apple/AppleATDecoder.h
 create mode 100644 dom/media/platforms/apple/AppleDecoderModule.cpp
 create mode 100644 dom/media/platforms/apple/AppleDecoderModule.h
 create mode 100644 dom/media/platforms/apple/AppleEncoderModule.cpp
 create mode 100644 dom/media/platforms/apple/AppleEncoderModule.h
 create mode 100644 dom/media/platforms/apple/AppleUtils.h
 create mode 100644 dom/media/platforms/apple/AppleVTDecoder.cpp
 create mode 100644 dom/media/platforms/apple/AppleVTDecoder.h
 create mode 100644 dom/media/platforms/apple/AppleVTEncoder.cpp
 create mode 100644 dom/media/platforms/apple/AppleVTEncoder.h

(limited to 'dom/media/platforms/apple')

diff --git a/dom/media/platforms/apple/AppleATDecoder.cpp b/dom/media/platforms/apple/AppleATDecoder.cpp
new file mode 100644
index 0000000000..13a6be3e31
--- /dev/null
+++ b/dom/media/platforms/apple/AppleATDecoder.cpp
@@ -0,0 +1,672 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AppleATDecoder.h"
+#include "Adts.h"
+#include "AppleUtils.h"
+#include "MP4Decoder.h"
+#include "MediaInfo.h"
+#include "VideoUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/SyncRunnable.h"
+#include "mozilla/UniquePtr.h"
+#include "nsTArray.h"
+
+#define LOG(...) DDMOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__)
+#define LOGEX(_this, ...) \
+  DDMOZ_LOGEX(_this, sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__)
+#define FourCC2Str(n) \
+  ((char[5]){(char)(n >> 24), (char)(n >> 16), (char)(n >> 8), (char)(n), 0})
+
+namespace mozilla {
+
+AppleATDecoder::AppleATDecoder(const AudioInfo& aConfig)
+    : mConfig(aConfig),
+      mFileStreamError(false),
+      mConverter(nullptr),
+      mOutputFormat(),
+      mStream(nullptr),
+      mParsedFramesForAACMagicCookie(0),
+      mErrored(false) {
+  MOZ_COUNT_CTOR(AppleATDecoder);
+  LOG("Creating Apple AudioToolbox decoder");
+  LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel",
+      mConfig.mMimeType.get(), mConfig.mRate, mConfig.mChannels,
+      mConfig.mBitDepth);
+
+  if (mConfig.mMimeType.EqualsLiteral("audio/mpeg")) {
+    mFormatID = kAudioFormatMPEGLayer3;
+  } else if (mConfig.mMimeType.EqualsLiteral("audio/mp4a-latm")) {
+    mFormatID = kAudioFormatMPEG4AAC;
+    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
+      const AacCodecSpecificData& aacCodecSpecificData =
+          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
+      mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames;
+      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
+      LOG("AppleATDecoder (aac), found encoder delay (%" PRIu32
+          ") and total frame count (%" PRIu64 ") in codec-specific side data",
+          mEncoderDelay, mTotalMediaFrames);
+    }
+  } else {
+    mFormatID = 0;
+  }
+}
+
+AppleATDecoder::~AppleATDecoder() {
+  MOZ_COUNT_DTOR(AppleATDecoder);
+  MOZ_ASSERT(!mConverter);
+}
+
+RefPtr<MediaDataDecoder::InitPromise> AppleATDecoder::Init() {
+  if (!mFormatID) {
+    return InitPromise::CreateAndReject(
+        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                    RESULT_DETAIL("Non recognised format")),
+        __func__);
+  }
+  mThread = GetCurrentSerialEventTarget();
+
+  return InitPromise::CreateAndResolve(TrackType::kAudioTrack, __func__);
+}
+
+RefPtr<MediaDataDecoder::FlushPromise> AppleATDecoder::Flush() {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+  LOG("Flushing AudioToolbox AAC decoder");
+  mQueuedSamples.Clear();
+  mDecodedSamples.Clear();
+  // Reset the converter; after a decode error also restart setup from scratch.
+  if (mConverter) {
+    OSStatus rv = AudioConverterReset(mConverter);
+    if (rv) {
+      LOG("Error %d resetting AudioConverter", static_cast<int>(rv));
+    }
+  }
+  if (mErrored) {
+    mParsedFramesForAACMagicCookie = 0;
+    mMagicCookie.Clear();
+    ProcessShutdown();
+    mErrored = mFileStreamError = false;  // stale stream error blocks retry
+  }
+  return FlushPromise::CreateAndResolve(true, __func__);
+}
+
+RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Drain() {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+  LOG("Draining AudioToolbox AAC decoder");
+  return DecodePromise::CreateAndResolve(DecodedData(), __func__);
+}
+
+RefPtr<ShutdownPromise> AppleATDecoder::Shutdown() {
+  // mThread may not be set if Init hasn't been called first.
+  MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread());
+  ProcessShutdown();
+  return ShutdownPromise::CreateAndResolve(true, __func__);
+}
+
+void AppleATDecoder::ProcessShutdown() {
+  // mThread may not be set if Init hasn't been called first.
+  MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread());
+  // Releases the file stream and the converter; failures are only logged.
+  if (mStream) {
+    OSStatus rv = AudioFileStreamClose(mStream);
+    if (rv) {
+      LOG("error %d disposing of AudioFileStream", static_cast<int>(rv));
+      // Keep going: the converter below must still be disposed.
+    }
+    mStream = nullptr;
+  }
+
+  if (mConverter) {
+    LOG("Shutdown: Apple AudioToolbox AAC decoder");
+    OSStatus rv = AudioConverterDispose(mConverter);
+    if (rv) {
+      LOG("error %d disposing of AudioConverter", static_cast<int>(rv));
+    }
+    mConverter = nullptr;
+  }
+}
+
+nsCString AppleATDecoder::GetCodecName() const {
+  switch (mFormatID) {
+    case kAudioFormatMPEGLayer3:
+      return "mp3"_ns;
+    case kAudioFormatMPEG4AAC:
+      return "aac"_ns;
+    default:
+      return "unknown"_ns;
+  }
+}
+
+struct PassthroughUserData {
+  UInt32 mChannels;
+  UInt32 mDataSize;
+  const void* mData;
+  AudioStreamPacketDescription mPacket;
+};
+
+// Error value we pass through the decoder to signal that nothing
+// has gone wrong during decoding and we're done processing the packet.
+const uint32_t kNoMoreDataErr = 'MOAR';
+
+static OSStatus _PassthroughInputDataCallback(
+    AudioConverterRef aAudioConverter, UInt32* aNumDataPackets /* in/out */,
+    AudioBufferList* aData /* in/out */,
+    AudioStreamPacketDescription** aPacketDesc, void* aUserData) {
+  PassthroughUserData* userData = (PassthroughUserData*)aUserData;
+  if (!userData->mDataSize) {
+    *aNumDataPackets = 0;
+    return kNoMoreDataErr;
+  }
+
+  if (aPacketDesc) {
+    userData->mPacket.mStartOffset = 0;
+    userData->mPacket.mVariableFramesInPacket = 0;
+    userData->mPacket.mDataByteSize = userData->mDataSize;
+    *aPacketDesc = &userData->mPacket;
+  }
+
+  aData->mBuffers[0].mNumberChannels = userData->mChannels;
+  aData->mBuffers[0].mDataByteSize = userData->mDataSize;
+  aData->mBuffers[0].mData = const_cast<void*>(userData->mData);
+
+  // No more data to provide following this run.
+  userData->mDataSize = 0;
+
+  return noErr;
+}
+
+RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Decode(
+    MediaRawData* aSample) {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+  LOG("mp4 input sample pts=%s duration=%s %s %llu bytes audio",
+      aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(),
+      aSample->mKeyframe ? " keyframe" : "",
+      (unsigned long long)aSample->Size());
+
+  MediaResult rv = NS_OK;
+  if (!mConverter) {
+    rv = SetupDecoder(aSample);
+    if (rv != NS_OK && rv != NS_ERROR_NOT_INITIALIZED) {
+      return DecodePromise::CreateAndReject(rv, __func__);
+    }
+  }
+
+  mQueuedSamples.AppendElement(aSample);
+
+  if (rv == NS_OK) {
+    for (size_t i = 0; i < mQueuedSamples.Length(); i++) {
+      rv = DecodeSample(mQueuedSamples[i]);
+      if (NS_FAILED(rv)) {
+        mErrored = true;
+        return DecodePromise::CreateAndReject(rv, __func__);
+      }
+    }
+    mQueuedSamples.Clear();
+  }
+
+  DecodedData results = std::move(mDecodedSamples);
+  mDecodedSamples = DecodedData();
+  return DecodePromise::CreateAndResolve(std::move(results), __func__);
+}
+
+MediaResult AppleATDecoder::DecodeSample(MediaRawData* aSample) {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+
+  // Array containing the queued decoded audio frames, about to be output.
+  nsTArray<AudioDataValue> outputData;
+  UInt32 channels = mOutputFormat.mChannelsPerFrame;
+  // Pick a multiple of the frame size close to a power of two
+  // for efficient allocation. We're mainly using this decoder to decode AAC,
+  // that has packets of 1024 audio frames.
+  const uint32_t MAX_AUDIO_FRAMES = 1024;
+  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels;
+
+  // Descriptions for _decompressed_ audio packets. ignored.
+  auto packets = MakeUnique<AudioStreamPacketDescription[]>(MAX_AUDIO_FRAMES);
+
+  // This API insists on having packets spoon-fed to it from a callback.
+  // This structure exists only to pass our state.
+  PassthroughUserData userData = {channels, (UInt32)aSample->Size(),
+                                  aSample->Data()};
+
+  // Decompressed audio buffer
+  AlignedAudioBuffer decoded(maxDecodedSamples);
+  if (!decoded) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  do {
+    AudioBufferList decBuffer;
+    decBuffer.mNumberBuffers = 1;
+    decBuffer.mBuffers[0].mNumberChannels = channels;
+    decBuffer.mBuffers[0].mDataByteSize =
+        maxDecodedSamples * sizeof(AudioDataValue);
+    decBuffer.mBuffers[0].mData = decoded.get();
+
+    // in: the max number of packets we can handle from the decoder.
+    // out: the number of packets the decoder is actually returning.
+    UInt32 numFrames = MAX_AUDIO_FRAMES;
+
+    OSStatus rv = AudioConverterFillComplexBuffer(
+        mConverter, _PassthroughInputDataCallback, &userData,
+        &numFrames /* in/out */, &decBuffer, packets.get());
+
+    if (rv && rv != kNoMoreDataErr) {
+      LOG("Error decoding audio sample: %d\n", static_cast<int>(rv));
+      return MediaResult(
+          NS_ERROR_DOM_MEDIA_DECODE_ERR,
+          RESULT_DETAIL("Error decoding audio sample: %d @ %s",
+                        static_cast<int>(rv), aSample->mTime.ToString().get()));
+    }
+
+    if (numFrames) {
+      AudioDataValue* outputFrames = decoded.get();
+      outputData.AppendElements(outputFrames, numFrames * channels);
+    }
+
+    if (rv == kNoMoreDataErr) {
+      break;
+    }
+  } while (true);
+
+  if (outputData.IsEmpty()) {
+    return NS_OK;
+  }
+
+  size_t numFrames = outputData.Length() / channels;
+  int rate = mOutputFormat.mSampleRate;
+  media::TimeUnit duration(numFrames, rate);
+  if (!duration.IsValid()) {
+    NS_WARNING("Invalid count of accumulated audio samples");
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
+        RESULT_DETAIL(
+            "Invalid count of accumulated audio samples: num:%llu rate:%d",
+            uint64_t(numFrames), rate));
+  }
+
+  LOG("Decoded audio packet [%s, %s] (duration: %s)\n",
+      aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(),
+      duration.ToString().get());
+
+  AudioSampleBuffer data(outputData.Elements(), outputData.Length());
+  if (!data.Data()) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  if (mChannelLayout && !mAudioConverter) {
+    AudioConfig in(*mChannelLayout, channels, rate);
+    AudioConfig out(AudioConfig::ChannelLayout::SMPTEDefault(*mChannelLayout),
+                    channels, rate);
+    mAudioConverter = MakeUnique<AudioConverter>(in, out);
+  }
+  if (mAudioConverter && mChannelLayout && mChannelLayout->IsValid()) {
+    MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
+    data = mAudioConverter->Process(std::move(data));
+  }
+
+  RefPtr<AudioData> audio = new AudioData(
+      aSample->mOffset, aSample->mTime, data.Forget(), channels, rate,
+      mChannelLayout && mChannelLayout->IsValid()
+          ? mChannelLayout->Map()
+          : AudioConfig::ChannelLayout::UNKNOWN_MAP);
+  MOZ_DIAGNOSTIC_ASSERT(duration == audio->mDuration, "must be equal");
+  mDecodedSamples.AppendElement(std::move(audio));
+  return NS_OK;
+}
+
+MediaResult AppleATDecoder::GetInputAudioDescription(
+    AudioStreamBasicDescription& aDesc, const nsTArray<uint8_t>& aExtraData) {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+  // Fills aDesc for mFormatID; only a FormatInfo failure is reported.
+  // Request the properties from CoreAudio using the codec magic cookie
+  AudioFormatInfo formatInfo;
+  PodZero(&formatInfo.mASBD);
+  formatInfo.mASBD.mFormatID = mFormatID;
+  if (mFormatID == kAudioFormatMPEG4AAC) {
+    formatInfo.mASBD.mFormatFlags = mConfig.mExtendedProfile;
+  }
+  formatInfo.mMagicCookieSize = aExtraData.Length();
+  formatInfo.mMagicCookie = aExtraData.Elements();
+
+  UInt32 formatListSize;
+  // Attempt to retrieve the default format using
+  // kAudioFormatProperty_FormatInfo method.
+  // This method only retrieves the FramesPerPacket information required
+  // by the decoder, which depends on the codec type and profile.
+  aDesc.mFormatID = mFormatID;
+  aDesc.mChannelsPerFrame = mConfig.mChannels;
+  aDesc.mSampleRate = mConfig.mRate;
+  UInt32 inputFormatSize = sizeof(aDesc);
+  OSStatus rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL,
+                                       &inputFormatSize, &aDesc);
+  if (NS_WARN_IF(rv)) {
+    return MediaResult(
+        NS_ERROR_FAILURE,
+        RESULT_DETAIL("Unable to get format info:%d", int32_t(rv)));
+  }
+
+  // If any of the methods below fail, we will return the default format as
+  // created using kAudioFormatProperty_FormatInfo above.
+  rv = AudioFormatGetPropertyInfo(kAudioFormatProperty_FormatList,
+                                  sizeof(formatInfo), &formatInfo,
+                                  &formatListSize);
+  if (rv || (formatListSize % sizeof(AudioFormatListItem))) {
+    return NS_OK;
+  }
+  size_t listCount = formatListSize / sizeof(AudioFormatListItem);
+  auto formatList = MakeUnique<AudioFormatListItem[]>(listCount);
+
+  rv = AudioFormatGetProperty(kAudioFormatProperty_FormatList,
+                              sizeof(formatInfo), &formatInfo, &formatListSize,
+                              formatList.get());
+  if (rv) {
+    return NS_OK;
+  }
+  LOG("found %zu available audio stream(s)",
+      formatListSize / sizeof(AudioFormatListItem));
+  // Get the index of the first playable format, i.e. the highest quality
+  // layer the platform is capable of playing. An index outside the list is
+  // treated like a failure so we never read past the end of formatList.
+  UInt32 itemIndex;
+  UInt32 indexSize = sizeof(itemIndex);
+  rv = AudioFormatGetProperty(kAudioFormatProperty_FirstPlayableFormatFromList,
+                              formatListSize, formatList.get(), &indexSize,
+                              &itemIndex);
+  if (rv || itemIndex >= listCount) {
+    return NS_OK;
+  }
+
+  aDesc = formatList[itemIndex].mASBD;
+
+  return NS_OK;
+}
+
+AudioConfig::Channel ConvertChannelLabel(AudioChannelLabel id) {
+  switch (id) {
+    case kAudioChannelLabel_Left:
+      return AudioConfig::CHANNEL_FRONT_LEFT;
+    case kAudioChannelLabel_Right:
+      return AudioConfig::CHANNEL_FRONT_RIGHT;
+    case kAudioChannelLabel_Mono:
+    case kAudioChannelLabel_Center:
+      return AudioConfig::CHANNEL_FRONT_CENTER;
+    case kAudioChannelLabel_LFEScreen:
+      return AudioConfig::CHANNEL_LFE;
+    case kAudioChannelLabel_LeftSurround:
+      return AudioConfig::CHANNEL_SIDE_LEFT;
+    case kAudioChannelLabel_RightSurround:
+      return AudioConfig::CHANNEL_SIDE_RIGHT;
+    case kAudioChannelLabel_CenterSurround:
+      return AudioConfig::CHANNEL_BACK_CENTER;
+    case kAudioChannelLabel_RearSurroundLeft:
+      return AudioConfig::CHANNEL_BACK_LEFT;
+    case kAudioChannelLabel_RearSurroundRight:
+      return AudioConfig::CHANNEL_BACK_RIGHT;
+    default:
+      return AudioConfig::CHANNEL_INVALID;
+  }
+}
+
+// Sets mChannelLayout from the converter's channel layout. Returns
+// NS_ERROR_FAILURE (leaving mChannelLayout untouched) if it can't be read.
+nsresult AppleATDecoder::SetupChannelLayout() {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+
+  // Determine the channel layout.
+  UInt32 propertySize;
+  UInt32 size;
+  OSStatus status = AudioConverterGetPropertyInfo(
+      mConverter, kAudioConverterOutputChannelLayout, &propertySize, NULL);
+  if (status || !propertySize) {
+    LOG("Couldn't get channel layout property (%s)", FourCC2Str(status));
+    return NS_ERROR_FAILURE;
+  }
+  // NOTE(review): sized from the Output layout but read from the Input one.
+  auto data = MakeUnique<uint8_t[]>(propertySize);
+  size = propertySize;
+  status = AudioConverterGetProperty(
+      mConverter, kAudioConverterInputChannelLayout, &size, data.get());
+  if (status || size != propertySize) {
+    LOG("Couldn't get channel layout property (%s)", FourCC2Str(status));
+    return NS_ERROR_FAILURE;
+  }
+
+  AudioChannelLayout* layout =
+      reinterpret_cast<AudioChannelLayout*>(data.get());
+  AudioChannelLayoutTag tag = layout->mChannelLayoutTag;
+  UInt32 bitmap = layout->mChannelBitmap;
+  // `tag` and `bitmap` are copied out because `data` is reallocated below.
+  // If tag is kAudioChannelLayoutTag_UseChannelDescriptions the structure
+  // directly contains the channel layout mapping. If it is
+  // kAudioChannelLayoutTag_UseChannelBitmap the layout is defined by the
+  // bitmap, retrieved via kAudioFormatProperty_ChannelLayoutForBitmap.
+  // Otherwise the tag itself describes the layout.
+  if (tag != kAudioChannelLayoutTag_UseChannelDescriptions) {
+    AudioFormatPropertyID property =
+        tag == kAudioChannelLayoutTag_UseChannelBitmap
+            ? kAudioFormatProperty_ChannelLayoutForBitmap
+            : kAudioFormatProperty_ChannelLayoutForTag;
+
+    if (property == kAudioFormatProperty_ChannelLayoutForBitmap) {
+      status = AudioFormatGetPropertyInfo(property, sizeof(bitmap), &bitmap,
+                                          &propertySize);
+    } else {
+      status = AudioFormatGetPropertyInfo(
+          property, sizeof(AudioChannelLayoutTag), &tag, &propertySize);
+    }
+    if (status || !propertySize) {
+      LOG("Couldn't get channel layout property info (%s:%s)",
+          FourCC2Str(property), FourCC2Str(status));
+      return NS_ERROR_FAILURE;
+    }
+    data = MakeUnique<uint8_t[]>(propertySize);
+    layout = reinterpret_cast<AudioChannelLayout*>(data.get());
+    size = propertySize;
+
+    if (property == kAudioFormatProperty_ChannelLayoutForBitmap) {
+      status = AudioFormatGetProperty(property, sizeof(bitmap), &bitmap,
+                                      &size, layout);
+    } else {
+      status = AudioFormatGetProperty(property, sizeof(AudioChannelLayoutTag),
+                                      &tag, &size, layout);
+    }
+    if (status || size != propertySize) {
+      LOG("Couldn't get channel layout property (%s:%s)", FourCC2Str(property),
+          FourCC2Str(status));
+      return NS_ERROR_FAILURE;
+    }
+    // We have retrieved the channel layout from the tag or bitmap.
+    // We can now directly use the channel descriptions.
+    layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
+  }
+
+  if (layout->mNumberChannelDescriptions != mOutputFormat.mChannelsPerFrame) {
+    LOG("Not matching the original channel number");
+    return NS_ERROR_FAILURE;
+  }
+
+  AutoTArray<AudioConfig::Channel, 8> channels;
+  channels.SetLength(layout->mNumberChannelDescriptions);
+  for (uint32_t i = 0; i < layout->mNumberChannelDescriptions; i++) {
+    AudioChannelLabel id = layout->mChannelDescriptions[i].mChannelLabel;
+    AudioConfig::Channel channel = ConvertChannelLabel(id);
+    channels[i] = channel;
+  }
+  mChannelLayout = MakeUnique<AudioConfig::ChannelLayout>(
+      mOutputFormat.mChannelsPerFrame, channels.Elements());
+  return NS_OK;
+}
+
+MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+  static const uint32_t MAX_FRAMES = 2;
+
+  if (mFormatID == kAudioFormatMPEG4AAC && mConfig.mExtendedProfile == 2 &&
+      mParsedFramesForAACMagicCookie < MAX_FRAMES) {
+    // Check for implicit SBR signalling if stream is AAC-LC
+    // This will provide us with an updated magic cookie for use with
+    // GetInputAudioDescription.
+    if (NS_SUCCEEDED(GetImplicitAACMagicCookie(aSample)) &&
+        !mMagicCookie.Length()) {
+      // nothing found yet, will try again later
+      mParsedFramesForAACMagicCookie++;
+      return NS_ERROR_NOT_INITIALIZED;
+    }
+    // An error occurred, fallback to using default stream description
+  }
+
+  LOG("Initializing Apple AudioToolbox decoder");
+
+  // Should we try and use magic cookie data from the AAC data? We do this if
+  // - We have an AAC config &
+  // - We do not already have magic cookie data.
+  // Otherwise we just use the existing cookie (which may be empty).
+  bool shouldUseAacMagicCookie =
+      mConfig.mCodecSpecificConfig.is<AacCodecSpecificData>() &&
+      mMagicCookie.IsEmpty();
+
+  nsTArray<uint8_t>& magicCookie =
+      shouldUseAacMagicCookie
+          ? *mConfig.mCodecSpecificConfig.as<AacCodecSpecificData>()
+                 .mEsDescriptorBinaryBlob
+          : mMagicCookie;
+  AudioStreamBasicDescription inputFormat;
+  PodZero(&inputFormat);
+
+  MediaResult rv = GetInputAudioDescription(inputFormat, magicCookie);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  // Fill in the output format manually.
+  PodZero(&mOutputFormat);
+  mOutputFormat.mFormatID = kAudioFormatLinearPCM;
+  mOutputFormat.mSampleRate = inputFormat.mSampleRate;
+  mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
+#if defined(MOZ_SAMPLE_TYPE_FLOAT32)
+  mOutputFormat.mBitsPerChannel = 32;
+  mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0;
+#elif defined(MOZ_SAMPLE_TYPE_S16)
+  mOutputFormat.mBitsPerChannel = 16;
+  mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | 0;
+#else
+#  error Unknown audio sample type
+#endif
+  // Set up the decoder so it gives us one sample per frame
+  mOutputFormat.mFramesPerPacket = 1;
+  mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame =
+      mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8;
+
+  OSStatus status =
+      AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter);
+  if (status) {
+    LOG("Error %d constructing AudioConverter", int(status));
+    mConverter = nullptr;
+    return MediaResult(
+        NS_ERROR_FAILURE,
+        RESULT_DETAIL("Error constructing AudioConverter:%d", int32_t(status)));
+  }
+
+  if (magicCookie.Length() && mFormatID == kAudioFormatMPEG4AAC) {
+    status = AudioConverterSetProperty(
+        mConverter, kAudioConverterDecompressionMagicCookie,
+        magicCookie.Length(), magicCookie.Elements());
+    if (status) {
+      LOG("Error setting AudioConverter AAC cookie:%d", int32_t(status));
+      ProcessShutdown();
+      return MediaResult(
+          NS_ERROR_FAILURE,
+          RESULT_DETAIL("Error setting AudioConverter AAC cookie:%d",
+                        int32_t(status)));
+    }
+  }
+
+  if (NS_FAILED(SetupChannelLayout())) {
+    NS_WARNING("Couldn't retrieve channel layout, will use default layout");
+  }
+
+  return NS_OK;
+}
+
+static void _MetadataCallback(void* aAppleATDecoder, AudioFileStreamID aStream,
+                              AudioFileStreamPropertyID aProperty,
+                              UInt32* aFlags) {
+  AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aAppleATDecoder);
+  MOZ_RELEASE_ASSERT(decoder->mThread->IsOnCurrentThread());
+
+  LOGEX(decoder, "MetadataCallback receiving: '%s'", FourCC2Str(aProperty));
+  if (aProperty == kAudioFileStreamProperty_MagicCookieData) {
+    UInt32 size;
+    Boolean writeable;
+    OSStatus rv =
+        AudioFileStreamGetPropertyInfo(aStream, aProperty, &size, &writeable);
+    if (rv) {
+      LOGEX(decoder, "Couldn't get property info for '%s' (%s)",
+            FourCC2Str(aProperty), FourCC2Str(rv));
+      decoder->mFileStreamError = true;
+      return;
+    }
+    auto data = MakeUnique<uint8_t[]>(size);
+    rv = AudioFileStreamGetProperty(aStream, aProperty, &size, data.get());
+    if (rv) {
+      LOGEX(decoder, "Couldn't get property '%s' (%s)", FourCC2Str(aProperty),
+            FourCC2Str(rv));
+      decoder->mFileStreamError = true;
+      return;
+    }
+    decoder->mMagicCookie.AppendElements(data.get(), size);
+  }
+}
+
+static void _SampleCallback(void* aSBR, UInt32 aNumBytes, UInt32 aNumPackets,
+                            const void* aData,
+                            AudioStreamPacketDescription* aPackets) {}
+
+nsresult AppleATDecoder::GetImplicitAACMagicCookie(
+    const MediaRawData* aSample) {
+  MOZ_ASSERT(mThread->IsOnCurrentThread());
+
+  // Prepend ADTS header to AAC audio.
+  RefPtr<MediaRawData> adtssample(aSample->Clone());
+  if (!adtssample) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  int8_t frequency_index = Adts::GetFrequencyIndex(mConfig.mRate);
+
+  bool rv = Adts::ConvertSample(mConfig.mChannels, frequency_index,
+                                mConfig.mProfile, adtssample);
+  if (!rv) {
+    NS_WARNING("Failed to apply ADTS header");
+    return NS_ERROR_FAILURE;
+  }
+  if (!mStream) {
+    OSStatus rv = AudioFileStreamOpen(this, _MetadataCallback, _SampleCallback,
+                                      kAudioFileAAC_ADTSType, &mStream);
+    if (rv) {
+      NS_WARNING("Couldn't open AudioFileStream");
+      return NS_ERROR_FAILURE;
+    }
+  }
+
+  OSStatus status = AudioFileStreamParseBytes(
+      mStream, adtssample->Size(), adtssample->Data(), 0 /* discontinuity */);
+  if (status) {
+    NS_WARNING("Couldn't parse sample");
+  }
+
+  if (status || mFileStreamError || mMagicCookie.Length()) {
+    // We have decoded a magic cookie or an error occurred as such
+    // we won't need the stream any longer.
+    AudioFileStreamClose(mStream);
+    mStream = nullptr;
+  }
+
+  return (mFileStreamError || status) ? NS_ERROR_FAILURE : NS_OK;
+}
+
+}  // namespace mozilla
+
+#undef LOG
+#undef LOGEX
diff --git a/dom/media/platforms/apple/AppleATDecoder.h b/dom/media/platforms/apple/AppleATDecoder.h
new file mode 100644
index 0000000000..d7aba2aacb
--- /dev/null
+++ b/dom/media/platforms/apple/AppleATDecoder.h
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_AppleATDecoder_h
+#define mozilla_AppleATDecoder_h
+
+#include <AudioToolbox/AudioToolbox.h>
+#include "PlatformDecoderModule.h"
+#include "mozilla/Vector.h"
+#include "AudioConverter.h"
+
+namespace mozilla {
+
+class TaskQueue;
+
+DDLoggedTypeDeclNameAndBase(AppleATDecoder, MediaDataDecoder);
+
+class AppleATDecoder final : public MediaDataDecoder,
+                             public DecoderDoctorLifeLogger<AppleATDecoder> {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AppleATDecoder, final);
+
+  explicit AppleATDecoder(const AudioInfo& aConfig);
+
+  RefPtr<InitPromise> Init() override;
+  RefPtr<DecodePromise> Decode(MediaRawData* aSample) override;
+  RefPtr<DecodePromise> Drain() override;
+  RefPtr<FlushPromise> Flush() override;
+  RefPtr<ShutdownPromise> Shutdown() override;
+
+  nsCString GetDescriptionName() const override {
+    return "apple coremedia decoder"_ns;
+  }
+
+  nsCString GetCodecName() const override;
+
+  // Callbacks also need access to the config.
+  const AudioInfo mConfig;
+
+  // Used to extract the magic cookie for HE-AAC detection.
+  nsTArray<uint8_t> mMagicCookie;
+  // Will be set to true should an error occur while attempting to retrieve
+  // the magic cookie property.
+  bool mFileStreamError;
+
+  nsCOMPtr<nsISerialEventTarget> mThread;
+
+ private:
+  ~AppleATDecoder();
+
+  AudioConverterRef mConverter;
+  AudioStreamBasicDescription mOutputFormat;
+  UInt32 mFormatID;
+  AudioFileStreamID mStream;
+  nsTArray<RefPtr<MediaRawData>> mQueuedSamples;
+  UniquePtr<AudioConfig::ChannelLayout> mChannelLayout;
+  UniquePtr<AudioConverter> mAudioConverter;
+  DecodedData mDecodedSamples;
+
+  void ProcessShutdown();
+  MediaResult DecodeSample(MediaRawData* aSample);
+  MediaResult GetInputAudioDescription(AudioStreamBasicDescription& aDesc,
+                                       const nsTArray<uint8_t>& aExtraData);
+  // Setup AudioConverter once all information required has been gathered.
+  // Will return NS_ERROR_NOT_INITIALIZED if more data is required.
+  MediaResult SetupDecoder(MediaRawData* aSample);
+  nsresult GetImplicitAACMagicCookie(const MediaRawData* aSample);
+  nsresult SetupChannelLayout();
+  uint32_t mParsedFramesForAACMagicCookie;
+  uint32_t mEncoderDelay = 0;
+  uint64_t mTotalMediaFrames = 0;
+  bool mErrored;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_AppleATDecoder_h
diff --git a/dom/media/platforms/apple/AppleDecoderModule.cpp b/dom/media/platforms/apple/AppleDecoderModule.cpp
new file mode 100644
index 0000000000..2e17d93313
--- /dev/null
+++ b/dom/media/platforms/apple/AppleDecoderModule.cpp
@@ -0,0 +1,230 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AppleDecoderModule.h"
+
+#include <dlfcn.h>
+
+#include "AppleATDecoder.h"
+#include "AppleVTDecoder.h"
+#include "MP4Decoder.h"
+#include "VideoUtils.h"
+#include "VPXDecoder.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/Logging.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/gfx/gfxVars.h"
+
+extern "C" {
+// Weakly imported; the register function only exists from macOS 11.
+extern void VTRegisterSupplementalVideoDecoderIfAvailable(
+    CMVideoCodecType codecType) __attribute__((weak_import));
+extern Boolean VTIsHardwareDecodeSupported(CMVideoCodecType codecType)
+    __attribute__((weak_import));
+}
+
+namespace mozilla {
+
+bool AppleDecoderModule::sInitialized = false;  // Set to true by Init().
+bool AppleDecoderModule::sCanUseVP9Decoder = false;  // Computed by Init().
+
+/* static */
+void AppleDecoderModule::Init() {
+  // One-time setup; repeated calls are no-ops.
+  if (!sInitialized) {
+    sInitialized = true;
+    // VP9 needs the supplemental decoder plus a creatable HW decoder.
+    if (RegisterSupplementalVP9Decoder()) {
+      sCanUseVP9Decoder = CanCreateHWDecoder(media::MediaCodec::VP9);
+    }
+  }
+}
+
+nsresult AppleDecoderModule::Startup() {
+  // Startup succeeds only once Init() has run, which is what the
+  // sInitialized flag records; anything earlier is reported as a
+  // generic failure.
+  return sInitialized ? NS_OK : NS_ERROR_FAILURE;
+}
+
+already_AddRefed<MediaDataDecoder> AppleDecoderModule::CreateVideoDecoder(
+    const CreateDecoderParams& aParams) {
+  const auto support =
+      Supports(SupportDecoderParams(aParams), nullptr /* diagnostics */);
+  // The second check also takes the caller's decoder options into account.
+  if (support == media::DecodeSupport::Unsupported ||
+      !IsVideoSupported(aParams.VideoConfig(), aParams.mOptions)) {
+    return nullptr;
+  }
+  RefPtr<MediaDataDecoder> videoDecoder = new AppleVTDecoder(
+      aParams.VideoConfig(), aParams.mImageContainer, aParams.mOptions,
+      aParams.mKnowsCompositor, aParams.mTrackingId);
+  return videoDecoder.forget();
+}
+
+already_AddRefed<MediaDataDecoder> AppleDecoderModule::CreateAudioDecoder(
+    const CreateDecoderParams& aParams) {
+  const auto support = Supports(SupportDecoderParams(aParams), nullptr);
+  if (support == media::DecodeSupport::Unsupported) {
+    return nullptr;  // Rejected configuration: no decoder is created.
+  }
+  RefPtr<MediaDataDecoder> audio = new AppleATDecoder(aParams.AudioConfig());
+  return audio.forget();
+}
+
+// Reports whether (and how) this module can decode aMimeType. MP3 is only
+// considered when the ffvpx MP3 decoder pref is disabled.
+media::DecodeSupportSet AppleDecoderModule::SupportsMimeType(
+    const nsACString& aMimeType, DecoderDoctorDiagnostics* aDiagnostics) const {
+  bool checkSupport = (aMimeType.EqualsLiteral("audio/mpeg") &&
+                       !StaticPrefs::media_ffvpx_mp3_enabled()) ||
+                      aMimeType.EqualsLiteral("audio/mp4a-latm") ||
+                      MP4Decoder::IsH264(aMimeType) ||
+                      VPXDecoder::IsVP9(aMimeType);
+  media::DecodeSupportSet supportType{media::DecodeSupport::Unsupported};
+  // Audio is always reported as software decode; video defers to Supports().
+  if (checkSupport) {
+    UniquePtr<TrackInfo> trackInfo = CreateTrackInfoWithMIMEType(aMimeType);
+    if (trackInfo && trackInfo->IsAudio()) {
+      supportType = media::DecodeSupport::SoftwareDecode;
+    } else if (trackInfo) {
+      supportType = Supports(SupportDecoderParams(*trackInfo), aDiagnostics);
+    }
+  }
+  // nsACString is not guaranteed to be null-terminated, so flatten it for %s.
+  MOZ_LOG(sPDMLog, LogLevel::Debug,
+          ("Apple decoder %s requested type '%s'",
+           supportType == media::DecodeSupport::Unsupported ? "rejects"
+                                                            : "supports",
+           PromiseFlatCString(aMimeType).get()));
+  return supportType;
+}
+
+media::DecodeSupportSet AppleDecoderModule::Supports(
+    const SupportDecoderParams& aParams,
+    DecoderDoctorDiagnostics* aDiagnostics) const {
+  const auto& config = aParams.mConfig;
+  // Audio support is decided purely from the MIME type.
+  if (config.IsAudio()) {
+    return SupportsMimeType(config.mMimeType, aDiagnostics);
+  }
+  const auto* videoInfo = config.GetAsVideoInfo();
+  if (!videoInfo || !IsVideoSupported(*videoInfo)) {
+    return media::DecodeSupport::Unsupported;
+  }
+  // VP9 counts as hardware decode when a HW decoder can be created.
+  const bool hardwareVP9 = config.mMimeType == "video/vp9" &&
+                           CanCreateHWDecoder(media::MediaCodec::VP9);
+  return hardwareVP9 ? media::DecodeSupport::HardwareDecode
+                     : media::DecodeSupport::SoftwareDecode;
+}
+
+// Returns true if aConfig can be decoded by this module: H.264 always, VP9
+// only through the hardware decoder and within its known constraints.
+bool AppleDecoderModule::IsVideoSupported(
+    const VideoInfo& aConfig,
+    const CreateDecoderParams::OptionSet& aOptions) const {
+  if (MP4Decoder::IsH264(aConfig.mMimeType)) {
+    return true;
+  }
+  // Anything else must be VP9 with the HW decoder available and permitted.
+  if (!VPXDecoder::IsVP9(aConfig.mMimeType) || !sCanUseVP9Decoder ||
+      aOptions.contains(
+          CreateDecoderParams::Option::HardwareDecoderNotAllowed)) {
+    return false;
+  }
+  if (aConfig.HasAlpha()) {
+    return false;
+  }
+
+  // HW VP9 decoder only supports 8 or 10 bit color.
+  if (aConfig.mColorDepth != gfx::ColorDepth::COLOR_8 &&
+      aConfig.mColorDepth != gfx::ColorDepth::COLOR_10) {
+    return false;
+  }
+
+  // See if we have a vpcC box, and check further constraints.
+  // HW VP9 Decoder supports Profile 0 & 2 (YUV420)
+  // A missing or too-short box cannot be inspected, so it must not be read.
+  if (!aConfig.mExtraData || aConfig.mExtraData->Length() < 5) {
+    return true;  // Assume it's okay.
+  }
+  // Byte 4 of the box is read as the profile.
+  const int profile = aConfig.mExtraData->ElementAt(4);
+  return profile == 0 || profile == 2;
+}
+
+/* static */  // Probes HW decode support for aCodec using a dummy decoder.
+bool AppleDecoderModule::CanCreateHWDecoder(media::MediaCodec aCodec) {
+  // Check whether HW decode should even be enabled
+  if (!gfx::gfxVars::CanUseHardwareVideoDecoding()) {
+    return false;
+  }
+  // Dummy 1920x1080 config used to build the probe decoder further down.
+  VideoInfo info(1920, 1080);
+  bool checkSupport = false;
+
+  // We must wrap the code within __builtin_available to avoid compilation
+  // warning as VTIsHardwareDecodeSupported is only available from macOS 10.13.
+  if (__builtin_available(macOS 10.13, *)) {
+    if (!VTIsHardwareDecodeSupported) {  // Weak import did not resolve.
+      return false;
+    }
+    switch (aCodec) {
+      case media::MediaCodec::VP9:
+        info.mMimeType = "video/vp9";
+        VPXDecoder::GetVPCCBox(info.mExtraData, VPXDecoder::VPXStreamInfo());
+        checkSupport = VTIsHardwareDecodeSupported(kCMVideoCodecType_VP9);
+        break;
+      default:
+        // Only support VP9 HW decode for time being
+        checkSupport = false;
+        break;
+    }
+  }
+  // Attempt to create a decoder and ask it whether it is HW accelerated.
+  if (checkSupport) {
+    RefPtr<AppleVTDecoder> decoder =
+        new AppleVTDecoder(info, nullptr, {}, nullptr, Nothing());
+    MediaResult rv = decoder->InitializeSession();
+    if (!NS_SUCCEEDED(rv)) {
+      return false;  // NOTE(review): no Shutdown() on this path - confirm.
+    }
+    nsAutoCString failureReason;
+    bool hwSupport = decoder->IsHardwareAccelerated(failureReason);
+    decoder->Shutdown();
+    if (!hwSupport) {
+      MOZ_LOG(sPDMLog, LogLevel::Debug,
+              ("Apple HW decode failure: '%s'", failureReason.BeginReading()));
+    }
+    return hwSupport;
+  }
+  return false;
+}
+
+/* static */  // Registers the supplemental VP9 decoder where the OS has it.
+bool AppleDecoderModule::RegisterSupplementalVP9Decoder() {
+  static bool sRegisterIfAvailable = []() {  // Evaluated on first call only.
+#if !defined(MAC_OS_VERSION_11_0) || \
+    MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_VERSION_11_0
+    if (nsCocoaFeatures::OnBigSurOrLater()) {  // SDK predates macOS 11.
+#else
+    if (__builtin_available(macos 11.0, *)) {
+#endif
+      VTRegisterSupplementalVideoDecoderIfAvailable(kCMVideoCodecType_VP9);
+      return true;
+    }
+    return false;
+  }();
+  return sRegisterIfAvailable;
+}
+
+/* static */  // Factory for the module; its constructor is private.
+already_AddRefed<PlatformDecoderModule> AppleDecoderModule::Create() {
+  return RefPtr<PlatformDecoderModule>(new AppleDecoderModule()).forget();
+}
+
+}  // namespace mozilla
diff --git a/dom/media/platforms/apple/AppleDecoderModule.h b/dom/media/platforms/apple/AppleDecoderModule.h
new file mode 100644
index 0000000000..f869243a5c
--- /dev/null
+++ b/dom/media/platforms/apple/AppleDecoderModule.h
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_AppleDecoderModule_h
+#define mozilla_AppleDecoderModule_h
+
+#include "PlatformDecoderModule.h"
+
+namespace mozilla {
+
+// PlatformDecoderModule that creates AppleATDecoder / AppleVTDecoder objects.
+class AppleDecoderModule : public PlatformDecoderModule {
+  template <typename T, typename... Args>
+  friend already_AddRefed<T> MakeAndAddRef(Args&&...);
+ public:
+  static already_AddRefed<PlatformDecoderModule> Create();
+  // Succeeds only after Init() has been called.
+  nsresult Startup() override;
+
+  // Decode thread.
+  already_AddRefed<MediaDataDecoder> CreateVideoDecoder(
+      const CreateDecoderParams& aParams) override;
+
+  // Decode thread.
+  already_AddRefed<MediaDataDecoder> CreateAudioDecoder(
+      const CreateDecoderParams& aParams) override;
+
+  media::DecodeSupportSet SupportsMimeType(
+      const nsACString& aMimeType,
+      DecoderDoctorDiagnostics* aDiagnostics) const override;
+
+  media::DecodeSupportSet Supports(
+      const SupportDecoderParams& aParams,
+      DecoderDoctorDiagnostics* aDiagnostics) const override;
+  // One-time setup; probes whether the VP9 decoder can be used.
+  static void Init();
+  // Result of the VP9 probe performed by Init().
+  static bool sCanUseVP9Decoder;
+  // FourCC codec type codes for H.264 and VP9.
+  static constexpr int kCMVideoCodecType_H264{'avc1'};
+  static constexpr int kCMVideoCodecType_VP9{'vp09'};
+
+ private:
+  AppleDecoderModule() = default;
+  virtual ~AppleDecoderModule() = default;
+  // Set by Init(); checked by Startup().
+  static bool sInitialized;
+  bool IsVideoSupported(const VideoInfo& aConfig,
+                        const CreateDecoderParams::OptionSet& aOptions =
+                            CreateDecoderParams::OptionSet()) const;
+  // Enable VP9 HW decoder.
+  static bool RegisterSupplementalVP9Decoder();
+  // Return true if a dummy hardware decoder could be created.
+  static bool CanCreateHWDecoder(media::MediaCodec aCodec);
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_AppleDecoderModule_h
diff --git a/dom/media/platforms/apple/AppleEncoderModule.cpp b/dom/media/platforms/apple/AppleEncoderModule.cpp
new file mode 100644
index 0000000000..f0321297a4
--- /dev/null
+++ b/dom/media/platforms/apple/AppleEncoderModule.cpp
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AppleEncoderModule.h"
+
+#include "AppleVTEncoder.h"
+#include "MP4Decoder.h"
+
+namespace mozilla {
+
+bool AppleEncoderModule::SupportsMimeType(const nsACString& aMimeType) const {
+  return MP4Decoder::IsH264(aMimeType);  // Only H.264 is supported.
+}
+
+already_AddRefed<MediaDataEncoder> AppleEncoderModule::CreateVideoEncoder(
+    const CreateEncoderParams& aParams, const bool aHardwareNotAllowed) const {
+  RefPtr<MediaDataEncoder> vtEncoder = new AppleVTEncoder(
+      aParams.ToH264Config(), aParams.mTaskQueue, aHardwareNotAllowed);
+  return vtEncoder.forget();  // Ownership passes to the caller.
+}
+
+}  // namespace mozilla
diff --git a/dom/media/platforms/apple/AppleEncoderModule.h b/dom/media/platforms/apple/AppleEncoderModule.h
new file mode 100644
index 0000000000..ec2868f104
--- /dev/null
+++ b/dom/media/platforms/apple/AppleEncoderModule.h
@@ -0,0 +1,27 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AppleEncoderModule_h_
+#define AppleEncoderModule_h_
+
+#include "PlatformEncoderModule.h"
+
+namespace mozilla {
+class AppleEncoderModule final : public PlatformEncoderModule {
+ public:
+  AppleEncoderModule() = default;
+  virtual ~AppleEncoderModule() = default;
+  // True only for H.264 MIME types.
+  bool SupportsMimeType(const nsACString& aMimeType) const override;
+  // Creates an AppleVTEncoder configured from aParams.
+  already_AddRefed<MediaDataEncoder> CreateVideoEncoder(
+      const CreateEncoderParams& aParams,
+      const bool aHardwareNotAllowed) const override;
+};
+
+}  // namespace mozilla
+
+#endif /* AppleEncoderModule_h_ */
diff --git a/dom/media/platforms/apple/AppleUtils.h b/dom/media/platforms/apple/AppleUtils.h
new file mode 100644
index 0000000000..96bf079b0c
--- /dev/null
+++ b/dom/media/platforms/apple/AppleUtils.h
@@ -0,0 +1,88 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Utility functions to help with Apple API calls.
+
+#ifndef mozilla_AppleUtils_h
+#define mozilla_AppleUtils_h
+
+#include "mozilla/Attributes.h"
+#include <CoreFoundation/CFBase.h>  // For CFRelease()
+#include <CoreVideo/CVBuffer.h>     // For CVBufferRelease()
+
+namespace mozilla {
+
+// Scoped owner for CoreFoundation-style references: calls relFunc (CFRelease,
+// CVBufferRelease, ...) on the wrapped reference when it goes out of scope.
+template <class T, class F, F relFunc>
+class AutoObjRefRelease {
+ public:
+  MOZ_IMPLICIT AutoObjRefRelease(T aRef) : mRef(aRef) {}
+  ~AutoObjRefRelease() {
+    if (mRef) {
+      relFunc(mRef);
+    }
+  }
+  // Return the wrapped ref so it can be used as an in parameter.
+  operator T() { return mRef; }
+  // Return a pointer to the wrapped ref for use as an out parameter.
+  T* receive() { return &mRef; }
+
+ private:
+  // Copy assignment is unsupported; deleted so misuse fails to compile.
+  AutoObjRefRelease<T, F, relFunc>& operator=(
+      const AutoObjRefRelease<T, F, relFunc>&) = delete;
+  T mRef;
+};
+
+template <typename T>  // Scoped owner released via CFRelease().
+using AutoCFRelease = AutoObjRefRelease<T, decltype(&CFRelease), &CFRelease>;
+template <typename T>  // Scoped owner released via CVBufferRelease().
+using AutoCVBufferRelease =
+    AutoObjRefRelease<T, decltype(&CVBufferRelease), &CVBufferRelease>;
+
+// CFRefPtr: copyable CoreFoundation smart pointer (CFRetain / CFRelease).
+template <class T>
+class CFRefPtr {
+ public:
+  explicit CFRefPtr(T aRef) : mRef(aRef) {
+    if (mRef) {
+      CFRetain(mRef);
+    }
+  }
+  // Copy constructor: shares the reference and retains it once more.
+  CFRefPtr(const CFRefPtr<T>& aCFRefPtr) : mRef(aCFRefPtr.mRef) {
+    if (mRef) {
+      CFRetain(mRef);
+    }
+  }
+  // Copy assignment: a no-op when both sides already wrap the same reference.
+  CFRefPtr<T>& operator=(const CFRefPtr<T>& aCFRefPtr) {
+    if (mRef == aCFRefPtr.mRef) {
+      return *this;
+    }
+    if (mRef) {
+      CFRelease(mRef);
+    }
+    mRef = aCFRefPtr.mRef;
+    if (mRef) {
+      CFRetain(mRef);
+    }
+    return *this;
+  }
+  ~CFRefPtr() {
+    if (mRef) {
+      CFRelease(mRef);
+    }
+  }
+  // Return the wrapped ref so it can be used as an in parameter.
+  operator T() { return mRef; }
+
+ private:
+  T mRef;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_AppleUtils_h
diff --git a/dom/media/platforms/apple/AppleVTDecoder.cpp b/dom/media/platforms/apple/AppleVTDecoder.cpp
new file mode 100644
index 0000000000..7abc46274b
--- /dev/null
+++ b/dom/media/platforms/apple/AppleVTDecoder.cpp
@@ -0,0 +1,761 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AppleVTDecoder.h"
+
+#include <CoreVideo/CVPixelBufferIOSurface.h>
+#include <IOSurface/IOSurface.h>
+#include <limits>
+
+#include "AppleDecoderModule.h"
+#include "AppleUtils.h"
+#include "CallbackThreadRegistry.h"
+#include "H264.h"
+#include "MP4Decoder.h"
+#include "MacIOSurfaceImage.h"
+#include "MediaData.h"
+#include "VPXDecoder.h"
+#include "VideoUtils.h"
+#include "gfxMacUtils.h"
+#include "gfxPlatform.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/TaskQueue.h"
+#include "mozilla/gfx/gfxVars.h"
+#include "nsThreadUtils.h"
+
+#define LOG(...) DDMOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__)
+#define LOGEX(_this, ...) \
+  DDMOZ_LOGEX(_this, sPDMLog, mozilla::LogLevel::Debug, __VA_ARGS__)
+
+namespace mozilla {
+
+using namespace layers;
+
+AppleVTDecoder::AppleVTDecoder(const VideoInfo& aConfig,
+                               layers::ImageContainer* aImageContainer,
+                               CreateDecoderParams::OptionSet aOptions,
+                               layers::KnowsCompositor* aKnowsCompositor,
+                               Maybe<TrackingId> aTrackingId)
+    : mExtraData(aConfig.mExtraData),
+      mPictureWidth(aConfig.mImage.width),
+      mPictureHeight(aConfig.mImage.height),
+      mDisplayWidth(aConfig.mDisplay.width),
+      mDisplayHeight(aConfig.mDisplay.height),
+      mColorSpace(aConfig.mColorSpace
+                      ? *aConfig.mColorSpace
+                      : DefaultColorSpace({mPictureWidth, mPictureHeight})),
+      mColorPrimaries(aConfig.mColorPrimaries ? *aConfig.mColorPrimaries
+                                              : gfx::ColorSpace2::BT709),
+      mTransferFunction(aConfig.mTransferFunction
+                            ? *aConfig.mTransferFunction
+                            : gfx::TransferFunction::BT709),
+      mColorRange(aConfig.mColorRange),
+      mColorDepth(aConfig.mColorDepth),
+      mStreamType(MP4Decoder::IsH264(aConfig.mMimeType)  ? StreamType::H264
+                  : VPXDecoder::IsVP9(aConfig.mMimeType) ? StreamType::VP9
+                                                         : StreamType::Unknown),
+      mTaskQueue(TaskQueue::Create(
+          GetMediaThreadPool(MediaThreadType::PLATFORM_DECODER),
+          "AppleVTDecoder")),
+      mMaxRefFrames(
+          mStreamType != StreamType::H264 ||
+                  aOptions.contains(CreateDecoderParams::Option::LowLatency)
+              ? 0  // Non-H.264 or low-latency: hold back no frames.
+              : H264::ComputeMaxRefFrames(aConfig.mExtraData)),
+      mImageContainer(aImageContainer),
+      mKnowsCompositor(aKnowsCompositor)
+#ifdef MOZ_WIDGET_UIKIT
+      ,
+      mUseSoftwareImages(true)
+#else
+      ,
+      mUseSoftwareImages(aKnowsCompositor &&
+                         aKnowsCompositor->GetWebRenderCompositorType() ==
+                             layers::WebRenderCompositor::SOFTWARE)
+#endif
+      ,
+      mTrackingId(aTrackingId),
+      mIsFlushing(false),
+      mCallbackThreadId(),
+      mMonitor("AppleVTDecoder"),
+      mPromise(&mMonitor),  // To ensure our PromiseHolder is only ever accessed
+                            // with the monitor held.
+      mFormat(nullptr),
+      mSession(nullptr),
+      mIsHardwareAccelerated(false) {
+  MOZ_COUNT_CTOR(AppleVTDecoder);
+  MOZ_ASSERT(mStreamType != StreamType::Unknown);  // Only H.264 and VP9.
+  // TODO: Verify aConfig.mime_type.
+  LOG("Creating AppleVTDecoder for %dx%d %s video", mDisplayWidth,
+      mDisplayHeight, mStreamType == StreamType::H264 ? "H.264" : "VP9");
+}
+// Destructor: only balances the constructor's MOZ_COUNT_CTOR bookkeeping.
+AppleVTDecoder::~AppleVTDecoder() { MOZ_COUNT_DTOR(AppleVTDecoder); }
+
+RefPtr<MediaDataDecoder::InitPromise> AppleVTDecoder::Init() {
+  // The session is set up synchronously, so the returned promise is
+  // already settled when the caller receives it.
+  MediaResult result = InitializeSession();
+  if (NS_FAILED(result)) {
+    return InitPromise::CreateAndReject(result, __func__);
+  }
+  return InitPromise::CreateAndResolve(TrackType::kVideoTrack, __func__);
+}
+
+RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::Decode(
+    MediaRawData* aSample) {
+  LOG("mp4 input sample %p pts %lld duration %lld us%s %zu bytes", aSample,
+      aSample->mTime.ToMicroseconds(), aSample->mDuration.ToMicroseconds(),
+      aSample->mKeyframe ? " keyframe" : "", aSample->Size());
+  // The closure keeps both the decoder and the sample alive until it runs.
+  auto task = [self = RefPtr<AppleVTDecoder>(this), this,
+               sample = RefPtr<MediaRawData>(aSample)] {
+    RefPtr<DecodePromise> promise;
+    {
+      MonitorAutoLock lock(mMonitor);  // Only held to create the promise.
+      promise = mPromise.Ensure(__func__);
+    }
+    ProcessDecode(sample);
+    return promise;
+  };
+  return InvokeAsync(mTaskQueue, __func__, std::move(task));
+}
+
+RefPtr<MediaDataDecoder::FlushPromise> AppleVTDecoder::Flush() {
+  mIsFlushing = true;  // Set before dispatch; ProcessFlush() clears it again.
+  return InvokeAsync(mTaskQueue, this, __func__, &AppleVTDecoder::ProcessFlush);
+}
+// Drain: runs ProcessDrain() on the task queue and returns its promise.
+RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::Drain() {
+  return InvokeAsync(mTaskQueue, this, __func__, &AppleVTDecoder::ProcessDrain);
+}
+
+RefPtr<ShutdownPromise> AppleVTDecoder::Shutdown() {
+  auto teardown = [decoder = RefPtr<AppleVTDecoder>(this)]() {
+    decoder->ProcessShutdown();  // Releases the session and the format.
+    return decoder->mTaskQueue->BeginShutdown();
+  };
+  return InvokeAsync(mTaskQueue, __func__, std::move(teardown));
+}
+
+// Build the CoreMedia timing record (duration, PTS, DTS) for a raw sample.
+static CMSampleTimingInfo TimingInfoFromSample(MediaRawData* aSample) {
+  // All three values are expressed in microseconds, hence the USECS_PER_S
+  // timescale handed to CMTimeMake.
+  const auto toCMTime = [](const auto& aTime) {
+    return CMTimeMake(aTime.ToMicroseconds(), USECS_PER_S);
+  };
+  CMSampleTimingInfo timing;
+  timing.duration = toCMTime(aSample->mDuration);
+  timing.presentationTimeStamp = toCMTime(aSample->mTime);
+  timing.decodeTimeStamp = toCMTime(aSample->mTimecode);
+  return timing;
+}
+
+// Runs on the task queue: wraps aSample in a CMSampleBuffer and submits it
+// to the decompression session, rejecting mPromise if any step fails.
+void AppleVTDecoder::ProcessDecode(MediaRawData* aSample) {
+  AssertOnTaskQueue();
+  PROCESS_DECODE_LOG(aSample);
+  if (mIsFlushing) {
+    MonitorAutoLock mon(mMonitor);
+    mPromise.Reject(NS_ERROR_DOM_MEDIA_CANCELED, __func__);
+    return;
+  }
+
+  mTrackingId.apply([&](const auto& aId) {
+    MediaInfoFlag flag = MediaInfoFlag::None;
+    flag |= (aSample->mKeyframe ? MediaInfoFlag::KeyFrame
+                                : MediaInfoFlag::NonKeyFrame);
+    flag |= (mIsHardwareAccelerated ? MediaInfoFlag::HardwareDecoding
+                                    : MediaInfoFlag::SoftwareDecoding);
+    switch (mStreamType) {
+      case StreamType::H264:
+        flag |= MediaInfoFlag::VIDEO_H264;
+        break;
+      case StreamType::VP9:
+        flag |= MediaInfoFlag::VIDEO_VP9;
+        break;
+      default:
+        break;
+    }
+    mPerformanceRecorder.Start(aSample->mTimecode.ToMicroseconds(),
+                               "AppleVTDecoder"_ns, aId, flag);
+  });
+
+  AutoCFRelease<CMBlockBufferRef> block = nullptr;
+  AutoCFRelease<CMSampleBufferRef> sample = nullptr;
+  VTDecodeInfoFlags infoFlags = 0;  // Read below even when the call fails.
+  OSStatus rv;
+  // FIXME: This copies the sample data. I think we can provide
+  // a custom block source which reuses the aSample buffer.
+  // But note that there may be a problem keeping the samples
+  // alive over multiple frames.
+  rv = CMBlockBufferCreateWithMemoryBlock(
+      kCFAllocatorDefault,  // Struct allocator.
+      const_cast<uint8_t*>(aSample->Data()), aSample->Size(),
+      kCFAllocatorNull,  // Block allocator.
+      NULL,              // Block source.
+      0,                 // Data offset.
+      aSample->Size(), false, block.receive());
+  if (rv != noErr) {
+    NS_ERROR("Couldn't create CMBlockBuffer");
+    MonitorAutoLock mon(mMonitor);
+    mPromise.Reject(
+        MediaResult(NS_ERROR_OUT_OF_MEMORY,
+                    RESULT_DETAIL("CMBlockBufferCreateWithMemoryBlock:%x", rv)),
+        __func__);
+    return;
+  }
+
+  CMSampleTimingInfo timestamp = TimingInfoFromSample(aSample);
+  rv = CMSampleBufferCreate(kCFAllocatorDefault, block, true, 0, 0, mFormat, 1,
+                            1, &timestamp, 0, NULL, sample.receive());
+  if (rv != noErr) {
+    NS_ERROR("Couldn't create CMSampleBuffer");
+    MonitorAutoLock mon(mMonitor);
+    mPromise.Reject(MediaResult(NS_ERROR_OUT_OF_MEMORY,
+                                RESULT_DETAIL("CMSampleBufferCreate:%x", rv)),
+                    __func__);
+    return;
+  }
+
+  VTDecodeFrameFlags decodeFlags =
+      kVTDecodeFrame_EnableAsynchronousDecompression;
+  rv = VTDecompressionSessionDecodeFrame(
+      mSession, sample, decodeFlags, CreateAppleFrameRef(aSample), &infoFlags);
+  if (infoFlags & kVTDecodeInfo_FrameDropped) {
+    MonitorAutoLock mon(mMonitor);
+    // Smile and nod
+    NS_WARNING("Decoder synchronously dropped frame");
+    MaybeResolveBufferedFrames();
+    return;
+  }
+
+  if (rv != noErr) {
+    LOG("AppleVTDecoder: Error %d VTDecompressionSessionDecodeFrame", rv);
+    NS_WARNING("Couldn't pass frame to decoder");
+    // It appears that even when VTDecompressionSessionDecodeFrame returns a
+    // failure, decoding sometimes actually gets processed.
+    MonitorAutoLock mon(mMonitor);
+    mPromise.RejectIfExists(
+        MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                    RESULT_DETAIL("VTDecompressionSessionDecodeFrame:%x", rv)),
+        __func__);
+    return;
+  }
+}
+
+void AppleVTDecoder::ProcessShutdown() {
+  if (mSession) {
+    LOG("%s: cleaning up session %p", __func__, mSession);
+    VTDecompressionSessionInvalidate(mSession);  // Invalidate, then release.
+    CFRelease(mSession);
+    mSession = nullptr;  // A repeated call then skips this branch.
+  }
+  if (mFormat) {
+    LOG("%s: releasing format %p", __func__, mFormat);
+    CFRelease(mFormat);
+    mFormat = nullptr;  // A repeated call then skips this branch.
+  }
+}
+
+RefPtr<MediaDataDecoder::FlushPromise> AppleVTDecoder::ProcessFlush() {
+  AssertOnTaskQueue();
+  // Let in-flight frames finish first; a failure here is only logged.
+  if (NS_FAILED(WaitForAsynchronousFrames())) {
+    LOG("AppleVTDecoder::Flush failed waiting for platform decoder");
+  }
+  MonitorAutoLock lock(mMonitor);
+  // Cancel any pending decode and discard everything still buffered.
+  mPromise.RejectIfExists(NS_ERROR_DOM_MEDIA_CANCELED, __func__);
+  while (!mReorderQueue.IsEmpty()) {
+    mReorderQueue.Pop();
+  }
+  mPerformanceRecorder.Record(std::numeric_limits<int64_t>::max());
+  mSeekTargetThreshold.reset();
+  mIsFlushing = false;  // Flushing is over from this point on.
+  return FlushPromise::CreateAndResolve(true, __func__);
+}
+
+RefPtr<MediaDataDecoder::DecodePromise> AppleVTDecoder::ProcessDrain() {
+  AssertOnTaskQueue();
+  // Wait for outstanding frames, then hand back everything still queued.
+  if (NS_FAILED(WaitForAsynchronousFrames())) {
+    LOG("AppleVTDecoder::Drain failed waiting for platform decoder");
+  }
+  MonitorAutoLock lock(mMonitor);
+  DecodedData drained;
+  while (!mReorderQueue.IsEmpty()) {
+    drained.AppendElement(mReorderQueue.Pop());
+  }
+  return DecodePromise::CreateAndResolve(std::move(drained), __func__);
+}
+
+AppleVTDecoder::AppleFrameRef* AppleVTDecoder::CreateAppleFrameRef(
+    const MediaRawData* aSample) {
+  MOZ_ASSERT(aSample);
+  return new AppleFrameRef(*aSample);  // Heap-allocated; caller owns it.
+}
+
+void AppleVTDecoder::SetSeekThreshold(const media::TimeUnit& aTime) {
+  if (!aTime.IsValid()) {
+    mSeekTargetThreshold.reset();  // Invalid time: no threshold.
+    return;
+  }
+  mSeekTargetThreshold = Some(aTime);
+}
+
+//
+// Implementation details.
+//
+
+// Output callback handed to the VideoToolbox decoder. It is a free static
+// function because the API takes a C-style function pointer plus userdata
+// pointers. It checks the arguments and forwards the decoded image to the
+// owning AppleVTDecoder.
+static void PlatformCallback(void* decompressionOutputRefCon,
+                             void* sourceFrameRefCon, OSStatus status,
+                             VTDecodeInfoFlags flags, CVImageBufferRef image,
+                             CMTime presentationTimeStamp,
+                             CMTime presentationDuration) {
+  auto* decoder = static_cast<AppleVTDecoder*>(decompressionOutputRefCon);
+  LOGEX(decoder, "AppleVideoDecoder %s status %d flags %d", __func__,
+        static_cast<int>(status), flags);
+
+  // Take ownership of the per-frame record so it is freed on every path.
+  UniquePtr<AppleVTDecoder::AppleFrameRef> frameRef(
+      static_cast<AppleVTDecoder::AppleFrameRef*>(sourceFrameRefCon));
+
+  if (status != noErr) {
+    NS_WARNING("VideoToolbox decoder returned an error");
+    decoder->OnDecodeError(status);
+    return;
+  }
+  // The remaining checks only warn; the frame is forwarded regardless.
+  if (!image) {
+    NS_WARNING("VideoToolbox decoder returned no data");
+  } else if (flags & kVTDecodeInfo_FrameDropped) {
+    NS_WARNING("  ...frame tagged as dropped...");
+  } else {
+    MOZ_ASSERT(CFGetTypeID(image) == CVPixelBufferGetTypeID(),
+               "VideoToolbox returned an unexpected image type");
+  }
+  decoder->OutputFrame(image, *frameRef);
+}
+
+void AppleVTDecoder::MaybeResolveBufferedFrames() {
+  mMonitor.AssertCurrentThreadOwns();
+
+  // Without a pending decode promise there is nobody to hand frames to.
+  if (!mPromise.IsEmpty()) {
+    // Release frames until only mMaxRefFrames remain queued.
+    DecodedData ready;
+    while (mReorderQueue.Length() > mMaxRefFrames) {
+      ready.AppendElement(mReorderQueue.Pop());
+    }
+    mPromise.Resolve(std::move(ready), __func__);
+  }
+}
+
+void AppleVTDecoder::MaybeRegisterCallbackThread() {
+  // Register only when the callback arrives on a thread not seen last time.
+  const ProfilerThreadId current = profiler_current_thread_id();
+  if (MOZ_UNLIKELY(!(current == mCallbackThreadId))) {
+    mCallbackThreadId = current;
+    CallbackThreadRegistry::Get()->Register(mCallbackThreadId,
+                                            "AppleVTDecoderCallback");
+  }
+}
+
+nsCString AppleVTDecoder::GetCodecName() const {
+  // Short codec label for the stream type this decoder was created with.
+  if (mStreamType == StreamType::H264) {
+    return "h264"_ns;
+  }
+  if (mStreamType == StreamType::VP9) {
+    return "vp9"_ns;
+  }
+  return "unknown"_ns;
+}
+
+// Copy and return a decoded frame.
+void AppleVTDecoder::OutputFrame(CVPixelBufferRef aImage,
+                                 AppleVTDecoder::AppleFrameRef aFrameRef) {
+  MaybeRegisterCallbackThread();
+
+  if (mIsFlushing) {
+    // We are in the process of flushing or shutting down; ignore frame.
+    return;
+  }
+
+  LOG("mp4 output frame %lld dts %lld pts %lld duration %lld us%s",
+      aFrameRef.byte_offset, aFrameRef.decode_timestamp.ToMicroseconds(),
+      aFrameRef.composition_timestamp.ToMicroseconds(),
+      aFrameRef.duration.ToMicroseconds(),
+      aFrameRef.is_sync_point ? " keyframe" : "");
+
+  if (!aImage) {
+    // Image was dropped by decoder or none return yet.
+    // We need more input to continue.
+    MonitorAutoLock mon(mMonitor);
+    MaybeResolveBufferedFrames();
+    return;
+  }
+
+  bool useNullSample = false;
+  if (mSeekTargetThreshold.isSome()) {
+    if ((aFrameRef.composition_timestamp + aFrameRef.duration) <
+        mSeekTargetThreshold.ref()) {
+      useNullSample = true;
+    } else {
+      mSeekTargetThreshold.reset();
+    }
+  }
+
+  // Where our resulting image will end up.
+  RefPtr<MediaData> data;
+  // Bounds.
+  VideoInfo info;
+  info.mDisplay = gfx::IntSize(mDisplayWidth, mDisplayHeight);
+
+  if (useNullSample) {
+    data = new NullData(aFrameRef.byte_offset, aFrameRef.composition_timestamp,
+                        aFrameRef.duration);
+  } else if (mUseSoftwareImages) {
+    size_t width = CVPixelBufferGetWidth(aImage);
+    size_t height = CVPixelBufferGetHeight(aImage);
+    DebugOnly<size_t> planes = CVPixelBufferGetPlaneCount(aImage);
+    MOZ_ASSERT(planes == 3, "Likely not YUV420 format and it must be.");
+
+    VideoData::YCbCrBuffer buffer;
+
+    // Lock the returned image data.
+    CVReturn rv =
+        CVPixelBufferLockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly);
+    if (rv != kCVReturnSuccess) {
+      NS_ERROR("error locking pixel data");
+      MonitorAutoLock mon(mMonitor);
+      mPromise.Reject(
+          MediaResult(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                      RESULT_DETAIL("CVPixelBufferLockBaseAddress:%x", rv)),
+          __func__);
+      return;
+    }
+    // Y plane.
+    buffer.mPlanes[0].mData =
+        static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 0));
+    buffer.mPlanes[0].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 0);
+    buffer.mPlanes[0].mWidth = width;
+    buffer.mPlanes[0].mHeight = height;
+    buffer.mPlanes[0].mSkip = 0;
+    // Cb plane.
+    buffer.mPlanes[1].mData =
+        static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 1));
+    buffer.mPlanes[1].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 1);
+    buffer.mPlanes[1].mWidth = (width + 1) / 2;
+    buffer.mPlanes[1].mHeight = (height + 1) / 2;
+    buffer.mPlanes[1].mSkip = 0;
+    // Cr plane.
+    buffer.mPlanes[2].mData =
+        static_cast<uint8_t*>(CVPixelBufferGetBaseAddressOfPlane(aImage, 2));
+    buffer.mPlanes[2].mStride = CVPixelBufferGetBytesPerRowOfPlane(aImage, 2);
+    buffer.mPlanes[2].mWidth = (width + 1) / 2;
+    buffer.mPlanes[2].mHeight = (height + 1) / 2;
+    buffer.mPlanes[2].mSkip = 0;
+
+    buffer.mChromaSubsampling = gfx::ChromaSubsampling::HALF_WIDTH_AND_HEIGHT;
+    buffer.mYUVColorSpace = mColorSpace;
+    buffer.mColorPrimaries = mColorPrimaries;
+    buffer.mColorRange = mColorRange;
+
+    gfx::IntRect visible = gfx::IntRect(0, 0, mPictureWidth, mPictureHeight);
+
+    // Copy the image data into our own format.
+    data = VideoData::CreateAndCopyData(
+        info, mImageContainer, aFrameRef.byte_offset,
+        aFrameRef.composition_timestamp, aFrameRef.duration, buffer,
+        aFrameRef.is_sync_point, aFrameRef.decode_timestamp, visible,
+        mKnowsCompositor);
+    // Unlock the returned image data.
+    CVPixelBufferUnlockBaseAddress(aImage, kCVPixelBufferLock_ReadOnly);
+  } else {
+#ifndef MOZ_WIDGET_UIKIT
+    // Set pixel buffer properties on aImage before we extract its surface.
+    // This ensures that we can use defined enums to set values instead
+    // of later setting magic CFSTR values on the surface itself.
+    if (mColorSpace == gfx::YUVColorSpace::BT601) {
+      CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey,
+                            kCVImageBufferYCbCrMatrix_ITU_R_601_4,
+                            kCVAttachmentMode_ShouldPropagate);
+    } else if (mColorSpace == gfx::YUVColorSpace::BT709) {
+      CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey,
+                            kCVImageBufferYCbCrMatrix_ITU_R_709_2,
+                            kCVAttachmentMode_ShouldPropagate);
+    } else if (mColorSpace == gfx::YUVColorSpace::BT2020) {
+      CVBufferSetAttachment(aImage, kCVImageBufferYCbCrMatrixKey,
+                            kCVImageBufferYCbCrMatrix_ITU_R_2020,
+                            kCVAttachmentMode_ShouldPropagate);
+    }
+
+    if (mColorPrimaries == gfx::ColorSpace2::BT709) {
+      CVBufferSetAttachment(aImage, kCVImageBufferColorPrimariesKey,
+                            kCVImageBufferColorPrimaries_ITU_R_709_2,
+                            kCVAttachmentMode_ShouldPropagate);
+    } else if (mColorPrimaries == gfx::ColorSpace2::BT2020) {
+      CVBufferSetAttachment(aImage, kCVImageBufferColorPrimariesKey,
+                            kCVImageBufferColorPrimaries_ITU_R_2020,
+                            kCVAttachmentMode_ShouldPropagate);
+    }
+
+    // Transfer function is applied independently from the colorSpace.
+    CVBufferSetAttachment(
+        aImage, kCVImageBufferTransferFunctionKey,
+        gfxMacUtils::CFStringForTransferFunction(mTransferFunction),
+        kCVAttachmentMode_ShouldPropagate);
+
+    CFTypeRefPtr<IOSurfaceRef> surface =
+        CFTypeRefPtr<IOSurfaceRef>::WrapUnderGetRule(
+            CVPixelBufferGetIOSurface(aImage));
+    MOZ_ASSERT(surface, "Decoder didn't return an IOSurface backed buffer");
+
+    RefPtr<MacIOSurface> macSurface = new MacIOSurface(std::move(surface));
+    macSurface->SetYUVColorSpace(mColorSpace);
+    macSurface->mColorPrimaries = mColorPrimaries;
+
+    RefPtr<layers::Image> image = new layers::MacIOSurfaceImage(macSurface);
+
+    data = VideoData::CreateFromImage(
+        info.mDisplay, aFrameRef.byte_offset, aFrameRef.composition_timestamp,
+        aFrameRef.duration, image.forget(), aFrameRef.is_sync_point,
+        aFrameRef.decode_timestamp);
+#else
+    MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS");
+#endif
+  }
+
+  if (!data) {
+    NS_ERROR("Couldn't create VideoData for frame");
+    MonitorAutoLock mon(mMonitor);
+    mPromise.Reject(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
+    return;
+  }
+
+  mPerformanceRecorder.Record(
+      aFrameRef.decode_timestamp.ToMicroseconds(), [&](DecodeStage& aStage) {
+        aStage.SetResolution(static_cast<int>(CVPixelBufferGetWidth(aImage)),
+                             static_cast<int>(CVPixelBufferGetHeight(aImage)));
+        auto format = [&]() -> Maybe<DecodeStage::ImageFormat> {
+          switch (CVPixelBufferGetPixelFormatType(aImage)) {
+            case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
+            case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange:
+              return Some(DecodeStage::NV12);
+            case kCVPixelFormatType_422YpCbCr8_yuvs:
+            case kCVPixelFormatType_422YpCbCr8FullRange:
+              return Some(DecodeStage::YUV422P);
+            case kCVPixelFormatType_32BGRA:
+              return Some(DecodeStage::RGBA32);
+            default:
+              return Nothing();
+          }
+        }();
+        format.apply([&](auto aFormat) { aStage.SetImageFormat(aFormat); });
+        aStage.SetColorDepth(mColorDepth);
+        aStage.SetYUVColorSpace(mColorSpace);
+        aStage.SetColorRange(mColorRange);
+      });
+
+  // Frames come out in DTS order but we need to output them
+  // in composition order.
+  MonitorAutoLock mon(mMonitor);
+  mReorderQueue.Push(std::move(data));
+  MaybeResolveBufferedFrames();
+
+  LOG("%llu decoded frames queued",
+      static_cast<unsigned long long>(mReorderQueue.Length()));
+}
+
+// Rejects the pending decode promise, if there is one, with
+// NS_ERROR_DOM_MEDIA_DECODE_ERR; aError is embedded in the detail message.
+void AppleVTDecoder::OnDecodeError(OSStatus aError) {
+  // mPromise must only be touched while holding mMonitor.
+  MonitorAutoLock lock(mMonitor);
+  MediaResult failure(NS_ERROR_DOM_MEDIA_DECODE_ERR,
+                      RESULT_DETAIL("OnDecodeError:%x", aError));
+  mPromise.RejectIfExists(std::move(failure), __func__);
+}
+
+// Calls VTDecompressionSessionWaitForAsynchronousFrames on mSession and maps
+// the resulting OSStatus to NS_OK / NS_ERROR_FAILURE.
+nsresult AppleVTDecoder::WaitForAsynchronousFrames() {
+  const OSStatus status =
+      VTDecompressionSessionWaitForAsynchronousFrames(mSession);
+  if (status == noErr) {
+    return NS_OK;
+  }
+
+  NS_ERROR("AppleVTDecoder: Error waiting for asynchronous frames");
+  return NS_ERROR_FAILURE;
+}
+
+// Creates the video format description (mFormat) and the decompression
+// session (mSession), then queries the session to find out whether it is
+// hardware accelerated. Returns NS_OK on success, or a
+// NS_ERROR_DOM_MEDIA_FATAL_ERR result if either object cannot be created.
+MediaResult AppleVTDecoder::InitializeSession() {
+  AutoCFRelease<CFDictionaryRef> extensions = CreateDecoderExtensions();
+
+  // Anything that is not H264 is described as VP9.
+  const CMVideoCodecType codecType =
+      mStreamType == StreamType::H264
+          ? kCMVideoCodecType_H264
+          : CMVideoCodecType(AppleDecoderModule::kCMVideoCodecType_VP9);
+
+  OSStatus rv = CMVideoFormatDescriptionCreate(kCFAllocatorDefault, codecType,
+                                               mPictureWidth, mPictureHeight,
+                                               extensions, &mFormat);
+  if (rv != noErr) {
+    return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                       RESULT_DETAIL("Couldn't create format description!"));
+  }
+
+  // Construct video decoder selection spec.
+  AutoCFRelease<CFDictionaryRef> decoderSpec = CreateDecoderSpecification();
+
+  // Construct output configuration.
+  AutoCFRelease<CFDictionaryRef> outputConfig = CreateOutputConfiguration();
+
+  // Decoded frames are delivered to PlatformCallback with `this` as context.
+  VTDecompressionOutputCallbackRecord callbackRecord = {PlatformCallback, this};
+  rv = VTDecompressionSessionCreate(kCFAllocatorDefault, mFormat,
+                                    decoderSpec,   // Video decoder selection.
+                                    outputConfig,  // Output video format.
+                                    &callbackRecord, &mSession);
+  if (rv != noErr) {
+    return MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                       RESULT_DETAIL("Couldn't create decompression session!"));
+  }
+
+  // Ask the session whether it ended up using a hardware decoder.
+  CFBooleanRef usingHardware = nullptr;
+  rv = VTSessionCopyProperty(
+      mSession,
+      kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder,
+      kCFAllocatorDefault, &usingHardware);
+  if (rv != noErr) {
+    LOG("AppleVTDecoder: maybe hardware accelerated decoding "
+        "(VTSessionCopyProperty query failed)");
+  } else {
+    mIsHardwareAccelerated = usingHardware == kCFBooleanTrue;
+    LOG("AppleVTDecoder: %s hardware accelerated decoding",
+        mIsHardwareAccelerated ? "using" : "not using");
+  }
+  // The property was obtained through a Copy call, so release it if set.
+  if (usingHardware) {
+    CFRelease(usingHardware);
+  }
+
+  return NS_OK;
+}
+
+// Builds the extensions dictionary passed to CMVideoFormatDescriptionCreate:
+// chroma location for both fields plus the sample description atoms holding
+// mExtraData. The result comes from CFDictionaryCreate; the caller in this
+// file wraps it in AutoCFRelease.
+CFDictionaryRef AppleVTDecoder::CreateDecoderExtensions() {
+  // Wrap the codec configuration bytes (mExtraData) in a CFData.
+  AutoCFRelease<CFDataRef> configRecord = CFDataCreate(
+      kCFAllocatorDefault, mExtraData->Elements(), mExtraData->Length());
+
+  // The atom is named "avcC" for H264 and "vpcC" otherwise.
+  const void* atomKeys[] = {mStreamType == StreamType::H264 ? CFSTR("avcC")
+                                                            : CFSTR("vpcC")};
+  const void* atomValues[] = {configRecord};
+  static_assert(ArrayLength(atomKeys) == ArrayLength(atomValues),
+                "Non matching keys/values array size");
+
+  AutoCFRelease<CFDictionaryRef> sampleAtoms = CFDictionaryCreate(
+      kCFAllocatorDefault, atomKeys, atomValues, ArrayLength(atomKeys),
+      &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
+
+  const void* keys[] = {
+      kCVImageBufferChromaLocationBottomFieldKey,
+      kCVImageBufferChromaLocationTopFieldKey,
+      kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms};
+  const void* values[] = {kCVImageBufferChromaLocation_Left,
+                          kCVImageBufferChromaLocation_Left, sampleAtoms};
+  static_assert(ArrayLength(keys) == ArrayLength(values),
+                "Non matching keys/values array size");
+
+  return CFDictionaryCreate(kCFAllocatorDefault, keys, values,
+                            ArrayLength(keys), &kCFTypeDictionaryKeyCallBacks,
+                            &kCFTypeDictionaryValueCallBacks);
+}
+
+// Builds the decoder selection dictionary: a single entry that enables or
+// disables hardware accelerated decoding, depending on
+// gfxVars::CanUseHardwareVideoDecoding().
+CFDictionaryRef AppleVTDecoder::CreateDecoderSpecification() {
+  // When hardware decoding cannot be used (e.g. the GPU is blacklisted),
+  // explicitly request that it stays disabled.
+  const void* values[] = {gfx::gfxVars::CanUseHardwareVideoDecoding()
+                              ? kCFBooleanTrue
+                              : kCFBooleanFalse};
+  const void* keys[] = {
+      kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder};
+  static_assert(ArrayLength(keys) == ArrayLength(values),
+                "Non matching keys/values array size");
+
+  return CFDictionaryCreate(kCFAllocatorDefault, keys, values,
+                            ArrayLength(keys), &kCFTypeDictionaryKeyCallBacks,
+                            &kCFTypeDictionaryValueCallBacks);
+}
+
+// Builds the dictionary describing the pixel buffers the decompression
+// session should output.
+//
+// - With mUseSoftwareImages, only the pixel format is requested (planar
+//   8-bit 4:2:0).
+// - Otherwise (non-UIKit builds only) IOSurface-backed, OpenGL-compatible
+//   bi-planar 4:2:0 buffers are requested, 8 or 10 bit and full or video
+//   range according to mColorDepth and mColorRange.
+//
+// The result comes from CFDictionaryCreate; the caller in this file wraps it
+// in AutoCFRelease. Returns nullptr on UIKit builds when software images are
+// not in use, since that configuration is not supported.
+CFDictionaryRef AppleVTDecoder::CreateOutputConfiguration() {
+  if (mUseSoftwareImages) {
+    // Output format type:
+    SInt32 PixelFormatTypeValue = kCVPixelFormatType_420YpCbCr8Planar;
+    AutoCFRelease<CFNumberRef> PixelFormatTypeNumber = CFNumberCreate(
+        kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue);
+    const void* outputKeys[] = {kCVPixelBufferPixelFormatTypeKey};
+    const void* outputValues[] = {PixelFormatTypeNumber};
+    static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues),
+                  "Non matching keys/values array size");
+
+    return CFDictionaryCreate(
+        kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys),
+        &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
+  }
+
+#ifndef MOZ_WIDGET_UIKIT
+  // Output format type:
+
+  bool is10Bit = (gfx::BitDepthForColorDepth(mColorDepth) == 10);
+  SInt32 PixelFormatTypeValue =
+      mColorRange == gfx::ColorRange::FULL
+          ? (is10Bit ? kCVPixelFormatType_420YpCbCr10BiPlanarFullRange
+                     : kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
+          : (is10Bit ? kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange
+                     : kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
+  AutoCFRelease<CFNumberRef> PixelFormatTypeNumber = CFNumberCreate(
+      kCFAllocatorDefault, kCFNumberSInt32Type, &PixelFormatTypeValue);
+  // Construct IOSurface Properties
+  const void* IOSurfaceKeys[] = {kIOSurfaceIsGlobal};
+  const void* IOSurfaceValues[] = {kCFBooleanTrue};
+  static_assert(ArrayLength(IOSurfaceKeys) == ArrayLength(IOSurfaceValues),
+                "Non matching keys/values array size");
+
+  // Construct output configuration.
+  AutoCFRelease<CFDictionaryRef> IOSurfaceProperties = CFDictionaryCreate(
+      kCFAllocatorDefault, IOSurfaceKeys, IOSurfaceValues,
+      ArrayLength(IOSurfaceKeys), &kCFTypeDictionaryKeyCallBacks,
+      &kCFTypeDictionaryValueCallBacks);
+
+  const void* outputKeys[] = {kCVPixelBufferIOSurfacePropertiesKey,
+                              kCVPixelBufferPixelFormatTypeKey,
+                              kCVPixelBufferOpenGLCompatibilityKey};
+  const void* outputValues[] = {IOSurfaceProperties, PixelFormatTypeNumber,
+                                kCFBooleanTrue};
+  static_assert(ArrayLength(outputKeys) == ArrayLength(outputValues),
+                "Non matching keys/values array size");
+
+  return CFDictionaryCreate(
+      kCFAllocatorDefault, outputKeys, outputValues, ArrayLength(outputKeys),
+      &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
+#else
+  MOZ_ASSERT_UNREACHABLE("No MacIOSurface on iOS");
+  // The assertion compiles away in release builds; return explicitly so
+  // control never flows off the end of a non-void function.
+  return nullptr;
+#endif
+}
+
+}  // namespace mozilla
+
+#undef LOG
+#undef LOGEX
diff --git a/dom/media/platforms/apple/AppleVTDecoder.h b/dom/media/platforms/apple/AppleVTDecoder.h
new file mode 100644
index 0000000000..140a335628
--- /dev/null
+++ b/dom/media/platforms/apple/AppleVTDecoder.h
@@ -0,0 +1,145 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_AppleVTDecoder_h
+#define mozilla_AppleVTDecoder_h
+
+#include <CoreFoundation/CFDictionary.h>  // For CFDictionaryRef
+#include <CoreMedia/CoreMedia.h>          // For CMVideoFormatDescriptionRef
+#include <VideoToolbox/VideoToolbox.h>    // For VTDecompressionSessionRef
+
+#include "AppleDecoderModule.h"
+#include "PerformanceRecorder.h"
+#include "PlatformDecoderModule.h"
+#include "ReorderQueue.h"
+#include "TimeUnits.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/gfx/Types.h"
+#include "mozilla/ProfilerUtils.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclNameAndBase(AppleVTDecoder, MediaDataDecoder);
+
+// MediaDataDecoder implementation backed by a VideoToolbox decompression
+// session (VTDecompressionSessionRef). The stream is either H264 or VP9 (see
+// StreamType below).
+class AppleVTDecoder final : public MediaDataDecoder,
+                             public DecoderDoctorLifeLogger<AppleVTDecoder> {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AppleVTDecoder, final);
+
+  AppleVTDecoder(const VideoInfo& aConfig,
+                 layers::ImageContainer* aImageContainer,
+                 CreateDecoderParams::OptionSet aOptions,
+                 layers::KnowsCompositor* aKnowsCompositor,
+                 Maybe<TrackingId> aTrackingId);
+
+  // Per-sample metadata captured from the input MediaRawData so that it can
+  // be attached to the decoded frame in OutputFrame().
+  class AppleFrameRef {
+   public:
+    media::TimeUnit decode_timestamp;
+    media::TimeUnit composition_timestamp;
+    media::TimeUnit duration;
+    int64_t byte_offset;
+    bool is_sync_point;
+
+    // Copies timecode, time, duration, offset and keyframe flag from aSample.
+    explicit AppleFrameRef(const MediaRawData& aSample)
+        : decode_timestamp(aSample.mTimecode),
+          composition_timestamp(aSample.mTime),
+          duration(aSample.mDuration),
+          byte_offset(aSample.mOffset),
+          is_sync_point(aSample.mKeyframe) {}
+  };
+
+  // MediaDataDecoder interface.
+  RefPtr<InitPromise> Init() override;
+  RefPtr<DecodePromise> Decode(MediaRawData* aSample) override;
+  RefPtr<DecodePromise> Drain() override;
+  RefPtr<FlushPromise> Flush() override;
+  RefPtr<ShutdownPromise> Shutdown() override;
+  void SetSeekThreshold(const media::TimeUnit& aTime) override;
+
+  // Reports the value recorded in mIsHardwareAccelerated. aFailureReason is
+  // left untouched.
+  bool IsHardwareAccelerated(nsACString& aFailureReason) const override {
+    return mIsHardwareAccelerated;
+  }
+
+  // Human readable decoder name; differs depending on whether the session is
+  // hardware accelerated.
+  nsCString GetDescriptionName() const override {
+    return mIsHardwareAccelerated ? "apple hardware VT decoder"_ns
+                                  : "apple software VT decoder"_ns;
+  }
+
+  nsCString GetCodecName() const override;
+
+  // This decoder always asks for AVCC formatted input.
+  ConversionRequired NeedsConversion() const override {
+    return ConversionRequired::kNeedAVCC;
+  }
+
+  // Access from the taskqueue and the decoder's thread.
+  // OutputFrame is thread-safe.
+  void OutputFrame(CVPixelBufferRef aImage, AppleFrameRef aFrameRef);
+  // Rejects the pending decode promise (if any) with a decode error that
+  // carries aError.
+  void OnDecodeError(OSStatus aError);
+
+ private:
+  friend class AppleDecoderModule;  // To access InitializeSession.
+  virtual ~AppleVTDecoder();
+  RefPtr<FlushPromise> ProcessFlush();
+  RefPtr<DecodePromise> ProcessDrain();
+  void ProcessShutdown();
+  void ProcessDecode(MediaRawData* aSample);
+  void MaybeResolveBufferedFrames();
+
+  void MaybeRegisterCallbackThread();
+
+  // Debug-only check that the caller is running on mTaskQueue.
+  void AssertOnTaskQueue() { MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn()); }
+
+  AppleFrameRef* CreateAppleFrameRef(const MediaRawData* aSample);
+  // Builds the dictionary describing the requested output pixel buffers.
+  CFDictionaryRef CreateOutputConfiguration();
+
+  // Codec configuration bytes, exposed to VideoToolbox as the "avcC" / "vpcC"
+  // sample description atom.
+  const RefPtr<MediaByteBuffer> mExtraData;
+  const uint32_t mPictureWidth;
+  const uint32_t mPictureHeight;
+  const uint32_t mDisplayWidth;
+  const uint32_t mDisplayHeight;
+  const gfx::YUVColorSpace mColorSpace;
+  const gfx::ColorSpace2 mColorPrimaries;
+  const gfx::TransferFunction mTransferFunction;
+  const gfx::ColorRange mColorRange;
+  const gfx::ColorDepth mColorDepth;
+
+  // Method to set up the decompression session.
+  MediaResult InitializeSession();
+  // Waits for outstanding asynchronous frames of mSession; returns
+  // NS_ERROR_FAILURE if VideoToolbox reports an error.
+  nsresult WaitForAsynchronousFrames();
+  // Builds the decoder selection dictionary (hardware decoding on/off).
+  CFDictionaryRef CreateDecoderSpecification();
+  // Builds the format description extensions dictionary (chroma location and
+  // sample description atoms).
+  CFDictionaryRef CreateDecoderExtensions();
+
+  enum class StreamType { Unknown, H264, VP9 };
+  const StreamType mStreamType;
+  const RefPtr<TaskQueue> mTaskQueue;
+  const uint32_t mMaxRefFrames;
+  const RefPtr<layers::ImageContainer> mImageContainer;
+  const RefPtr<layers::KnowsCompositor> mKnowsCompositor;
+  // When true, decoded frames are copied into our own YCbCr image format
+  // instead of being wrapped as IOSurface-backed images.
+  const bool mUseSoftwareImages;
+  const Maybe<TrackingId> mTrackingId;
+
+  // Set on reader/decode thread calling Flush() to indicate that output is
+  // not required and so input samples on mTaskQueue need not be processed.
+  Atomic<bool> mIsFlushing;
+  std::atomic<ProfilerThreadId> mCallbackThreadId;
+  // Protects mReorderQueue and mPromise.
+  Monitor mMonitor MOZ_UNANNOTATED;
+  // Decoded frames come out in decode order; they are queued here so they
+  // can be output in composition order.
+  ReorderQueue mReorderQueue;
+  MozMonitoredPromiseHolder<DecodePromise> mPromise;
+
+  // Decoded frame will be dropped if its pts is smaller than this
+  // value. It should be initialized before Input() or after Flush(). So it is
+  // safe to access it in OutputFrame without protecting.
+  Maybe<media::TimeUnit> mSeekTargetThreshold;
+
+  // Created by InitializeSession().
+  CMVideoFormatDescriptionRef mFormat;
+  VTDecompressionSessionRef mSession;
+  // Result of querying the session for hardware acceleration in
+  // InitializeSession().
+  Atomic<bool> mIsHardwareAccelerated;
+  PerformanceRecorderMulti<DecodeStage> mPerformanceRecorder;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_AppleVTDecoder_h
diff --git a/dom/media/platforms/apple/AppleVTEncoder.cpp b/dom/media/platforms/apple/AppleVTEncoder.cpp
new file mode 100644
index 0000000000..af91d99bcb
--- /dev/null
+++ b/dom/media/platforms/apple/AppleVTEncoder.cpp
@@ -0,0 +1,628 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AppleVTEncoder.h"
+
+#include <CoreFoundation/CFArray.h>
+#include <CoreFoundation/CFByteOrder.h>
+#include <CoreFoundation/CFDictionary.h>
+
+#include "ImageContainer.h"
+#include "AnnexB.h"
+#include "H264.h"
+
+#include "libyuv.h"
+
+#include "AppleUtils.h"
+
+namespace mozilla {
+extern LazyLogModule sPEMLog;
+#define VTENC_LOGE(fmt, ...)                 \
+  MOZ_LOG(sPEMLog, mozilla::LogLevel::Error, \
+          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))
+#define VTENC_LOGD(fmt, ...)                 \
+  MOZ_LOG(sPEMLog, mozilla::LogLevel::Debug, \
+          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))
+
+// Builds the encoder specification dictionary: a single entry that enables
+// hardware accelerated encoding unless aHardwareNotAllowed is set. The
+// result comes from CFDictionaryCreate; the caller in this file wraps it in
+// AutoCFRelease.
+static CFDictionaryRef BuildEncoderSpec(const bool aHardwareNotAllowed) {
+  const CFBooleanRef enableHardware =
+      aHardwareNotAllowed ? kCFBooleanFalse : kCFBooleanTrue;
+
+  const void* specKeys[] = {
+      kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder};
+  const void* specValues[] = {enableHardware};
+  static_assert(ArrayLength(specKeys) == ArrayLength(specValues),
+                "Non matching keys/values array size");
+
+  return CFDictionaryCreate(
+      kCFAllocatorDefault, specKeys, specValues, ArrayLength(specKeys),
+      &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
+}
+
+// Output callback registered with VTCompressionSessionCreate. aEncoder is
+// the AppleVTEncoder that owns the session.
+//
+// - On error (bad status or missing sample) the encoder is notified with a
+//   null buffer.
+// - A successfully encoded frame that is flagged as dropped is ignored.
+// - Otherwise the sample buffer is handed to the encoder.
+static void FrameCallback(void* aEncoder, void* aFrameRefCon, OSStatus aStatus,
+                          VTEncodeInfoFlags aInfoFlags,
+                          CMSampleBufferRef aSampleBuffer) {
+  const bool failed = aStatus != noErr || !aSampleBuffer;
+
+  if (!failed && (aInfoFlags & kVTEncodeInfo_FrameDropped)) {
+    VTENC_LOGE("frame tagged as dropped");
+    return;
+  }
+
+  if (failed) {
+    VTENC_LOGE("VideoToolbox encoder returned no data status=%d sample=%p",
+               aStatus, aSampleBuffer);
+    aSampleBuffer = nullptr;
+  }
+
+  auto* encoder = static_cast<AppleVTEncoder*>(aEncoder);
+  encoder->OutputFrame(aSampleBuffer);
+}
+
+// Sets kVTCompressionPropertyKey_AverageBitRate on aSession to aBitsPerSec.
+// Returns true when VideoToolbox accepted the property.
+static bool SetAverageBitrate(VTCompressionSessionRef& aSession,
+                              MediaDataEncoder::Rate aBitsPerSec) {
+  // CFNumber needs the address of a value of the advertised type (SInt64).
+  int64_t bitsPerSecond(aBitsPerSec);
+  AutoCFRelease<CFNumberRef> bitrateNumber(
+      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &bitsPerSecond));
+  const OSStatus status = VTSessionSetProperty(
+      aSession, kVTCompressionPropertyKey_AverageBitRate, bitrateNumber);
+  return status == noErr;
+}
+
+// Configures aSession for realtime use: turns on the RealTime property and
+// turns off frame reordering. Returns true only if both properties were
+// accepted; the second one is not attempted when the first fails.
+static bool SetRealtimeProperties(VTCompressionSessionRef& aSession) {
+  if (VTSessionSetProperty(aSession, kVTCompressionPropertyKey_RealTime,
+                           kCFBooleanTrue) != noErr) {
+    return false;
+  }
+  return VTSessionSetProperty(aSession,
+                              kVTCompressionPropertyKey_AllowFrameReordering,
+                              kCFBooleanFalse) == noErr;
+}
+
+// Maps aValue to the matching VideoToolbox H264 profile/level constant and
+// sets it on aSession. Returns false when aValue has no mapping or when
+// VideoToolbox rejects the property.
+static bool SetProfileLevel(VTCompressionSessionRef& aSession,
+                            AppleVTEncoder::H264Specific::ProfileLevel aValue) {
+  using ProfileLevel = AppleVTEncoder::H264Specific::ProfileLevel;
+
+  CFStringRef vtProfileLevel = nullptr;
+  switch (aValue) {
+    case ProfileLevel::BaselineAutoLevel:
+      vtProfileLevel = kVTProfileLevel_H264_Baseline_AutoLevel;
+      break;
+    case ProfileLevel::MainAutoLevel:
+      vtProfileLevel = kVTProfileLevel_H264_Main_AutoLevel;
+      break;
+  }
+
+  if (!vtProfileLevel) {
+    return false;
+  }
+  return VTSessionSetProperty(aSession, kVTCompressionPropertyKey_ProfileLevel,
+                              vtProfileLevel) == noErr;
+}
+
+// Creates and configures the VideoToolbox H264 compression session.
+//
+// Steps, each of which rejects the returned promise on failure:
+//  1. validate the frame size (must be non-zero in both dimensions);
+//  2. build the encoder spec and source buffer attributes;
+//  3. create the session, with FrameCallback as output callback;
+//  4. set average bitrate, realtime properties (Realtime usage only),
+//     maximum keyframe interval and, if provided, the H264 profile level.
+// Finally the session is queried for hardware acceleration and the promise
+// is resolved with kVideoTrack.
+RefPtr<MediaDataEncoder::InitPromise> AppleVTEncoder::Init() {
+  MOZ_ASSERT(!mInited, "Cannot initialize encoder again without shutting down");
+
+  if (mConfig.mSize.width == 0 || mConfig.mSize.height == 0) {
+    return InitPromise::CreateAndReject(NS_ERROR_ILLEGAL_VALUE, __func__);
+  }
+
+  AutoCFRelease<CFDictionaryRef> spec(BuildEncoderSpec(mHardwareNotAllowed));
+  AutoCFRelease<CFDictionaryRef> srcBufferAttr(
+      BuildSourceImageBufferAttributes());
+  if (!srcBufferAttr) {
+    return InitPromise::CreateAndReject(
+        MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
+                    "fail to create source buffer attributes"),
+        __func__);
+  }
+
+  OSStatus status = VTCompressionSessionCreate(
+      kCFAllocatorDefault, mConfig.mSize.width, mConfig.mSize.height,
+      kCMVideoCodecType_H264, spec, srcBufferAttr, kCFAllocatorDefault,
+      &FrameCallback, this /* outputCallbackRefCon */, &mSession);
+
+  if (status != noErr) {
+    return InitPromise::CreateAndReject(
+        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                    "fail to create encoder session"),
+        __func__);
+  }
+
+  if (!SetAverageBitrate(mSession, mConfig.mBitsPerSec)) {
+    return InitPromise::CreateAndReject(
+        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                    "fail to configurate average bitrate"),
+        __func__);
+  }
+
+  if (mConfig.mUsage == Usage::Realtime && !SetRealtimeProperties(mSession)) {
+    VTENC_LOGE("fail to configurate realtime properties");
+    // Report the step that actually failed, matching the log line above.
+    return InitPromise::CreateAndReject(
+        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                    "fail to configurate realtime properties"),
+        __func__);
+  }
+
+  // Clamp the keyframe interval to what fits in the SInt64 CFNumber below.
+  int64_t interval =
+      mConfig.mKeyframeInterval > std::numeric_limits<int64_t>::max()
+          ? std::numeric_limits<int64_t>::max()
+          : mConfig.mKeyframeInterval;
+  AutoCFRelease<CFNumberRef> cf(
+      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &interval));
+  if (VTSessionSetProperty(mSession,
+                           kVTCompressionPropertyKey_MaxKeyFrameInterval,
+                           cf) != noErr) {
+    return InitPromise::CreateAndReject(
+        MediaResult(
+            NS_ERROR_DOM_MEDIA_FATAL_ERR,
+            nsPrintfCString("fail to configurate keyframe interval:%" PRId64,
+                            interval)),
+        __func__);
+  }
+
+  if (mConfig.mCodecSpecific) {
+    const H264Specific& specific = mConfig.mCodecSpecific.ref();
+    if (!SetProfileLevel(mSession, specific.mProfileLevel)) {
+      return InitPromise::CreateAndReject(
+          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                      nsPrintfCString("fail to configurate profile level:%d",
+                                      specific.mProfileLevel)),
+          __func__);
+    }
+  }
+
+  // Ask the session whether it ended up using a hardware encoder. The value
+  // is obtained through a Copy call, so it must be released afterwards.
+  CFBooleanRef isUsingHW = nullptr;
+  status = VTSessionCopyProperty(
+      mSession, kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder,
+      kCFAllocatorDefault, &isUsingHW);
+  mIsHardwareAccelerated = status == noErr && isUsingHW == kCFBooleanTrue;
+  if (isUsingHW) {
+    CFRelease(isUsingHW);
+  }
+
+  mError = NS_OK;
+  return InitPromise::CreateAndResolve(TrackInfo::TrackType::kVideoTrack,
+                                       __func__);
+}
+
+// Translates a MediaDataEncoder pixel format into the CoreVideo pixel format
+// type used for the source image buffers. Returns Nothing() for formats
+// without a mapping. Note that RGBA32 and BGRA32 both map to 32BGRA.
+static Maybe<OSType> MapPixelFormat(MediaDataEncoder::PixelFormat aFormat) {
+  using PixelFormat = MediaDataEncoder::PixelFormat;
+
+  Maybe<OSType> mapped;
+  switch (aFormat) {
+    case PixelFormat::RGBA32:
+    case PixelFormat::BGRA32:
+      mapped.emplace(kCVPixelFormatType_32BGRA);
+      break;
+    case PixelFormat::RGB24:
+      mapped.emplace(kCVPixelFormatType_24RGB);
+      break;
+    case PixelFormat::BGR24:
+      mapped.emplace(kCVPixelFormatType_24BGR);
+      break;
+    case PixelFormat::GRAY8:
+      mapped.emplace(kCVPixelFormatType_OneComponent8);
+      break;
+    case PixelFormat::YUV444P:
+      mapped.emplace(kCVPixelFormatType_444YpCbCr8);
+      break;
+    case PixelFormat::YUV420P:
+      mapped.emplace(kCVPixelFormatType_420YpCbCr8PlanarFullRange);
+      break;
+    case PixelFormat::YUV420SP_NV12:
+      mapped.emplace(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
+      break;
+    default:
+      // Unsupported format: leave `mapped` empty.
+      break;
+  }
+  return mapped;
+}
+
+// Builds the attributes dictionary describing the source image buffers fed
+// to the compression session: OpenGL compatibility, (empty) IOSurface
+// properties and the pixel format mapped from mConfig.mSourcePixelFormat.
+// Returns nullptr when the source pixel format is not supported. The result
+// comes from CFDictionaryCreate; the caller in this file wraps it in
+// AutoCFRelease.
+CFDictionaryRef AppleVTEncoder::BuildSourceImageBufferAttributes() {
+  Maybe<OSType> fmt = MapPixelFormat(mConfig.mSourcePixelFormat);
+  if (fmt.isNothing()) {
+    VTENC_LOGE("unsupported source pixel format");
+    return nullptr;
+  }
+
+  // Source image buffer attributes
+  const void* keys[] = {kCVPixelBufferOpenGLCompatibilityKey,  // TODO
+                        kCVPixelBufferIOSurfacePropertiesKey,  // TODO
+                        kCVPixelBufferPixelFormatTypeKey};
+
+  AutoCFRelease<CFDictionaryRef> ioSurfaceProps(CFDictionaryCreate(
+      kCFAllocatorDefault, nullptr, nullptr, 0, &kCFTypeDictionaryKeyCallBacks,
+      &kCFTypeDictionaryValueCallBacks));
+  // CFNumberCreate reads a 32-bit value through the pointer it is given, so
+  // hand it the address of the contained OSType rather than the address of
+  // the Maybe<> wrapper (which would depend on Maybe's internal layout).
+  const OSType pixelFormatValue = fmt.ref();
+  AutoCFRelease<CFNumberRef> pixelFormat(CFNumberCreate(
+      kCFAllocatorDefault, kCFNumberSInt32Type, &pixelFormatValue));
+  const void* values[] = {kCFBooleanTrue, ioSurfaceProps, pixelFormat};
+
+  static_assert(ArrayLength(keys) == ArrayLength(values),
+                "Non matching keys/values array size");
+
+  return CFDictionaryCreate(kCFAllocatorDefault, keys, values,
+                            ArrayLength(keys), &kCFTypeDictionaryKeyCallBacks,
+                            &kCFTypeDictionaryValueCallBacks);
+}
+
+// Returns true when the first sample attachment dictionary of aSample does
+// not carry kCMSampleAttachmentKey_NotSync. A sample with no attachments
+// array (or an empty one) is reported as not being a keyframe.
+static bool IsKeyframe(CMSampleBufferRef aSample) {
+  // Passing 0 asks CoreMedia not to create the array if it is missing.
+  CFArrayRef sampleAttachments =
+      CMSampleBufferGetSampleAttachmentsArray(aSample, 0);
+  if (!sampleAttachments || CFArrayGetCount(sampleAttachments) == 0) {
+    return false;
+  }
+
+  auto firstAttachment = static_cast<CFDictionaryRef>(
+      CFArrayGetValueAtIndex(sampleAttachments, 0));
+  const bool markedNotSync =
+      CFDictionaryContainsKey(firstAttachment, kCMSampleAttachmentKey_NotSync);
+  return !markedNotSync;
+}
+
+// Returns the number of H264 parameter sets stored in aDescription. When the
+// query fails an error is logged and the count is left at 0.
+static size_t GetNumParamSets(CMFormatDescriptionRef aDescription) {
+  size_t count = 0;
+  // Only the count out-parameter is requested; the other outputs are null.
+  if (CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
+          aDescription, 0, nullptr, nullptr, &count, nullptr) != noErr) {
+    VTENC_LOGE("Cannot get number of parameter sets from format description");
+  }
+  return count;
+}
+
+static const uint8_t kNALUStart[4] = {0, 0, 0, 1};
+
+// Fetches the H264 parameter set at aIndex from aDescription. On success
+// *aDataPtr points at the parameter set bytes and their length is returned;
+// on failure an error is logged and 0 is returned.
+static size_t GetParamSet(CMFormatDescriptionRef aDescription, size_t aIndex,
+                          const uint8_t** aDataPtr) {
+  size_t paramSetSize = 0;
+  int nalHeaderLength = 0;
+  const OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
+      aDescription, aIndex, aDataPtr, &paramSetSize, nullptr,
+      &nalHeaderLength);
+  if (status != noErr) {
+    VTENC_LOGE("fail to get parameter set from format description");
+    return 0;
+  }
+
+  // The rest of this file assumes 4-byte NAL unit length headers.
+  MOZ_ASSERT(nalHeaderLength == sizeof(kNALUStart),
+             "Only support 4 byte header");
+  return paramSetSize;
+}
+
+// Appends every parameter set (SPS/PPS) found in aDescription to aDst, each
+// one preceded by the 4-byte NAL unit start code. Returns false as soon as a
+// parameter set cannot be read or appended.
+static bool WriteSPSPPS(MediaRawData* aDst,
+                        CMFormatDescriptionRef aDescription) {
+  const size_t paramSetCount = GetNumParamSets(aDescription);
+  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());
+
+  for (size_t index = 0; index != paramSetCount; ++index) {
+    const uint8_t* paramSet = nullptr;
+    const size_t paramSetSize = GetParamSet(aDescription, index, &paramSet);
+    if (paramSetSize == 0) {
+      return false;
+    }
+
+    if (!writer->Append(kNALUStart, sizeof(kNALUStart))) {
+      VTENC_LOGE("Cannot write NAL unit start code");
+      return false;
+    }
+
+    if (!writer->Append(paramSet, paramSetSize)) {
+      VTENC_LOGE("Cannot write parameter set");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Copies the "avcC" atom out of the sample description extension atoms of
+// aDescription into a new MediaByteBuffer. Returns nullptr (after logging)
+// when the atoms, the "avcC" entry or its payload are missing.
+static RefPtr<MediaByteBuffer> extractAvcc(
+    CMFormatDescriptionRef aDescription) {
+  CFPropertyListRef atoms = CMFormatDescriptionGetExtension(
+      aDescription,
+      kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms);
+  if (!atoms) {
+    VTENC_LOGE("fail to get atoms");
+    return nullptr;
+  }
+
+  auto atomDict = static_cast<CFDictionaryRef>(atoms);
+  auto avcC =
+      static_cast<CFDataRef>(CFDictionaryGetValue(atomDict, CFSTR("avcC")));
+  if (!avcC) {
+    VTENC_LOGE("fail to extract avcC");
+    return nullptr;
+  }
+
+  const CFIndex payloadSize = CFDataGetLength(avcC);
+  const UInt8* payload = CFDataGetBytePtr(avcC);
+  if (!payload || payloadSize <= 0) {
+    VTENC_LOGE("empty avcC");
+    return nullptr;
+  }
+
+  RefPtr<MediaByteBuffer> config = new MediaByteBuffer(payloadSize);
+  config->AppendElements(payload, payloadSize);
+  return config;
+}
+
+// Writes codec configuration for keyframes; non-keyframes are left alone.
+//
+// For a keyframe, aDst is flagged as such and then:
+//  - Annex B output: SPS/PPS are written in-band into aDst;
+//  - AVCC output: the "avcC" record is extracted and, if it differs from the
+//    cached one (mAvcc), cached and attached to aDst as extra data.
+// Returns false when the configuration cannot be obtained or written.
+bool AppleVTEncoder::WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc,
+                                    const bool aAsAnnexB) {
+  if (!IsKeyframe(aSrc)) {
+    return true;
+  }
+  aDst->mKeyframe = true;
+
+  CMFormatDescriptionRef description = CMSampleBufferGetFormatDescription(aSrc);
+  if (!description) {
+    VTENC_LOGE("fail to get format description from sample");
+    return false;
+  }
+
+  if (aAsAnnexB) {
+    return WriteSPSPPS(aDst, description);
+  }
+
+  RefPtr<MediaByteBuffer> avcc = extractAvcc(description);
+  if (!avcc) {
+    return false;
+  }
+
+  // Only attach the record when it is new or has changed.
+  const bool configChanged = !mAvcc || !H264::CompareExtraData(avcc, mAvcc);
+  if (configChanged) {
+    mAvcc = avcc;
+    aDst->mExtraData = mAvcc;
+  }
+
+  return true;
+}
+
+// Appends the NAL units contained in aSrc to aDst.
+//
+// The source data is a sequence of NAL units, each prefixed by its size as a
+// 4-byte big-endian integer. With aAsAnnexB the size prefix is replaced by
+// the 4-byte start code; otherwise prefix and payload are copied verbatim.
+//
+// Returns false (after logging) if the block buffer is missing, the
+// destination cannot be resized, a copy fails, or the size prefixes are
+// inconsistent with the total sample size. On failure aDst may have been
+// resized and partially written.
+static bool WriteNALUs(MediaRawData* aDst, CMSampleBufferRef aSrc,
+                       bool aAsAnnexB = false) {
+  size_t srcRemaining = CMSampleBufferGetTotalSampleSize(aSrc);
+  CMBlockBufferRef block = CMSampleBufferGetDataBuffer(aSrc);
+  if (!block) {
+    VTENC_LOGE("Cannot get block buffer frome sample");
+    return false;
+  }
+  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());
+  size_t writtenLength = aDst->Size();
+  // Ensure capacity. Both output forms use exactly as many bytes as the
+  // source (the start code and the size prefix are both 4 bytes long).
+  if (!writer->SetSize(writtenLength + srcRemaining)) {
+    VTENC_LOGE("Cannot allocate buffer");
+    return false;
+  }
+  size_t readLength = 0;
+  while (srcRemaining > 0) {
+    // Extract the size of next NAL unit. Read it straight into an integer so
+    // no byte array has to be reinterpreted as a uint32_t.
+    uint32_t unitSizeBE = 0;
+    static_assert(sizeof(unitSizeBE) == sizeof(kNALUStart),
+                  "Size prefix and start code must have the same length");
+    if (srcRemaining < sizeof(unitSizeBE)) {
+      VTENC_LOGE("Truncated NAL unit size prefix");
+      return false;
+    }
+    if (CMBlockBufferCopyDataBytes(block, readLength, sizeof(unitSizeBE),
+                                   &unitSizeBE) != kCMBlockBufferNoErr) {
+      VTENC_LOGE("Cannot copy unit size bytes");
+      return false;
+    }
+    size_t unitSize = CFSwapInt32BigToHost(unitSizeBE);
+
+    if (aAsAnnexB) {
+      // Replace unit size bytes with NALU start code.
+      PodCopy(writer->Data() + writtenLength, kNALUStart, sizeof(kNALUStart));
+      readLength += sizeof(unitSizeBE);
+      srcRemaining -= sizeof(unitSizeBE);
+      writtenLength += sizeof(kNALUStart);
+    } else {
+      // Copy unit size bytes + data.
+      unitSize += sizeof(unitSizeBE);
+    }
+    // Never trust the embedded size: a value larger than what is left would
+    // underflow srcRemaining and write past the end of the destination.
+    if (unitSize > srcRemaining) {
+      VTENC_LOGE("NAL unit size exceeds remaining sample data");
+      return false;
+    }
+    MOZ_ASSERT(writtenLength + unitSize <= aDst->Size());
+    // Copy NAL unit data
+    if (CMBlockBufferCopyDataBytes(block, readLength, unitSize,
+                                   writer->Data() + writtenLength) !=
+        kCMBlockBufferNoErr) {
+      VTENC_LOGE("Cannot copy unit data");
+      return false;
+    }
+    readLength += unitSize;
+    srcRemaining -= unitSize;
+    writtenLength += unitSize;
+  }
+  MOZ_ASSERT(writtenLength == aDst->Size());
+  return true;
+}
+
+void AppleVTEncoder::OutputFrame(CMSampleBufferRef aBuffer) {
+  // Converts |aBuffer| into a MediaRawData; nullptr is passed on failure.
+  const bool annexB = (mConfig.mUsage == Usage::Realtime);
+  RefPtr<MediaRawData> frame(new MediaRawData());
+  const bool ok = WriteExtraData(frame, aBuffer, annexB) &&
+                  WriteNALUs(frame, aBuffer, annexB);
+
+  const CMTime pts = CMSampleBufferGetPresentationTimeStamp(aBuffer);
+  const CMTime duration = CMSampleBufferGetOutputDuration(aBuffer);
+  frame->mTime = media::TimeUnit::FromSeconds(CMTimeGetSeconds(pts));
+  frame->mDuration = media::TimeUnit::FromSeconds(CMTimeGetSeconds(duration));
+  ProcessOutput(ok ? std::move(frame) : nullptr);
+}
+
+void AppleVTEncoder::ProcessOutput(RefPtr<MediaRawData>&& aOutput) {
+  if (!mTaskQueue->IsCurrentThreadIn()) {  // Re-dispatch onto the task queue.
+    nsresult rv = mTaskQueue->Dispatch(NewRunnableMethod<RefPtr<MediaRawData>>(
+        "AppleVTEncoder::ProcessOutput", this, &AppleVTEncoder::ProcessOutput,
+        std::move(aOutput)));
+    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+    Unused << rv;
+    return;
+  }
+  AssertOnTaskQueue();
+  // A null |aOutput| reports a failed frame: record a fatal error instead.
+  if (!aOutput) {
+    mError = NS_ERROR_DOM_MEDIA_FATAL_ERR;
+    return;
+  }
+  mEncodedData.AppendElement(std::move(aOutput));
+}
+
+RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Encode(
+    const MediaData* aSample) {
+  // Hands |aSample| (as VideoData) to ProcessEncode() via |mTaskQueue|.
+  MOZ_ASSERT(aSample);
+  RefPtr<const VideoData> video(aSample->As<const VideoData>());
+  return InvokeAsync<RefPtr<const VideoData>>(
+      mTaskQueue, this, __func__, &AppleVTEncoder::ProcessEncode,
+      std::move(video));
+}
+
+RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessEncode(
+    RefPtr<const VideoData> aSample) {
+  AssertOnTaskQueue();
+  MOZ_ASSERT(mSession);
+  // Encodes |aSample|; resolves with the frames gathered in |mEncodedData|.
+  if (NS_FAILED(mError)) {
+    return EncodePromise::CreateAndReject(mError, __func__);
+  }
+  AutoCVBufferRelease<CVImageBufferRef> buffer(
+      CreateCVPixelBuffer(aSample->mImage));
+  if (!buffer) {
+    return EncodePromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__);
+  }
+
+  CFDictionaryRef frameProps = nullptr;  // Only set for forced keyframes.
+  if (aSample->mKeyframe) {
+    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
+    CFTypeRef values[] = {kCFBooleanTrue};
+    MOZ_ASSERT(ArrayLength(keys) == ArrayLength(values));
+    frameProps = CFDictionaryCreate(
+        kCFAllocatorDefault, keys, values, ArrayLength(keys),
+        &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
+  }
+  VTEncodeInfoFlags info;
+  OSStatus status = VTCompressionSessionEncodeFrame(
+      mSession, buffer,
+      CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S),
+      CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S), frameProps,
+      nullptr /* sourceFrameRefcon */, &info);
+  if (frameProps) {
+    CFRelease(frameProps);  // Created above, so it is ours to release.
+  }
+  if (status != noErr) {
+    return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                                          __func__);
+  }
+  return EncodePromise::CreateAndResolve(std::move(mEncodedData), __func__);
+}
+
+static size_t NumberOfPlanes(MediaDataEncoder::PixelFormat aPixelFormat) {
+  // Returns how many planes |aPixelFormat| has, or 0 (after logging) when the
+  // format is not one of those listed here.
+  using Format = MediaDataEncoder::PixelFormat;
+  const Format f = aPixelFormat;
+  if (f == Format::YUV420SP_NV12) {
+    return 2;
+  }
+  if (f == Format::YUV444P || f == Format::YUV420P) {
+    return 3;
+  }
+  if (f == Format::RGBA32 || f == Format::BGRA32 || f == Format::RGB24 ||
+      f == Format::BGR24 || f == Format::GRAY8) {
+    return 1;
+  }
+  VTENC_LOGE("Unsupported input pixel format");
+  return 0;
+}
+
+using namespace layers;
+
+static void ReleaseImage(void* aImageGrip, const void* aDataPtr,
+                         size_t aDataSize, size_t aNumOfPlanes,
+                         const void** aPlanes) {  // Only |aImageGrip| is used.
+  (static_cast<PlanarYCbCrImage*>(aImageGrip))->Release();  // Drop the grip.
+}
+
+CVPixelBufferRef AppleVTEncoder::CreateCVPixelBuffer(const Image* aSource) {
+  AssertOnTaskQueue();
+  // Builds a CVPixelBuffer from the YCbCr planes of |aSource|; null on failure.
+  // TODO: support types other than YUV
+  PlanarYCbCrImage* image = const_cast<Image*>(aSource)->AsPlanarYCbCrImage();
+  if (!image || !image->GetData()) {
+    return nullptr;
+  }
+  // NOTE(review): .ref() presumes MapPixelFormat() produced a value - confirm.
+  OSType format = MapPixelFormat(mConfig.mSourcePixelFormat).ref();
+  size_t numPlanes = NumberOfPlanes(mConfig.mSourcePixelFormat);
+  const PlanarYCbCrImage::Data* yuv = image->GetData();
+  if (!yuv) {  // GetData() was already null-checked above.
+    return nullptr;
+  }
+  auto ySize = yuv->YDataSize();
+  auto cbcrSize = yuv->CbCrDataSize();
+  void* addresses[3] = {};
+  size_t widths[3] = {};
+  size_t heights[3] = {};
+  size_t strides[3] = {};
+  switch (numPlanes) {  // Each case falls through to fill the lower planes.
+    case 3:
+      addresses[2] = yuv->mCrChannel;
+      widths[2] = cbcrSize.width;
+      heights[2] = cbcrSize.height;
+      strides[2] = yuv->mCbCrStride;
+      [[fallthrough]];
+    case 2:
+      addresses[1] = yuv->mCbChannel;
+      widths[1] = cbcrSize.width;
+      heights[1] = cbcrSize.height;
+      strides[1] = yuv->mCbCrStride;
+      [[fallthrough]];
+    case 1:
+      addresses[0] = yuv->mYChannel;
+      widths[0] = ySize.width;
+      heights[0] = ySize.height;
+      strides[0] = yuv->mYStride;
+      break;
+    default:
+      return nullptr;  // NumberOfPlanes() gave 0 or another unhandled count.
+  }
+  // Extra reference on |image|; dropped by ReleaseImage() or on failure below.
+  CVPixelBufferRef buffer = nullptr;
+  image->AddRef();  // Grip input buffers.
+  CVReturn rv = CVPixelBufferCreateWithPlanarBytes(
+      kCFAllocatorDefault, yuv->mPictureRect.width, yuv->mPictureRect.height,
+      format, nullptr /* dataPtr */, 0 /* dataSize */, numPlanes, addresses,
+      widths, heights, strides, ReleaseImage /* releaseCallback */,
+      image /* releaseRefCon */, nullptr /* pixelBufferAttributes */, &buffer);
+  if (rv == kCVReturnSuccess) {
+    return buffer;
+    // |image| will be released in |ReleaseImage()|.
+  } else {
+    image->Release();
+    return nullptr;
+  }
+}
+
+RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Drain() {
+  return InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessDrain);
+}  // Drain() does no work itself; ProcessDrain() is invoked via |mTaskQueue|.
+
+RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessDrain() {
+  AssertOnTaskQueue();
+  MOZ_ASSERT(mSession);
+  // Frames were already completed by an earlier drain: resolve with no data.
+  if (mFramesCompleted) {
+    MOZ_DIAGNOSTIC_ASSERT(mEncodedData.IsEmpty());
+    return EncodePromise::CreateAndResolve(EncodedData(), __func__);
+  }
+
+  // Ask the session to complete its frames; any failure is reported as fatal.
+  if (VTCompressionSessionCompleteFrames(mSession, kCMTimeIndefinite) !=
+      noErr) {
+    return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR,
+                                          __func__);
+  }
+  mFramesCompleted = true;
+  // VTCompressionSessionCompleteFrames() could have queued multiple tasks with
+  // the new drained frames. Dispatch a task after them to resolve the promise
+  // with those frames.
+  RefPtr<AppleVTEncoder> self = this;
+  return InvokeAsync(mTaskQueue, __func__, [self]() {
+    EncodedData drained(std::move(self->mEncodedData));
+    self->mEncodedData = EncodedData();
+    return EncodePromise::CreateAndResolve(std::move(drained), __func__);
+  });
+}
+
+RefPtr<ShutdownPromise> AppleVTEncoder::Shutdown() {
+  return InvokeAsync(mTaskQueue, this, __func__,
+                     &AppleVTEncoder::ProcessShutdown);  // Via |mTaskQueue|.
+}
+
+RefPtr<ShutdownPromise> AppleVTEncoder::ProcessShutdown() {
+  if (mSession) {  // Nothing to tear down when no session exists.
+    VTCompressionSessionInvalidate(mSession);
+    CFRelease(mSession);
+    mSession = nullptr;  // The destructor asserts that this is null.
+    mInited = false;
+  }
+  return ShutdownPromise::CreateAndResolve(true, __func__);  // Always resolves.
+}
+
+RefPtr<GenericPromise> AppleVTEncoder::SetBitrate(
+    MediaDataEncoder::Rate aBitsPerSec) {
+  RefPtr<AppleVTEncoder> encoder = this;  // Captured by the lambda below.
+  return InvokeAsync(mTaskQueue, __func__, [encoder, aBitsPerSec]() {
+    MOZ_ASSERT(encoder->mSession);
+    const bool applied = SetAverageBitrate(encoder->mSession, aBitsPerSec);
+    return applied ? GenericPromise::CreateAndResolve(true, __func__)
+                   : GenericPromise::CreateAndReject(
+                         NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, __func__);
+  });
+}
+
+}  // namespace mozilla
diff --git a/dom/media/platforms/apple/AppleVTEncoder.h b/dom/media/platforms/apple/AppleVTEncoder.h
new file mode 100644
index 0000000000..7f12f7ebb5
--- /dev/null
+++ b/dom/media/platforms/apple/AppleVTEncoder.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_AppleVTEncoder_h_
+#define mozilla_AppleVTEncoder_h_
+
+#include <CoreMedia/CoreMedia.h>
+#include <VideoToolbox/VideoToolbox.h>
+
+#include "PlatformEncoderModule.h"
+#include "TimeUnits.h"
+
+namespace mozilla {
+
+namespace layers {
+class Image;
+}
+
+class AppleVTEncoder final : public MediaDataEncoder {
+ public:
+  using Config = H264Config;
+  // |aTaskQueue| must be non-null and |aConfig| must have a positive size.
+  AppleVTEncoder(const Config& aConfig, RefPtr<TaskQueue> aTaskQueue,
+                 const bool aHwardwareNotAllowed)
+      : mConfig(aConfig),
+        mTaskQueue(aTaskQueue),
+        mHardwareNotAllowed(aHwardwareNotAllowed),
+        mFramesCompleted(false),
+        mError(NS_OK),
+        mSession(nullptr) {
+    MOZ_ASSERT(mConfig.mSize.width > 0 && mConfig.mSize.height > 0);
+    MOZ_ASSERT(mTaskQueue);
+  }
+  // MediaDataEncoder overrides.
+  RefPtr<InitPromise> Init() override;
+  RefPtr<EncodePromise> Encode(const MediaData* aSample) override;
+  RefPtr<EncodePromise> Drain() override;
+  RefPtr<ShutdownPromise> Shutdown() override;
+  RefPtr<GenericPromise> SetBitrate(Rate aBitsPerSec) override;
+
+  nsCString GetDescriptionName() const override {
+    MOZ_ASSERT(mSession);
+    return mIsHardwareAccelerated ? "apple hardware VT encoder"_ns
+                                  : "apple software VT encoder"_ns;
+  }
+  // Converts |aBuffer| into a MediaRawData and hands it to ProcessOutput().
+  void OutputFrame(CMSampleBufferRef aBuffer);
+
+ private:
+  virtual ~AppleVTEncoder() { MOZ_ASSERT(!mSession); }  // Shut down first.
+  RefPtr<EncodePromise> ProcessEncode(RefPtr<const VideoData> aSample);
+  void ProcessOutput(RefPtr<MediaRawData>&& aOutput);
+  void ResolvePromise();
+  RefPtr<EncodePromise> ProcessDrain();
+  RefPtr<ShutdownPromise> ProcessShutdown();
+
+  CFDictionaryRef BuildSourceImageBufferAttributes();
+  CVPixelBufferRef CreateCVPixelBuffer(const layers::Image* aSource);
+  bool WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc,
+                      const bool aAsAnnexB);
+  void AssertOnTaskQueue() { MOZ_ASSERT(mTaskQueue->IsCurrentThreadIn()); }
+
+  const Config mConfig;
+  const RefPtr<TaskQueue> mTaskQueue;
+  const bool mHardwareNotAllowed;
+  // Access only in mTaskQueue.
+  EncodedData mEncodedData;
+  bool mFramesCompleted;  // Set once ProcessDrain() has completed the frames.
+  RefPtr<MediaByteBuffer> mAvcc;  // Stores latest avcC data.
+  MediaResult mError;  // Set by ProcessOutput() when it receives a null frame.
+
+  // Written by Init() but used only in task queue.
+  VTCompressionSessionRef mSession;
+  // Can be accessed on any thread, but only written on during init.
+  Atomic<bool> mIsHardwareAccelerated;
+  // Written during init and shutdown.
+  Atomic<bool> mInited;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_AppleVTEncoder_h_
-- 
cgit v1.2.3