15 files changed, 2079 insertions, 215 deletions
diff --git a/dom/media/webcodecs/AudioData.cpp b/dom/media/webcodecs/AudioData.cpp
new file mode 100644
index 0000000000..0b21798be8
--- /dev/null
+++ b/dom/media/webcodecs/AudioData.cpp
@@ -0,0 +1,731 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Logging.h"
+#include "mozilla/dom/AudioData.h"
+#include "mozilla/dom/AudioDataBinding.h"
+#include "mozilla/dom/Promise.h"
+#include "mozilla/dom/StructuredCloneTags.h"
+#include "nsStringFwd.h"
+
+#include <utility>
+
+#include "AudioSampleFormat.h"
+#include "WebCodecsUtils.h"
+#include "js/StructuredClone.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+
+extern mozilla::LazyLogModule gWebCodecsLog;
+
+namespace mozilla::dom {
+
+#ifdef LOG_INTERNAL
+#  undef LOG_INTERNAL
+#endif  // LOG_INTERNAL (any earlier definition is dropped, then redefined)
+#define LOG_INTERNAL(level, msg, ...) \
+  MOZ_LOG(gWebCodecsLog, LogLevel::level, (msg, ##__VA_ARGS__))
+// LOGD / LOGE: printf-style logging to gWebCodecsLog at Debug / Error level.
+#ifdef LOGD
+#  undef LOGD
+#endif  // LOGD
+#define LOGD(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)
+
+#ifdef LOGE
+#  undef LOGE
+#endif  // LOGE
+#define LOGE(msg, ...) LOG_INTERNAL(Error, msg, ##__VA_ARGS__)
+
+// Cycle-collection glue for AudioData: mParent is the only member traversed
+// and unlinked; unlinking also drops the PCM resource via CloseIfNeeded().
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(AudioData)
+NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(AudioData)
+  tmp->CloseIfNeeded();
+  NS_IMPL_CYCLE_COLLECTION_UNLINK(mParent)
+  NS_IMPL_CYCLE_COLLECTION_UNLINK_PRESERVED_WRAPPER
+NS_IMPL_CYCLE_COLLECTION_UNLINK_END
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(AudioData)
+  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mParent)
+NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(AudioData)
+// AudioData should be released as soon as its refcount drops to zero,
+// without waiting for async deletion by the cycle collector, since it may hold
+// a large-size PCM buffer.
+NS_IMPL_CYCLE_COLLECTING_RELEASE_WITH_LAST_RELEASE(AudioData, CloseIfNeeded())
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(AudioData)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+/*
+ * W3C Webcodecs AudioData implementation
+ */
+
+AudioData::AudioData(nsIGlobalObject* aParent,
+                     const AudioDataSerializedData& aData)
+    : mParent(aParent),
+      mTimestamp(aData.mTimestamp),
+      mNumberOfChannels(aData.mNumberOfChannels),
+      mNumberOfFrames(aData.mNumberOfFrames),
+      mSampleRate(aData.mSampleRate),
+      mAudioSampleFormat(aData.mAudioSampleFormat),
+      // Shares the serialized PCM resource (RefPtr copy): no sample is copied.
+      mResource(aData.mResource) {
+  MOZ_ASSERT(mParent);  // a global is required
+  MOZ_ASSERT(mResource,
+             "Resource should always be present then receiving a transfer.");
+}
+
+AudioData::AudioData(const AudioData& aOther)
+    : mParent(aOther.mParent),
+      mTimestamp(aOther.mTimestamp),
+      mNumberOfChannels(aOther.mNumberOfChannels),
+      mNumberOfFrames(aOther.mNumberOfFrames),
+      mSampleRate(aOther.mSampleRate),
+      mAudioSampleFormat(aOther.mAudioSampleFormat),
+      // Both objects reference the same resource: the PCM data is not copied.
+      mResource(aOther.mResource) {
+  MOZ_ASSERT(mParent);  // mResource may be null if aOther was already closed
+}
+
+Result<already_AddRefed<AudioDataResource>, nsresult>
+AudioDataResource::Construct(
+    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aInit) {
+  // Fallibly duplicate the caller's bytes; a null append result is an OOM.
+  FallibleTArray<uint8_t> bytes;
+  const uint8_t* appended = ProcessTypedArraysFixed(
+      aInit, [&bytes](const Span<uint8_t>& aSrc) -> uint8_t* {
+        return bytes.AppendElements(aSrc.Elements(), aSrc.Length(), fallible);
+      });
+  if (appended) {
+    return MakeAndAddRef<AudioDataResource>(std::move(bytes));
+  }
+  LOGE("AudioDataResource::Ctor: OOM");
+  return Err(NS_ERROR_OUT_OF_MEMORY);
+}
+
+AudioData::AudioData(
+    nsIGlobalObject* aParent,
+    already_AddRefed<mozilla::dom::AudioDataResource> aResource,
+    const AudioDataInit& aInit)
+    : mParent(aParent),
+      mTimestamp(aInit.mTimestamp),
+      mNumberOfChannels(aInit.mNumberOfChannels),
+      mNumberOfFrames(aInit.mNumberOfFrames),
+      mSampleRate(aInit.mSampleRate),
+      mAudioSampleFormat(Some(aInit.mFormat)),  // always set on this path
+      mResource(std::move(aResource)) {  // takes over the caller's reference
+  MOZ_ASSERT(mParent);  // aInit itself is not validated here
+}
+
+AudioData::AudioData(
+    nsIGlobalObject* aParent,
+    already_AddRefed<mozilla::dom::AudioDataResource> aResource,
+    int64_t aTimestamp, uint32_t aNumberOfChannels, uint32_t aNumberOfFrames,
+    float aSampleRate, AudioSampleFormat aAudioSampleFormat)
+    : mParent(aParent),
+      mTimestamp(aTimestamp),
+      mNumberOfChannels(aNumberOfChannels),
+      mNumberOfFrames(aNumberOfFrames),
+      mSampleRate(aSampleRate),
+      mAudioSampleFormat(Some(aAudioSampleFormat)),  // always set on this path
+      mResource(std::move(aResource)) {  // takes over the caller's reference
+  MOZ_ASSERT(mParent);  // the other arguments are stored without validation
+}
+
+nsIGlobalObject* AudioData::GetParentObject() const {
+  AssertIsOnOwningThread();
+  // Returns mParent, the global this AudioData was constructed with.
+  return mParent.get();
+}
+
+JSObject* AudioData::WrapObject(JSContext* aCx,
+                                JS::Handle<JSObject*> aGivenProto) {
+  AssertIsOnOwningThread();
+  // Delegates to AudioData_Binding::Wrap() to create the JS object for `this`.
+  return AudioData_Binding::Wrap(aCx, this, aGivenProto);
+}
+
+uint32_t BytesPerSamples(const mozilla::dom::AudioSampleFormat& aFormat) {
+  // Byte width of one sample; a format and its planar twin share a width.
+  switch (aFormat) {
+    case AudioSampleFormat::U8_planar:
+    case AudioSampleFormat::U8:
+      return sizeof(uint8_t);
+    case AudioSampleFormat::S16_planar:
+    case AudioSampleFormat::S16:
+      return sizeof(int16_t);
+    case AudioSampleFormat::S32_planar:
+    case AudioSampleFormat::F32_planar:
+    case AudioSampleFormat::S32:
+    case AudioSampleFormat::F32:
+      return sizeof(float);
+  }
+  MOZ_ASSERT_UNREACHABLE("wrong enum value");
+  return 0;
+}
+
+Result<Ok, nsCString> IsValidAudioDataInit(const AudioDataInit& aInit) {
+  if (aInit.mSampleRate <= 0.0) {
+    auto msg = nsLiteralCString("sampleRate must be positive");
+    LOGD("%s", msg.get());
+    return Err(msg);
+  }
+  if (aInit.mNumberOfFrames == 0) {
+    auto msg = nsLiteralCString("mNumberOfFrames must be positive");
+    LOGD("%s", msg.get());
+    return Err(msg);
+  }
+  if (aInit.mNumberOfChannels == 0) {
+    auto msg = nsLiteralCString("mNumberOfChannels must be positive");
+    LOGD("%s", msg.get());
+    return Err(msg);
+  }
+  // Use checked 64-bit math: a uint32_t frames * channels product can wrap.
+  CheckedInt<uint64_t> totalSize = aInit.mNumberOfFrames;
+  totalSize *= aInit.mNumberOfChannels;
+  totalSize *= BytesPerSamples(aInit.mFormat);
+  uint64_t arraySizeBytes = ProcessTypedArraysFixed(
+      aInit.mData, [&](const Span<uint8_t>& aData) -> uint64_t {
+        return aData.LengthBytes();
+      });
+  if (!totalSize.isValid() || arraySizeBytes < totalSize.value()) {
+    auto msg = nsPrintfCString(
+        "Array of size %" PRIu64 " not big enough, should be at least %" PRIu64
+        " bytes",
+        arraySizeBytes, totalSize.isValid() ? totalSize.value() : UINT64_MAX);
+    LOGD("%s", msg.get());
+    return Err(msg);
+  }
+  return Ok();
+}
+
+const char* FormatToString(AudioSampleFormat aFormat) {
+  // Lower-case, dash-separated name of each format, as used in log messages.
+  switch (aFormat) {
+    case AudioSampleFormat::U8:
+      return "u8";
+    case AudioSampleFormat::U8_planar:
+      return "u8-planar";
+    case AudioSampleFormat::S16:
+      return "s16";
+    case AudioSampleFormat::S16_planar:
+      return "s16-planar";
+    case AudioSampleFormat::S32:
+      return "s32";
+    case AudioSampleFormat::S32_planar:
+      return "s32-planar";
+    case AudioSampleFormat::F32:
+      return "f32";
+    case AudioSampleFormat::F32_planar:
+      return "f32-planar";
+  }
+  MOZ_ASSERT_UNREACHABLE("wrong enum value");
+  return "unsupported";
+}
+
+/* static */
+already_AddRefed<AudioData> AudioData::Constructor(const GlobalObject& aGlobal,
+                                                   const AudioDataInit& aInit,
+                                                   ErrorResult& aRv) {
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(aGlobal.GetAsSupports());
+  LOGD("[%p] AudioData(fmt: %s, rate: %f, ch: %" PRIu32 ", ts: %" PRId64 ")",
+       global.get(), FormatToString(aInit.mFormat), aInit.mSampleRate,
+       aInit.mNumberOfChannels, aInit.mTimestamp);
+  if (!global) {
+    LOGE("Global unavailable");
+    aRv.Throw(NS_ERROR_FAILURE);
+    return nullptr;
+  }
+  // Reject an inconsistent init dictionary (TypeError) before copying any data.
+  auto rv = IsValidAudioDataInit(aInit);
+  if (rv.isErr()) {
+    LOGD("AudioData::Constructor failure (IsValidAudioDataInit)");
+    aRv.ThrowTypeError(rv.inspectErr());
+    return nullptr;
+  }
+  auto resource = AudioDataResource::Construct(aInit.mData);
+  if (resource.isErr()) {
+    LOGD("AudioData::Constructor failure (OOM)");
+    aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
+    return nullptr;
+  }
+  // The new AudioData owns the private copy of the data made just above.
+  return MakeAndAddRef<mozilla::dom::AudioData>(global, resource.unwrap(),
+                                                aInit);
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-format
+Nullable<mozilla::dom::AudioSampleFormat> AudioData::GetFormat() const {
+  AssertIsOnOwningThread();
+  return MaybeToNullable(mAudioSampleFormat);  // Nothing() once Close()d
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-samplerate
+float AudioData::SampleRate() const {
+  AssertIsOnOwningThread();
+  return mSampleRate;  // reset to 0 by Close()
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-numberofframes
+uint32_t AudioData::NumberOfFrames() const {
+  AssertIsOnOwningThread();
+  return mNumberOfFrames;  // reset to 0 by Close()
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-numberofchannels
+uint32_t AudioData::NumberOfChannels() const {
+  AssertIsOnOwningThread();
+  return mNumberOfChannels;  // reset to 0 by Close()
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-duration
+uint64_t AudioData::Duration() const {
+  AssertIsOnOwningThread();
+  // Close() zeroes frames and rate: return 0 instead of converting 0/0 (NaN).
+  // Truncates, spec is unclear: https://github.com/w3c/webcodecs/issues/726
+  const double usecs = static_cast<double>(USECS_PER_S * mNumberOfFrames);
+  return mNumberOfFrames ? static_cast<uint64_t>(usecs / mSampleRate) : 0;
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-timestamp
+int64_t AudioData::Timestamp() const {
+  AssertIsOnOwningThread();
+  return mTimestamp;  // not modified by Close()
+}
+
+struct CopyToSpec {
+  CopyToSpec(uint32_t aFrameCount, uint32_t aFrameOffset, uint32_t aPlaneIndex,
+             AudioSampleFormat aFormat)
+      : mFrameCount(aFrameCount),
+        mFrameOffset(aFrameOffset),
+        mPlaneIndex(aPlaneIndex),
+        mFormat(aFormat) {}
+  // Resolved copyTo() request; counts and offsets are in frames, not samples.
+  const uint32_t mFrameCount;       // number of frames to copy
+  const uint32_t mFrameOffset;      // first source frame to copy
+  const uint32_t mPlaneIndex;       // plane (channel) to read or write
+  const AudioSampleFormat mFormat;  // destination sample format
+};
+
+bool IsInterleaved(const AudioSampleFormat& aFormat) {
+  switch (aFormat) {  // the "_planar" formats are the non-interleaved ones
+    case AudioSampleFormat::F32_planar:
+    case AudioSampleFormat::S32_planar:
+    case AudioSampleFormat::S16_planar:
+    case AudioSampleFormat::U8_planar:
+      return false;
+    case AudioSampleFormat::F32:
+    case AudioSampleFormat::S32:
+    case AudioSampleFormat::S16:
+    case AudioSampleFormat::U8:
+      return true;
+  }
+  MOZ_ASSERT_UNREACHABLE("Invalid enum value");
+  return false;
+}
+
+size_t AudioData::ComputeCopyElementCount(
+    const AudioDataCopyToOptions& aOptions, ErrorResult& aRv) {
+  // https://w3c.github.io/webcodecs/#compute-copy-element-count
+  // Steps 1-2: the destination format defaults to this AudioData's own format.
+  auto destFormat = mAudioSampleFormat;
+  if (aOptions.mFormat.WasPassed()) {
+    destFormat = OptionalToMaybe(aOptions.mFormat);
+  }
+  // Steps 3-4: validate the plane index (interleaved data has a single plane).
+  MOZ_ASSERT(destFormat.isSome());
+  if (IsInterleaved(destFormat.value())) {
+    if (aOptions.mPlaneIndex > 0) {
+      auto msg = "Interleaved format, but plane index > 0"_ns;
+      LOGD("%s", msg.get());
+      aRv.ThrowRangeError(msg);
+      return 0;
+    }
+  } else {
+    if (aOptions.mPlaneIndex >= mNumberOfChannels) {
+      auto msg = nsPrintfCString(
+          "Plane index %" PRIu32
+          " greater or equal than the number of channels %" PRIu32,
+          aOptions.mPlaneIndex, mNumberOfChannels);
+      LOGD("%s", msg.get());
+      aRv.ThrowRangeError(msg);
+      return 0;
+    }
+  }
+  // Step 5: not checked here. NOTE(review): CopySamples() MOZ_CRASHes for
+  // planar -> interleaved. Step 6: every plane has the same number of frames.
+  uint64_t frameCount = mNumberOfFrames;
+  // Step 7: the frame offset must point inside the available frames.
+  if (aOptions.mFrameOffset >= frameCount) {
+    auto msg = nsPrintfCString("Frame offset of %" PRIu32
+                               " greater or equal than frame count %" PRIu64,
+                               aOptions.mFrameOffset, frameCount);
+    LOGD("%s", msg.get());
+    aRv.ThrowRangeError(msg);
+    return 0;
+  }
+  // Steps 8-9: copy every remaining frame unless a smaller count was passed.
+  uint64_t copyFrameCount = frameCount - aOptions.mFrameOffset;
+  if (aOptions.mFrameCount.WasPassed()) {
+    if (aOptions.mFrameCount.Value() > copyFrameCount) {
+      auto msg = nsPrintfCString(
+          "Passed copy frame count of %" PRIu32
+          " greater than available source frames for copy of %" PRIu64,
+          aOptions.mFrameCount.Value(), copyFrameCount);
+      LOGD("%s", msg.get());
+      aRv.ThrowRangeError(msg);
+      return 0;
+    }
+    copyFrameCount = aOptions.mFrameCount.Value();
+  }
+  // Steps 10-11: an interleaved copy holds one sample per channel per frame.
+  uint64_t elementCount = copyFrameCount;
+  if (IsInterleaved(destFormat.value())) {
+    elementCount *= mNumberOfChannels;
+  }
+  return elementCount;
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-allocationsize
+// This method returns an int, that can be zero in case of success or error.
+// Caller should check aRv to determine success or error.
+uint32_t AudioData::AllocationSize(const AudioDataCopyToOptions& aOptions,
+                                   ErrorResult& aRv) {
+  AssertIsOnOwningThread();
+  if (!mResource) {
+    auto msg = "allocationSize called on detached AudioData"_ns;
+    LOGD("%s", msg.get());
+    aRv.ThrowInvalidStateError(msg);
+    return 0;
+  }
+  size_t copyElementCount = ComputeCopyElementCount(aOptions, aRv);
+  if (aRv.Failed()) {
+    LOGD("AudioData::AllocationSize failure");
+    // ComputeCopyElementCount has set the exception type.
+    return 0;
+  }
+  Maybe<mozilla::dom::AudioSampleFormat> destFormat = mAudioSampleFormat;
+  if (aOptions.mFormat.WasPassed()) {
+    destFormat = OptionalToMaybe(aOptions.mFormat);
+  }
+  if (destFormat.isNothing()) {
+    auto msg = "AudioData has an unknown format"_ns;
+    LOGD("%s", msg.get());
+    // See https://github.com/w3c/webcodecs/issues/727 -- it isn't clear yet
+    // what to do here
+    aRv.ThrowRangeError(msg);
+    return 0;
+  }
+  // Checked in 32 bits: a size that does not fit the return type is an error.
+  CheckedInt<uint32_t> bytesPerSample = BytesPerSamples(destFormat.ref());
+  auto res = bytesPerSample * copyElementCount;
+  if (res.isValid()) {
+    return res.value();
+  }
+  aRv.ThrowRangeError("Allocation size too large");
+  return 0;
+}
+
+template <typename S, typename D>
+void CopySamples(Span<S> aSource, Span<D> aDest, uint32_t aSourceChannelCount,
+                 const AudioSampleFormat aSourceFormat,
+                 const CopyToSpec& aCopyToSpec) {
+  // Converts and copies aCopyToSpec.mFrameCount frames from aSource to aDest.
+  if (IsInterleaved(aSourceFormat) && IsInterleaved(aCopyToSpec.mFormat)) {
+    // Offsets and counts are in frames; a frame is aSourceChannelCount samples.
+    const size_t first = size_t{aCopyToSpec.mFrameOffset} * aSourceChannelCount;
+    const uint32_t count = aCopyToSpec.mFrameCount * aSourceChannelCount;
+    MOZ_ASSERT(aCopyToSpec.mPlaneIndex == 0);
+    MOZ_ASSERT(aDest.Length() >= count);
+    MOZ_ASSERT(aSource.Length() >= first + count);
+    ConvertAudioSamples(aSource.data() + first, aDest.data(), count);
+    return;
+  }
+  if (IsInterleaved(aSourceFormat) && !IsInterleaved(aCopyToSpec.mFormat)) {
+    MOZ_ASSERT(aDest.Length() >= aCopyToSpec.mFrameCount);
+    MOZ_ASSERT(aSource.Length() - aCopyToSpec.mFrameOffset >=
+               aCopyToSpec.mFrameCount);
+    // Interleaved to planar -- only copy samples of the correct channel to the
+    // destination
+    size_t readIndex = aCopyToSpec.mFrameOffset * aSourceChannelCount +
+                       aCopyToSpec.mPlaneIndex;
+    for (size_t i = 0; i < aCopyToSpec.mFrameCount; i++) {
+      aDest[i] = ConvertAudioSample<D>(aSource[readIndex]);
+      readIndex += aSourceChannelCount;
+    }
+    return;
+  }
+
+  if (!IsInterleaved(aSourceFormat) && IsInterleaved(aCopyToSpec.mFormat)) {
+    MOZ_CRASH("This should never be hit -- current spec doesn't support it");
+    // Planar to interleaved -- copy of all channels of the source into the
+    // destination buffer.
+    MOZ_ASSERT(aCopyToSpec.mPlaneIndex == 0);
+    MOZ_ASSERT(aDest.Length() >= aCopyToSpec.mFrameCount * aSourceChannelCount);
+    MOZ_ASSERT(aSource.Length() -
+                   aCopyToSpec.mFrameOffset * aSourceChannelCount >=
+               aCopyToSpec.mFrameCount * aSourceChannelCount);
+    size_t writeIndex = 0;
+    // Scan the source linearly and put each sample at the right position in the
+    // destination interleaved buffer.
+    size_t readIndex = 0;
+    for (size_t channel = 0; channel < aSourceChannelCount; channel++) {
+      writeIndex = channel;
+      for (size_t i = 0; i < aCopyToSpec.mFrameCount; i++) {
+        aDest[writeIndex] = ConvertAudioSample<D>(aSource[readIndex]);
+        readIndex++;
+        writeIndex += aSourceChannelCount;
+      }
+    }
+    return;
+  }
+  if (!IsInterleaved(aSourceFormat) && !IsInterleaved(aCopyToSpec.mFormat)) {
+    // Planar to Planar / convert + copy from the right index in the source.
+    // Each plane is taken to span aSource.Length() / aSourceChannelCount.
+    size_t offset =
+        aCopyToSpec.mPlaneIndex * aSource.Length() / aSourceChannelCount;
+    MOZ_ASSERT(aDest.Length() >= aCopyToSpec.mFrameCount);
+    for (uint32_t i = 0; i < aCopyToSpec.mFrameCount; i++) {
+      aDest[i] =
+          ConvertAudioSample<D>(aSource[offset + aCopyToSpec.mFrameOffset + i]);
+    }
+  }
+}
+
+nsCString AudioData::ToString() const {
+  if (mResource) {  // still open: summarize the buffer and its layout
+    return nsPrintfCString(
+        "AudioData[%zu bytes %s %fHz %" PRIu32 "x%" PRIu32 "ch]",
+        mResource->Data().LengthBytes(),
+        FormatToString(mAudioSampleFormat.value()), mSampleRate,
+        mNumberOfFrames, mNumberOfChannels);
+  }
+  return nsCString("AudioData[detached]");
+}
+
+nsCString CopyToToString(size_t aDestBufSize,
+                         const AudioDataCopyToOptions& aOptions) {
+  // Unset optional members are logged as "null" (format) and 0 (frame count).
+  const auto& fmt = aOptions.mFormat;
+  return nsPrintfCString(
+      "AudioDataCopyToOptions[data: %zu bytes %s frame count:%" PRIu32
+      " frame offset: %" PRIu32 "  plane: %" PRIu32 "]",
+      aDestBufSize, fmt.WasPassed() ? FormatToString(fmt.Value()) : "null",
+      aOptions.mFrameCount.WasPassed() ? aOptions.mFrameCount.Value() : 0,
+      aOptions.mFrameOffset, aOptions.mPlaneIndex);
+}
+
+using DataSpanType =
+    Variant<Span<uint8_t>, Span<int16_t>, Span<int32_t>, Span<float>>;
+
+DataSpanType GetDataSpan(Span<uint8_t> aSpan, const AudioSampleFormat aFormat) {
+  // TODO: check size/alignment? A trailing partial sample is silently dropped.
+  const size_t count = aSpan.Length() / BytesPerSamples(aFormat);
+  switch (aFormat) {
+    case AudioSampleFormat::F32_planar:
+    case AudioSampleFormat::F32:
+      return AsVariant(Span(reinterpret_cast<float*>(aSpan.data()), count));
+    case AudioSampleFormat::S32_planar:
+    case AudioSampleFormat::S32:
+      return AsVariant(Span(reinterpret_cast<int32_t*>(aSpan.data()), count));
+    case AudioSampleFormat::S16_planar:
+    case AudioSampleFormat::S16:
+      return AsVariant(Span(reinterpret_cast<int16_t*>(aSpan.data()), count));
+    case AudioSampleFormat::U8_planar:
+    case AudioSampleFormat::U8:
+      return AsVariant(aSpan);
+  }
+  MOZ_ASSERT_UNREACHABLE("Invalid enum value");
+  return AsVariant(aSpan);
+}
+
+void CopySamples(DataSpanType& aSource, DataSpanType& aDest,
+                 uint32_t aSourceChannelCount,
+                 const AudioSampleFormat aSourceFormat,
+                 const CopyToSpec& aCopyToSpec) {
+  aSource.match([&](auto& src) {  // src: span typed as the source samples
+    aDest.match([&](auto& dst) {  // dst: span typed as the destination samples
+      CopySamples(src, dst, aSourceChannelCount, aSourceFormat, aCopyToSpec);
+    });
+  });
+}
+
+void DoCopy(Span<uint8_t> aSource, Span<uint8_t> aDest,
+            const uint32_t aSourceChannelCount,
+            const AudioSampleFormat aSourceFormat,
+            const CopyToSpec& aCopyToSpec) {
+  DataSpanType source = GetDataSpan(aSource, aSourceFormat);  // bytes -> typed
+  DataSpanType dest = GetDataSpan(aDest, aCopyToSpec.mFormat);  // bytes -> typed
+  CopySamples(source, dest, aSourceChannelCount, aSourceFormat, aCopyToSpec);
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-copyto
+void AudioData::CopyTo(
+    const MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDestination,
+    const AudioDataCopyToOptions& aOptions, ErrorResult& aRv) {
+  AssertIsOnOwningThread();
+
+  size_t destLength = ProcessTypedArraysFixed(
+      aDestination, [&](const Span<uint8_t>& aData) -> size_t {
+        return aData.LengthBytes();
+      });
+
+  LOGD("AudioData::CopyTo %s -> %s", ToString().get(),
+       CopyToToString(destLength, aOptions).get());
+
+  if (!mResource) {
+    auto msg = "copyTo called on closed AudioData"_ns;
+    LOGD("%s", msg.get());
+    aRv.ThrowInvalidStateError(msg);
+    return;
+  }
+  uint64_t copyElementCount = ComputeCopyElementCount(aOptions, aRv);
+  if (aRv.Failed()) {
+    LOGD("AudioData::CopyTo failed in ComputeCopyElementCount");
+    return;
+  }
+  auto destFormat = mAudioSampleFormat;
+  if (aOptions.mFormat.WasPassed()) {
+    destFormat = OptionalToMaybe(aOptions.mFormat);
+  }
+  // CopySamples() MOZ_CRASHes on planar -> interleaved: reject it up front.
+  if (!IsInterleaved(mAudioSampleFormat.value()) &&
+      IsInterleaved(destFormat.value())) {
+    aRv.ThrowNotSupportedError("planar to interleaved copy is not supported");
+    return;
+  }
+  uint32_t bytesPerSample = BytesPerSamples(destFormat.value());
+  CheckedInt<uint32_t> copyLength = bytesPerSample;
+  copyLength *= copyElementCount;
+  // An overflowed byte count can never fit: never read value() when invalid.
+  if (!copyLength.isValid() || copyLength.value() > destLength) {
+    auto msg = nsPrintfCString(
+        "destination buffer of length %zu too small for copying %" PRIu64
+        "  elements",
+        destLength, bytesPerSample * copyElementCount);
+    LOGD("%s", msg.get());
+    aRv.ThrowRangeError(msg);
+    return;
+  }
+  const uint32_t framesToCopy = aOptions.mFrameCount.WasPassed()
+                                    ? aOptions.mFrameCount.Value()
+                                    : mNumberOfFrames - aOptions.mFrameOffset;
+  CopyToSpec copyToSpec(framesToCopy, aOptions.mFrameOffset,
+                        aOptions.mPlaneIndex, destFormat.value());
+  // Type both buffers according to their sample format and perform the copy.
+  ProcessTypedArraysFixed(aDestination, [&](const Span<uint8_t>& aData) {
+    DoCopy(mResource->Data(), aData, mNumberOfChannels,
+           mAudioSampleFormat.value(), copyToSpec);
+  });
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodata-clone
+already_AddRefed<AudioData> AudioData::Clone(ErrorResult& aRv) {
+  AssertIsOnOwningThread();
+
+  if (!mResource) {
+    auto msg = "No media resource in the AudioData now"_ns;
+    LOGD("%s", msg.get());
+    aRv.ThrowInvalidStateError(msg);
+    return nullptr;
+  }
+  // The copy constructor shares mResource: the clone does not duplicate PCM.
+  return MakeAndAddRef<AudioData>(*this);
+}
+
+// https://w3c.github.io/webcodecs/#close-audiodata
+void AudioData::Close() {
+  AssertIsOnOwningThread();
+  // Release our reference to the PCM data and reset all but mTimestamp.
+  mResource = nullptr;
+  mSampleRate = 0;
+  mNumberOfFrames = 0;
+  mNumberOfChannels = 0;
+  mAudioSampleFormat = Nothing();
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-deserialization-steps%E2%91%A1
+/* static */
+JSObject* AudioData::ReadStructuredClone(JSContext* aCx,
+                                         nsIGlobalObject* aGlobal,
+                                         JSStructuredCloneReader* aReader,
+                                         const AudioDataSerializedData& aData) {
+  JS::Rooted<JS::Value> value(aCx, JS::NullValue());
+  // Build the AudioData around the serialized fields (the PCM resource is
+  // shared, not copied) and reflect it into aCx. The RefPtr lives in its own
+  // scope so that it is destroyed before the raw JSObject* is returned:
+  // otherwise the static analysis reports a rooting hazard, as it cannot prove
+  // the RefPtr destructor is safe while an unrooted JSObject* is on the stack.
+  {
+    RefPtr<AudioData> frame = MakeAndAddRef<AudioData>(aGlobal, aData);
+    if (!GetOrCreateDOMReflector(aCx, frame, &value) || !value.isObject()) {
+      LOGE("GetOrCreateDOMReflect failure");
+      return nullptr;
+    }
+  }
+  return value.toObjectOrNull();
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-audiodata%E2%91%A2%E2%91%A2
+bool AudioData::WriteStructuredClone(JSStructuredCloneWriter* aWriter,
+                                     StructuredCloneHolder* aHolder) const {
+  AssertIsOnOwningThread();
+  // Stashes a snapshot in aHolder and writes its index after the AudioData tag.
+  // AudioData closed
+  if (!mResource) {
+    LOGD("AudioData was already close in WriteStructuredClone");
+    return false;
+  }
+  const uint32_t index = aHolder->AudioData().Length();
+  // https://github.com/w3c/webcodecs/issues/717
+  // For now, serialization is only allowed in the same address space, it's OK
+  // to send a refptr here instead of copying the backing buffer.
+  aHolder->AudioData().AppendElement(AudioDataSerializedData(*this));
+
+  return !NS_WARN_IF(!JS_WriteUint32Pair(aWriter, SCTAG_DOM_AUDIODATA, index));
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-transfer-steps
+UniquePtr<AudioData::TransferredData> AudioData::Transfer() {
+  AssertIsOnOwningThread();
+
+  if (!mResource) {
+    // Closed: there is nothing left to transfer
+    LOGD("AudioData was already close in Transfer");
+    return nullptr;
+  }
+
+  // This adds a ref to the resource
+  auto serialized = MakeUnique<AudioDataSerializedData>(*this);
+  // This removes the ref to the resource, effectively transferring the backing
+  // storage.
+  Close();
+  return serialized;
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-transfer-receiving-steps
+/* static */
+already_AddRefed<AudioData> AudioData::FromTransferred(nsIGlobalObject* aGlobal,
+                                                       TransferredData* aData) {
+  MOZ_ASSERT(aData);
+  // Wraps aData's fields and resource in a new AudioData for aGlobal.
+  return MakeAndAddRef<AudioData>(aGlobal, *aData);
+}
+
+void AudioData::CloseIfNeeded() {
+  // Drops the PCM resource only; unlike Close(), metadata is left untouched.
+  // Clearing an already-null RefPtr does nothing, so no check is required.
+  mResource = nullptr;
+}
+
+#undef LOGD
+#undef LOGE
+#undef LOG_INTERNAL
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webcodecs/AudioData.h b/dom/media/webcodecs/AudioData.h
new file mode 100644
index 0000000000..4ae69a225a
--- /dev/null
+++ b/dom/media/webcodecs/AudioData.h
@@ -0,0 +1,176 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_AudioData_h
+#define mozilla_dom_AudioData_h
+
+#include "MediaData.h"
+#include "WebCodecsUtils.h"
+#include "js/TypeDecls.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/Span.h"
+#include "mozilla/dom/AudioDataBinding.h"
+#include "mozilla/dom/BindingDeclarations.h"
+#include "mozilla/dom/StructuredCloneHolder.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+
+class nsIGlobalObject;
+class nsIURI;
+
+namespace mozilla::dom {
+
+class MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer;
+class OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer;
+class Promise;
+struct AudioDataBufferInit;
+struct AudioDataCopyToOptions;
+struct AudioDataInit;
+
+}  // namespace mozilla::dom
+
+namespace mozilla::dom {
+
+class AudioData;
+class AudioDataResource;
+struct AudioDataSerializedData;
+
+class AudioData final : public nsISupports, public nsWrapperCache {
+ public:
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(AudioData)
+
+ public:
+  AudioData(nsIGlobalObject* aParent, const AudioDataSerializedData& aData);
+  AudioData(nsIGlobalObject* aParent,
+            already_AddRefed<AudioDataResource> aResource,
+            const AudioDataInit& aInit);
+  AudioData(nsIGlobalObject* aParent,
+            already_AddRefed<mozilla::dom::AudioDataResource> aResource,
+            int64_t aTimestamp, uint32_t aNumberOfChannels,
+            uint32_t aNumberOfFrames, float aSampleRate,
+            AudioSampleFormat aAudioSampleFormat);
+  AudioData(const AudioData& aOther);
+
+ protected:
+  ~AudioData() = default;
+
+ public:
+  nsIGlobalObject* GetParentObject() const;
+
+  JSObject* WrapObject(JSContext* aCx,
+                       JS::Handle<JSObject*> aGivenProto) override;
+  // Script constructor: validates aInit (TypeError) and copies its data.
+  static already_AddRefed<AudioData> Constructor(const GlobalObject& aGlobal,
+                                                 const AudioDataInit& aInit,
+                                                 ErrorResult& aRv);
+  // Attribute getters. After Close(): format is null, rate/frames/channels 0.
+  Nullable<mozilla::dom::AudioSampleFormat> GetFormat() const;
+
+  float SampleRate() const;
+
+  uint32_t NumberOfFrames() const;
+
+  uint32_t NumberOfChannels() const;
+
+  uint64_t Duration() const;  // microseconds
+
+  int64_t Timestamp() const;  // microseconds
+  // Number of bytes CopyTo() writes for the same aOptions.
+  uint32_t AllocationSize(const AudioDataCopyToOptions& aOptions,
+                          ErrorResult& aRv);
+  // Copies the selected frames to aDestination, converting the sample format.
+  void CopyTo(
+      const MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDestination,
+      const AudioDataCopyToOptions& aOptions, ErrorResult& aRv);
+  // New AudioData sharing the same PCM resource; throws if already closed.
+  already_AddRefed<AudioData> Clone(ErrorResult& aRv);
+  // Drops the PCM resource and resets rate, frame/channel counts and format.
+  void Close();
+
+  // [Serializable] implementations: {Read, Write}StructuredClone
+  static JSObject* ReadStructuredClone(JSContext* aCx, nsIGlobalObject* aGlobal,
+                                       JSStructuredCloneReader* aReader,
+                                       const AudioDataSerializedData& aData);
+
+  bool WriteStructuredClone(JSStructuredCloneWriter* aWriter,
+                            StructuredCloneHolder* aHolder) const;
+
+  // [Transferable] implementations: Transfer, FromTransferred
+  using TransferredData = AudioDataSerializedData;
+
+  UniquePtr<TransferredData> Transfer();
+
+  static already_AddRefed<AudioData> FromTransferred(nsIGlobalObject* aGlobal,
+                                                     TransferredData* aData);
+
+ private:
+  size_t ComputeCopyElementCount(const AudioDataCopyToOptions& aOptions,
+                                 ErrorResult& aRv);
+
+  nsCString ToString() const;
+  // AudioData can run on either main thread or worker thread.
+  void AssertIsOnOwningThread() const { NS_ASSERT_OWNINGTHREAD(AudioData); }
+  void CloseIfNeeded();
+
+  nsCOMPtr<nsIGlobalObject> mParent;
+
+  friend struct AudioDataSerializedData;
+
+  int64_t mTimestamp;
+  uint32_t mNumberOfChannels;
+  uint32_t mNumberOfFrames;
+  float mSampleRate;
+  Maybe<AudioSampleFormat> mAudioSampleFormat;
+  RefPtr<mozilla::dom::AudioDataResource> mResource;
+};
+
+class AudioDataResource final {
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioDataResource);
+  explicit AudioDataResource(FallibleTArray<uint8_t>&& aData)
+      : mData(std::move(aData)) {}
+  explicit AudioDataResource() : mData() {}
+  // Copies aData into a new resource; returns nullptr if allocation fails.
+  static AudioDataResource* Create(const Span<uint8_t>& aData) {
+    AudioDataResource* resource = new AudioDataResource();
+    if (!resource->mData.AppendElements(aData, mozilla::fallible_t())) {
+      delete resource;  // nobody else owns it yet: do not leak it on failure
+      return nullptr;
+    }
+    return resource;
+  }
+  // Same, from a script-provided buffer; fails with NS_ERROR_OUT_OF_MEMORY.
+  static Result<already_AddRefed<AudioDataResource>, nsresult> Construct(
+      const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aInit);
+  // Non-owning, mutable view over the stored bytes.
+  Span<uint8_t> Data() { return Span(mData.Elements(), mData.Length()); }
+
+ private:
+  ~AudioDataResource() = default;
+  // It's always possible for the allocation to fail -- the size is
+  // controlled by script.
+  FallibleTArray<uint8_t> mData;
+};
+
+struct AudioDataSerializedData {
+  explicit AudioDataSerializedData(const AudioData& aFrom)
+      : mTimestamp(aFrom.Timestamp()),
+        mNumberOfChannels(aFrom.NumberOfChannels()),
+        mNumberOfFrames(aFrom.NumberOfFrames()),
+        mSampleRate(aFrom.SampleRate()),
+        mAudioSampleFormat(NullableToMaybe(aFrom.GetFormat())),
+        mResource(aFrom.mResource) {}  // shares the buffer; bytes are not copied
+  int64_t mTimestamp{};  // microseconds
+  uint32_t mNumberOfChannels{};
+  uint32_t mNumberOfFrames{};
+  float mSampleRate{};
+  Maybe<AudioSampleFormat> mAudioSampleFormat;  // Nothing if aFrom was closed
+  RefPtr<AudioDataResource> mResource;  // null if aFrom was closed
+};
+
+}  // namespace mozilla::dom
+
+#endif  // mozilla_dom_AudioData_h
diff --git a/dom/media/webcodecs/AudioDecoder.cpp b/dom/media/webcodecs/AudioDecoder.cpp
new file mode 100644
index 0000000000..6b554dcacf
--- /dev/null
+++ b/dom/media/webcodecs/AudioDecoder.cpp
@@ -0,0 +1,481 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/dom/AudioDecoder.h"
+#include "mozilla/dom/AudioDecoderBinding.h"
+
+#include "DecoderTraits.h"
+#include "MediaContainerType.h"
+#include "MediaData.h"
+#include "VideoUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Try.h"
+#include "mozilla/Unused.h"
+#include "mozilla/dom/AudioDataBinding.h"
+#include "mozilla/dom/EncodedAudioChunk.h"
+#include "mozilla/dom/EncodedAudioChunkBinding.h"
+#include "mozilla/dom/ImageUtils.h"
+#include "mozilla/dom/Promise.h"
+#include "mozilla/dom/WebCodecsUtils.h"
+#include "nsPrintfCString.h"
+#include "nsReadableUtils.h"
+
+extern mozilla::LazyLogModule gWebCodecsLog;
+
+namespace mozilla::dom {
+
+#ifdef LOG_INTERNAL
+#  undef LOG_INTERNAL
+#endif  // LOG_INTERNAL
+#define LOG_INTERNAL(level, msg, ...) \
+  MOZ_LOG(gWebCodecsLog, LogLevel::level, (msg, ##__VA_ARGS__))
+
+#ifdef LOG
+#  undef LOG
+#endif  // LOG
+#define LOG(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)
+
+#ifdef LOGW
+#  undef LOGW
+#endif  // LOGW
+#define LOGW(msg, ...) LOG_INTERNAL(Warning, msg, ##__VA_ARGS__)
+
+#ifdef LOGE
+#  undef LOGE
+#endif  // LOGE
+#define LOGE(msg, ...) LOG_INTERNAL(Error, msg, ##__VA_ARGS__)
+
+#ifdef LOGV
+#  undef LOGV
+#endif  // LOGV
+#define LOGV(msg, ...) LOG_INTERNAL(Verbose, msg, ##__VA_ARGS__)
+
+NS_IMPL_CYCLE_COLLECTION_INHERITED(AudioDecoder, DOMEventTargetHelper,
+                                   mErrorCallback, mOutputCallback)
+NS_IMPL_ADDREF_INHERITED(AudioDecoder, DOMEventTargetHelper)
+NS_IMPL_RELEASE_INHERITED(AudioDecoder, DOMEventTargetHelper)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(AudioDecoder)
+NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
+
+/*
+ * Below are helper classes
+ */
+
+AudioDecoderConfigInternal::AudioDecoderConfigInternal(
+    const nsAString& aCodec, uint32_t aSampleRate, uint32_t aNumberOfChannels,
+    Maybe<RefPtr<MediaByteBuffer>>&& aDescription)
+    : mCodec(aCodec),  // The codec string is copied.
+      mSampleRate(aSampleRate),
+      mNumberOfChannels(aNumberOfChannels),
+      mDescription(std::move(aDescription)) {}  // Moved, not copied.
+
+/*static*/
+UniquePtr<AudioDecoderConfigInternal> AudioDecoderConfigInternal::Create(
+    const AudioDecoderConfig& aConfig) {
+  nsCString reason;
+  if (!AudioDecoderTraits::Validate(aConfig, reason)) {
+    LOGE("Failed to create AudioDecoderConfigInternal: %s", reason.get());
+    return nullptr;
+  }
+  // The description is optional: it stays Nothing() unless one was passed.
+  Maybe<RefPtr<MediaByteBuffer>> extraData;
+  if (aConfig.mDescription.WasPassed()) {
+    auto buffer = GetExtraDataFromArrayBuffer(aConfig.mDescription.Value());
+    if (buffer.isErr()) {  // Invalid description data.
+      nsCString errorName;
+      GetErrorName(buffer.unwrapErr(), errorName);
+      LOGE(
+          "Failed to create AudioDecoderConfigInternal due to invalid "
+          "description data. Error: %s",
+          errorName.get());
+      return nullptr;
+    }
+    extraData.emplace(buffer.unwrap());
+  }
+  // Both failure paths above return nullptr; from here on creation succeeds.
+  return UniquePtr<AudioDecoderConfigInternal>(new AudioDecoderConfigInternal(
+      aConfig.mCodec, aConfig.mSampleRate, aConfig.mNumberOfChannels,
+      std::move(extraData)));
+}
+
+/*
+ * The following are helpers for AudioDecoder methods
+ */
+
+struct AudioMIMECreateParam {  // Holds the codec string after parsing.
+  explicit AudioMIMECreateParam(const AudioDecoderConfigInternal& aConfig)
+      : mParsedCodec(ParseCodecString(aConfig.mCodec).valueOr(EmptyString())) {}
+  explicit AudioMIMECreateParam(const AudioDecoderConfig& aConfig)
+      : mParsedCodec(ParseCodecString(aConfig.mCodec).valueOr(EmptyString())) {}
+
+  const nsString mParsedCodec;  // Empty when ParseCodecString yields nothing.
+};
+
+// Maps a WebCodecs codec name to the codec number used with the "x-wav"
+// container; for every other container the name is returned unchanged.
+nsCString ConvertCodecName(const nsCString& aContainer,
+                           const nsCString& aCodec) {
+  if (!aContainer.EqualsLiteral("x-wav")) {
+    return aCodec;
+  }
+  if (aCodec.EqualsLiteral("ulaw")) {
+    return nsCString("7");
+  }
+  if (aCodec.EqualsLiteral("alaw")) {
+    return nsCString("6");
+  }
+  if (aCodec.Find("f32") != kNotFound) {  // Find() yields an index, not a bool.
+    return nsCString("3");
+  }
+  // Linear PCM
+  return nsCString("1");
+}
+
+static nsTArray<nsCString> GuessMIMETypes(const AudioMIMECreateParam& aParam) {
+  const nsCString codec = NS_ConvertUTF16toUTF8(aParam.mParsedCodec);
+  nsTArray<nsCString> types;  // Every entry maps the original codec name.
+  for (const nsCString& container : GuessContainers(aParam.mParsedCodec)) {
+    const nsCString name = ConvertCodecName(container, codec);
+    nsPrintfCString mime("audio/%s; codecs=%s", container.get(), name.get());
+    types.AppendElement(mime);
+  }
+  return types;
+}
+
+static bool IsSupportedAudioCodec(const nsAString& aCodec) {  // Allow-list.
+  LOG("IsSupportedAudioCodec: %s", NS_ConvertUTF16toUTF8(aCodec).get());
+  return aCodec.EqualsLiteral("flac") || aCodec.EqualsLiteral("mp3") ||
+         IsAACCodecString(aCodec) || aCodec.EqualsLiteral("opus") ||
+         aCodec.EqualsLiteral("ulaw") || aCodec.EqualsLiteral("alaw") ||
+         aCodec.EqualsLiteral("pcm-u8") || aCodec.EqualsLiteral("pcm-s16") ||
+         aCodec.EqualsLiteral("pcm-s24") || aCodec.EqualsLiteral("pcm-s32") ||
+         aCodec.EqualsLiteral("pcm-f32");
+}
+
+// https://w3c.github.io/webcodecs/#check-configuration-support
+template <typename Config>
+static bool CanDecodeAudio(const Config& aConfig) {
+  const AudioMIMECreateParam param(aConfig);
+  const nsString& codec = param.mParsedCodec;
+  // Reject unknown codecs, and AAC on Android, before probing any container.
+  if (!IsSupportedAudioCodec(codec) ||
+      (IsOnAndroid() && IsAACCodecString(codec))) {
+    return false;
+  }
+  // TODO: Instead of calling CanHandleContainerType with the guessed
+  // containers, DecoderTraits should provide an API to tell if a codec is
+  // decodable or not.
+  for (const nsCString& mime : GuessMIMETypes(param)) {
+    Maybe<MediaContainerType> type = MakeMediaExtendedMIMEType(mime);
+    if (!type) {
+      continue;
+    }
+    if (DecoderTraits::CanHandleContainerType(
+            *type, nullptr /* DecoderDoctorDiagnostics */) != CANPLAY_NO) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+    const AudioDecoderConfigInternal& aConfig) {
+  // TODO: Instead of calling GetTracksInfo with the guessed containers,
+  // DecoderTraits should provide an API to create the TrackInfo directly.
+  for (const nsCString& mime : GuessMIMETypes(AudioMIMECreateParam(aConfig))) {
+    Maybe<MediaContainerType> type = MakeMediaExtendedMIMEType(mime);
+    if (!type) {
+      continue;
+    }
+    nsTArray<UniquePtr<TrackInfo>> tracks = DecoderTraits::GetTracksInfo(*type);
+    if (!tracks.IsEmpty()) {
+      return tracks;  // The first container that yields tracks wins.
+    }
+  }
+  return {};
+}
+
+static Result<Ok, nsresult> CloneConfiguration(
+    RootedDictionary<AudioDecoderConfig>& aDest, JSContext* aCx,
+    const AudioDecoderConfig& aConfig, ErrorResult& aRv) {
+  aDest.mCodec = aConfig.mCodec;
+  if (aConfig.mDescription.WasPassed()) {  // The description is optional.
+    aDest.mDescription.Construct();
+    MOZ_TRY(CloneBuffer(aCx, aDest.mDescription.Value(),
+                        aConfig.mDescription.Value(), aRv));
+  }
+
+  aDest.mNumberOfChannels = aConfig.mNumberOfChannels;
+  aDest.mSampleRate = aConfig.mSampleRate;
+
+  return Ok();  // Reached only when CloneBuffer, if called, did not fail.
+}
+
+// https://w3c.github.io/webcodecs/#create-a-audiodata
+static RefPtr<AudioData> CreateAudioData(nsIGlobalObject* aGlobalObject,
+                                         mozilla::AudioData* aData) {
+  MOZ_ASSERT(aGlobalObject);
+  MOZ_ASSERT(aData);
+  // NOTE(review): `resource` below is null if Create() fails; not checked.
+  auto buf = aData->MoveableData();
+  // TODO: Ensure buf.Length() is a multiple of aData->mChannels and put it into
+  // AssertedCast<uint32_t> (since return type of buf.Length() is size_t).
+  uint32_t frames = buf.Length() / aData->mChannels;  // Elements per channel.
+  RefPtr<AudioDataResource> resource = AudioDataResource::Create(Span{
+      reinterpret_cast<uint8_t*>(buf.Data()), buf.Length() * sizeof(float)});
+  return MakeRefPtr<AudioData>(aGlobalObject, resource.forget(),
+                               aData->mTime.ToMicroseconds(), aData->mChannels,
+                               frames, AssertedCast<float>(aData->mRate),
+                               mozilla::dom::AudioSampleFormat::F32);
+}
+
+/* static */
+bool AudioDecoderTraits::IsSupported(
+    const AudioDecoderConfigInternal& aConfig) {
+  return CanDecodeAudio(aConfig);  // Shared with IsConfigSupported().
+}
+
+/* static */
+Result<UniquePtr<TrackInfo>, nsresult> AudioDecoderTraits::CreateTrackInfo(
+    const AudioDecoderConfigInternal& aConfig) {
+  LOG("Create a AudioInfo from %s config",
+      NS_ConvertUTF16toUTF8(aConfig.mCodec).get());
+
+  nsTArray<UniquePtr<TrackInfo>> tracks = GetTracksInfo(aConfig);
+  if (tracks.Length() != 1 || tracks[0]->GetType() != TrackInfo::kAudioTrack) {
+    LOGE("Failed to get TrackInfo");
+    return Err(NS_ERROR_INVALID_ARG);
+  }
+
+  UniquePtr<TrackInfo> track(std::move(tracks[0]));
+  AudioInfo* ai = track->GetAsAudioInfo();
+  if (!ai) {
+    LOGE("Failed to get AudioInfo");
+    return Err(NS_ERROR_INVALID_ARG);
+  }
+
+  if (aConfig.mDescription.isSome()) {
+    // Store the description as the track's codec-specific binary blob.
+    RefPtr<MediaByteBuffer> buf = aConfig.mDescription.value();
+    if (buf) {
+      LOG("The given config has %zu bytes of description data", buf->Length());
+      ai->mCodecSpecificConfig =
+          AudioCodecSpecificVariant{AudioCodecSpecificBinaryBlob{buf}};
+    }
+  }
+
+  ai->mChannels = aConfig.mNumberOfChannels;
+  ai->mRate = aConfig.mSampleRate;
+
+  LOG("Created AudioInfo %s (%" PRIu32 "ch %" PRIu32
+      "Hz - with extra-data: %s)",
+      NS_ConvertUTF16toUTF8(aConfig.mCodec).get(), ai->mChannels, ai->mRate,
+      aConfig.mDescription.isSome() ? "yes" : "no");
+
+  return track;
+}
+
+// https://w3c.github.io/webcodecs/#valid-audiodecoderconfig
+/* static */
+bool AudioDecoderTraits::Validate(const AudioDecoderConfig& aConfig,
+                                  nsCString& aErrorMessage) {
+  Maybe<nsString> codec = ParseCodecString(aConfig.mCodec);
+  if (!codec || codec->IsEmpty()) {
+    LOGE("Validating AudioDecoderConfig: invalid codec string");
+    aErrorMessage.AppendPrintf("Invalid codec string %s",
+                               NS_ConvertUTF16toUTF8(aConfig.mCodec).get());
+    return false;
+  }
+
+  LOG("Validating AudioDecoderConfig: codec: %s %uch %uHz %s extradata",
+      NS_ConvertUTF16toUTF8(codec.value()).get(), aConfig.mNumberOfChannels,
+      aConfig.mSampleRate, aConfig.mDescription.WasPassed() ? "w/" : "no");
+
+  if (aConfig.mNumberOfChannels == 0) {
+    aErrorMessage.AppendPrintf("Invalid number of channels of %u",
+                               aConfig.mNumberOfChannels);
+    return false;
+  }
+
+  if (aConfig.mSampleRate == 0) {
+    aErrorMessage.AppendPrintf("Invalid sample-rate of %u",
+                               aConfig.mSampleRate);
+    return false;
+  }
+
+  bool detached =
+      aConfig.mDescription.WasPassed() &&
+      (aConfig.mDescription.Value().IsArrayBuffer()
+           ? JS::ArrayBuffer::fromObject(
+                 aConfig.mDescription.Value().GetAsArrayBuffer().Obj())
+                 .isDetached()
+           : JS::ArrayBufferView::fromObject(
+                 aConfig.mDescription.Value().GetAsArrayBufferView().Obj())
+                 .isDetached());
+
+  if (detached) {
+    LOGE("description is detached.");
+    aErrorMessage.AppendLiteral("description is detached.");
+    return false;
+  }
+
+  return true;  // Every rejection above has filled in aErrorMessage.
+}
+
+/* static */
+UniquePtr<AudioDecoderConfigInternal> AudioDecoderTraits::CreateConfigInternal(
+    const AudioDecoderConfig& aConfig) {
+  return AudioDecoderConfigInternal::Create(aConfig);  // nullptr if invalid.
+}
+
+/* static */
+bool AudioDecoderTraits::IsKeyChunk(const EncodedAudioChunk& aInput) {
+  return aInput.Type() == EncodedAudioChunkType::Key;  // Any other type: no.
+}
+
+/* static */
+UniquePtr<EncodedAudioChunkData> AudioDecoderTraits::CreateInputInternal(
+    const EncodedAudioChunk& aInput) {
+  return aInput.Clone();  // Copies the bytes; nullptr when cloning fails.
+}
+
+/*
+ * Below is the AudioDecoder implementation
+ */
+
+AudioDecoder::AudioDecoder(nsIGlobalObject* aParent,
+                           RefPtr<WebCodecsErrorCallback>&& aErrorCallback,
+                           RefPtr<AudioDataOutputCallback>&& aOutputCallback)
+    : DecoderTemplate(aParent, std::move(aErrorCallback),
+                      std::move(aOutputCallback)) {
+  MOZ_ASSERT(mErrorCallback);  // Both callbacks are expected to be non-null.
+  MOZ_ASSERT(mOutputCallback);
+  LOG("AudioDecoder %p ctor", this);
+}
+
+AudioDecoder::~AudioDecoder() {
+  LOG("AudioDecoder %p dtor", this);
+  Unused << ResetInternal(NS_ERROR_DOM_ABORT_ERR);  // Result is ignored.
+}
+
+JSObject* AudioDecoder::WrapObject(JSContext* aCx,
+                                   JS::Handle<JSObject*> aGivenProto) {
+  AssertIsOnOwningThread();  // Wrapping is asserted to be owning-thread only.
+
+  return AudioDecoder_Binding::Wrap(aCx, this, aGivenProto);
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodecoder-audiodecoder
+/* static */
+already_AddRefed<AudioDecoder> AudioDecoder::Constructor(
+    const GlobalObject& aGlobal, const AudioDecoderInit& aInit,
+    ErrorResult& aRv) {
+  nsCOMPtr<nsIGlobalObject> owner = do_QueryInterface(aGlobal.GetAsSupports());
+  if (owner) {
+    // Each callback gets its own strong reference, handed to the decoder.
+    return MakeAndAddRef<AudioDecoder>(
+        owner.get(), RefPtr<WebCodecsErrorCallback>(aInit.mError),
+        RefPtr<AudioDataOutputCallback>(aInit.mOutput));
+  }
+  aRv.Throw(NS_ERROR_FAILURE);
+  return nullptr;
+}
+
+// https://w3c.github.io/webcodecs/#dom-audiodecoder-isconfigsupported
+/* static */
+already_AddRefed<Promise> AudioDecoder::IsConfigSupported(
+    const GlobalObject& aGlobal, const AudioDecoderConfig& aConfig,
+    ErrorResult& aRv) {
+  LOG("AudioDecoder::IsConfigSupported, config: %s",
+      NS_ConvertUTF16toUTF8(aConfig.mCodec).get());
+
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(aGlobal.GetAsSupports());
+  if (!global) {
+    aRv.Throw(NS_ERROR_FAILURE);
+    return nullptr;
+  }
+
+  RefPtr<Promise> p = Promise::Create(global.get(), aRv);
+  if (NS_WARN_IF(aRv.Failed())) {
+    return p.forget();
+  }
+
+  nsCString errorMessage;
+  if (!AudioDecoderTraits::Validate(aConfig, errorMessage)) {
+    p->MaybeRejectWithTypeError(errorMessage);  // Invalid config: reject.
+    return p.forget();
+  }
+
+  RootedDictionary<AudioDecoderConfig> config(aGlobal.Context());
+  auto r = CloneConfiguration(config, aGlobal.Context(), aConfig, aRv);
+  if (r.isErr()) {
+    // This can only be an OOM: all members to clone are known to be valid
+    // because this is checked by ::Validate above.
+    MOZ_ASSERT(r.inspectErr() == NS_ERROR_OUT_OF_MEMORY &&
+               aRv.ErrorCodeIs(NS_ERROR_OUT_OF_MEMORY));
+    return p.forget();
+  }
+
+  bool canDecode = CanDecodeAudio(config);
+  RootedDictionary<AudioDecoderSupport> s(aGlobal.Context());
+  s.mConfig.Construct(std::move(config));  // The clone is what gets reported.
+  s.mSupported.Construct(canDecode);
+
+  p->MaybeResolve(s);
+  return p.forget();
+}
+
+already_AddRefed<MediaRawData> AudioDecoder::InputDataToMediaRawData(
+    UniquePtr<EncodedAudioChunkData>&& aData, TrackInfo& aInfo,
+    const AudioDecoderConfigInternal& aConfig) {
+  AssertIsOnOwningThread();
+  MOZ_ASSERT(aInfo.GetAsAudioInfo());  // Only audio tracks are expected here.
+
+  if (!aData) {
+    LOGE("No data for conversion");
+    return nullptr;
+  }
+
+  RefPtr<MediaRawData> sample = aData->TakeData();  // Null when it fails.
+  if (!sample) {
+    LOGE("Take no data for conversion");
+    return nullptr;
+  }
+
+  LOGV(
+      "EncodedAudioChunkData %p converted to %zu-byte MediaRawData - time: "
+      "%" PRIi64 "us, timecode: %" PRIi64 "us, duration: %" PRIi64
+      "us, key-frame: %s",
+      aData.get(), sample->Size(), sample->mTime.ToMicroseconds(),
+      sample->mTimecode.ToMicroseconds(), sample->mDuration.ToMicroseconds(),
+      sample->mKeyframe ? "yes" : "no");
+
+  return sample.forget();
+}
+
+nsTArray<RefPtr<AudioData>> AudioDecoder::DecodedDataToOutputType(
+    nsIGlobalObject* aGlobalObject, const nsTArray<RefPtr<MediaData>>&& aData,
+    AudioDecoderConfigInternal& aConfig) {
+  AssertIsOnOwningThread();
+  // Wrap every decoded sample in a DOM AudioData, keeping the input order.
+  nsTArray<RefPtr<AudioData>> frames;
+  for (const RefPtr<MediaData>& data : aData) {
+    MOZ_RELEASE_ASSERT(data->mType == MediaData::Type::AUDIO_DATA);
+    RefPtr<mozilla::AudioData> d(data->As<mozilla::AudioData>());
+    frames.AppendElement(CreateAudioData(aGlobalObject, d.get()));
+  }
+  return frames;
+}
+
+#undef LOG
+#undef LOGW
+#undef LOGE
+#undef LOGV
+#undef LOG_INTERNAL
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webcodecs/AudioDecoder.h b/dom/media/webcodecs/AudioDecoder.h
new file mode 100644
index 0000000000..54fad68f55
--- /dev/null
+++ b/dom/media/webcodecs/AudioDecoder.h
@@ -0,0 +1,83 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_AudioDecoder_h
+#define mozilla_dom_AudioDecoder_h
+
+#include "js/TypeDecls.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/dom/AudioData.h"
+#include "mozilla/dom/BindingDeclarations.h"
+#include "mozilla/dom/DecoderTemplate.h"
+#include "mozilla/dom/DecoderTypes.h"
+#include "mozilla/dom/RootedDictionary.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+
+class nsIGlobalObject;
+
+namespace mozilla {
+
+namespace dom {
+
+class AudioDataOutputCallback;
+class EncodedAudioChunk;
+class EncodedAudioChunkData;
+class EventHandlerNonNull;
+class GlobalObject;
+class Promise;
+class WebCodecsErrorCallback;
+struct AudioDecoderConfig;
+struct AudioDecoderInit;
+
+}  // namespace dom
+
+}  // namespace mozilla
+
+namespace mozilla::dom {
+
+class AudioDecoder final : public DecoderTemplate<AudioDecoderTraits> {
+ public:
+  NS_DECL_ISUPPORTS_INHERITED
+  NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(AudioDecoder, DOMEventTargetHelper)
+
+ public:
+  AudioDecoder(nsIGlobalObject* aParent,
+               RefPtr<WebCodecsErrorCallback>&& aErrorCallback,
+               RefPtr<AudioDataOutputCallback>&& aOutputCallback);
+
+ protected:
+  ~AudioDecoder();
+
+ public:
+  JSObject* WrapObject(JSContext* aCx,
+                       JS::Handle<JSObject*> aGivenProto) override;
+  // Static entry points taking the binding-layer GlobalObject.
+  static already_AddRefed<AudioDecoder> Constructor(
+      const GlobalObject& aGlobal, const AudioDecoderInit& aInit,
+      ErrorResult& aRv);
+
+  static already_AddRefed<Promise> IsConfigSupported(
+      const GlobalObject& aGlobal, const AudioDecoderConfig& aConfig,
+      ErrorResult& aRv);
+
+ protected:  // Conversion hooks overridden from the base class.
+  virtual already_AddRefed<MediaRawData> InputDataToMediaRawData(
+      UniquePtr<EncodedAudioChunkData>&& aData, TrackInfo& aInfo,
+      const AudioDecoderConfigInternal& aConfig) override;
+
+  virtual nsTArray<RefPtr<AudioData>> DecodedDataToOutputType(
+      nsIGlobalObject* aGlobalObject, const nsTArray<RefPtr<MediaData>>&& aData,
+      AudioDecoderConfigInternal& aConfig) override;
+};
+
+}  // namespace mozilla::dom
+
+#endif  // mozilla_dom_AudioDecoder_h
diff --git a/dom/media/webcodecs/DecoderAgent.cpp b/dom/media/webcodecs/DecoderAgent.cpp
index 5c63e27d48..095852c01d 100644
--- a/dom/media/webcodecs/DecoderAgent.cpp
+++ b/dom/media/webcodecs/DecoderAgent.cpp
@@ -6,8 +6,6 @@
 
 #include "DecoderAgent.h"
 
-#include <atomic>
-
 #include "ImageContainer.h"
 #include "MediaDataDecoderProxy.h"
 #include "PDMFactory.h"
diff --git a/dom/media/webcodecs/DecoderTemplate.cpp b/dom/media/webcodecs/DecoderTemplate.cpp
index 0fa25a208b..4d1c310737 100644
--- a/dom/media/webcodecs/DecoderTemplate.cpp
+++ b/dom/media/webcodecs/DecoderTemplate.cpp
@@ -139,8 +139,8 @@ void DecoderTemplate<DecoderType>::Configure(const ConfigType& aConfig,
 
   nsCString errorMessage;
   if (!DecoderType::Validate(aConfig, errorMessage)) {
-    aRv.ThrowTypeError(
-        nsPrintfCString("config is invalid: %s", errorMessage.get()));
+    LOG("Configure: Validate error: %s", errorMessage.get());
+    aRv.ThrowTypeError(errorMessage);
     return;
   }
 
@@ -322,13 +322,13 @@ void DecoderTemplate<DecoderType>::OutputDecodedData(
   MOZ_ASSERT(mState == CodecState::Configured);
   MOZ_ASSERT(mActiveConfig);
 
-  nsTArray<RefPtr<VideoFrame>> frames = DecodedDataToOutputType(
+  nsTArray<RefPtr<OutputType>> frames = DecodedDataToOutputType(
       GetParentObject(), std::move(aData), *mActiveConfig);
-  RefPtr<VideoFrameOutputCallback> cb(mOutputCallback);
-  for (RefPtr<VideoFrame>& frame : frames) {
+  RefPtr<OutputCallbackType> cb(mOutputCallback);
+  for (RefPtr<OutputType>& frame : frames) {
     LOG("Outputing decoded data: ts: %" PRId64, frame->Timestamp());
-    RefPtr<VideoFrame> f = frame;
-    cb->Call((VideoFrame&)(*f));
+    RefPtr<OutputType> f = frame;
+    cb->Call((OutputType&)(*f));
   }
 }
 
@@ -881,6 +881,7 @@ void DecoderTemplate<DecoderType>::DestroyDecoderAgentIfAny() {
 }
 
 template class DecoderTemplate<VideoDecoderTraits>;
+template class DecoderTemplate<AudioDecoderTraits>;
 
 #undef LOG
 #undef LOGW
diff --git a/dom/media/webcodecs/DecoderTypes.h b/dom/media/webcodecs/DecoderTypes.h
index 56aa82046f..339a164f70 100644
--- a/dom/media/webcodecs/DecoderTypes.h
+++ b/dom/media/webcodecs/DecoderTypes.h
@@ -9,6 +9,9 @@
 
 #include "MediaData.h"
 #include "mozilla/Maybe.h"
+#include "mozilla/dom/AudioData.h"
+#include "mozilla/dom/AudioDecoderBinding.h"
+#include "mozilla/dom/EncodedAudioChunk.h"
 #include "mozilla/dom/EncodedVideoChunk.h"
 #include "mozilla/dom/VideoColorSpaceBinding.h"
 #include "mozilla/dom/VideoDecoderBinding.h"
@@ -58,17 +61,17 @@ class VideoDecoderConfigInternal {
 
   bool Equals(const VideoDecoderConfigInternal& aOther) const {
     if (mDescription.isSome() != aOther.mDescription.isSome()) {
-        return false;
+      return false;
     }
     if (mDescription.isSome() && aOther.mDescription.isSome()) {
-        auto lhs = mDescription.value();
-        auto rhs = aOther.mDescription.value();
-        if (lhs->Length() != rhs->Length()) {
-            return false;
-        }
-        if (!ArrayEqual(lhs->Elements(), rhs->Elements(), lhs->Length())) {
-            return false;
-        }
+      auto lhs = mDescription.value();
+      auto rhs = aOther.mDescription.value();
+      if (lhs->Length() != rhs->Length()) {
+        return false;
+      }
+      if (!ArrayEqual(lhs->Elements(), rhs->Elements(), lhs->Length())) {
+        return false;
+      }
     }
     return mCodec.Equals(aOther.mCodec) &&
            mCodedHeight == aOther.mCodedHeight &&
@@ -111,6 +114,49 @@ class VideoDecoderTraits {
       const InputType& aInput);
 };
 
+class AudioDecoderConfigInternal {
+ public:
+  static UniquePtr<AudioDecoderConfigInternal> Create(
+      const AudioDecoderConfig& aConfig);
+  ~AudioDecoderConfigInternal() = default;
+
+  nsString mCodec;
+  uint32_t mSampleRate;
+  uint32_t mNumberOfChannels;
+  Maybe<RefPtr<MediaByteBuffer>> mDescription;
+  // Compilation fix: the two members below are not supported here and should
+  // eventually be abstracted away by DecoderAgent.
+  HardwareAcceleration mHardwareAcceleration =
+      HardwareAcceleration::No_preference;
+  Maybe<bool> mOptimizeForLatency;
+
+ private:  // Instances are built through Create().
+  AudioDecoderConfigInternal(const nsAString& aCodec, uint32_t aSampleRate,
+                             uint32_t aNumberOfChannels,
+                             Maybe<RefPtr<MediaByteBuffer>>&& aDescription);
+};
+
+class AudioDecoderTraits {  // Type bundle plugged into DecoderTemplate.
+ public:
+  static constexpr nsLiteralCString Name = "AudioDecoder"_ns;
+  using ConfigType = AudioDecoderConfig;
+  using ConfigTypeInternal = AudioDecoderConfigInternal;
+  using InputType = EncodedAudioChunk;
+  using InputTypeInternal = EncodedAudioChunkData;
+  using OutputType = AudioData;
+  using OutputCallbackType = AudioDataOutputCallback;
+
+  static bool IsSupported(const ConfigTypeInternal& aConfig);
+  static Result<UniquePtr<TrackInfo>, nsresult> CreateTrackInfo(
+      const ConfigTypeInternal& aConfig);
+  static bool Validate(const ConfigType& aConfig, nsCString& aErrorMessage);
+  static UniquePtr<ConfigTypeInternal> CreateConfigInternal(
+      const ConfigType& aConfig);
+  static bool IsKeyChunk(const InputType& aInput);
+  static UniquePtr<InputTypeInternal> CreateInputInternal(
+      const InputType& aInput);
+};
+
 }  // namespace dom
 }  // namespace mozilla
 
diff --git a/dom/media/webcodecs/EncodedAudioChunk.cpp b/dom/media/webcodecs/EncodedAudioChunk.cpp
new file mode 100644
index 0000000000..1cbb2ff9bd
--- /dev/null
+++ b/dom/media/webcodecs/EncodedAudioChunk.cpp
@@ -0,0 +1,260 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/dom/EncodedAudioChunk.h"
+#include "mozilla/dom/EncodedAudioChunkBinding.h"
+
+#include <utility>
+
+#include "MediaData.h"
+#include "TimeUnits.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/Logging.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/dom/StructuredCloneHolder.h"
+#include "mozilla/dom/StructuredCloneTags.h"
+#include "mozilla/dom/WebCodecsUtils.h"
+
+extern mozilla::LazyLogModule gWebCodecsLog;
+using mozilla::media::TimeUnit;
+
+namespace mozilla::dom {
+
+#ifdef LOG_INTERNAL
+#  undef LOG_INTERNAL
+#endif  // LOG_INTERNAL
+#define LOG_INTERNAL(level, msg, ...) \
+  MOZ_LOG(gWebCodecsLog, LogLevel::level, (msg, ##__VA_ARGS__))
+
+#ifdef LOGW
+#  undef LOGW
+#endif  // LOGW
+#define LOGW(msg, ...) LOG_INTERNAL(Warning, msg, ##__VA_ARGS__)
+
+#ifdef LOGE
+#  undef LOGE
+#endif  // LOGE
+#define LOGE(msg, ...) LOG_INTERNAL(Error, msg, ##__VA_ARGS__)
+
+// Only needed for refcounted objects.
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(EncodedAudioChunk, mParent)
+NS_IMPL_CYCLE_COLLECTING_ADDREF(EncodedAudioChunk)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(EncodedAudioChunk)
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(EncodedAudioChunk)
+  NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
+  NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+EncodedAudioChunkData::EncodedAudioChunkData(
+    already_AddRefed<MediaAlignedByteBuffer> aBuffer,
+    const EncodedAudioChunkType& aType, int64_t aTimestamp,
+    Maybe<uint64_t>&& aDuration)
+    : mBuffer(aBuffer),
+      mType(aType),
+      mTimestamp(aTimestamp),
+      mDuration(aDuration) {
+  MOZ_ASSERT(mBuffer);  // A buffer object is required, even for zero bytes.
+  MOZ_ASSERT(mBuffer->Length() == mBuffer->Size());
+  MOZ_ASSERT(mBuffer->Length() <=  // Lets ByteLength() return a uint32_t.
+             static_cast<size_t>(std::numeric_limits<uint32_t>::max()));
+}
+
+UniquePtr<EncodedAudioChunkData> EncodedAudioChunkData::Clone() const {
+  if (!mBuffer) {
+    LOGE("No buffer in EncodedAudioChunkData %p to clone!", this);
+    return nullptr;
+  }
+
+  // A chunk may legitimately hold zero bytes, so cloning an empty one only
+  // deserves a warning, not a failure.
+  if (mBuffer->Size() == 0) {
+    LOGW("Cloning an empty EncodedAudioChunkData %p", this);
+  }
+
+  RefPtr<MediaAlignedByteBuffer> copy =
+      MakeRefPtr<MediaAlignedByteBuffer>(mBuffer->Data(), mBuffer->Length());
+  if (!copy || copy->Size() != mBuffer->Size()) {
+    LOGE("OOM to copy EncodedAudioChunkData %p", this);
+    return nullptr;
+  }
+
+  return MakeUnique<EncodedAudioChunkData>(copy.forget(), mType, mTimestamp,
+                                           Maybe<uint64_t>(mDuration));
+}
+
+already_AddRefed<MediaRawData> EncodedAudioChunkData::TakeData() {
+  if (!mBuffer || !(*mBuffer)) {
+    LOGE("EncodedAudioChunkData %p has no data!", this);
+    return nullptr;
+  }
+  // The buffer's contents are moved into the sample, not copied.
+  RefPtr<MediaRawData> sample(new MediaRawData(std::move(*mBuffer)));
+  sample->mKeyframe = mType == EncodedAudioChunkType::Key;
+  sample->mTime = TimeUnit::FromMicroseconds(mTimestamp);
+  sample->mTimecode = TimeUnit::FromMicroseconds(mTimestamp);
+
+  if (mDuration) {
+    CheckedInt64 duration(*mDuration);  // A uint64_t may not fit in int64_t.
+    if (!duration.isValid()) {
+      LOGE("EncodedAudioChunkData %p 's duration exceeds TimeUnit's limit",
+           this);
+      return nullptr;
+    }
+    sample->mDuration = TimeUnit::FromMicroseconds(duration.value());
+  }
+
+  return sample.forget();
+}
+
+EncodedAudioChunk::EncodedAudioChunk(
+    nsIGlobalObject* aParent, already_AddRefed<MediaAlignedByteBuffer> aBuffer,
+    const EncodedAudioChunkType& aType, int64_t aTimestamp,
+    Maybe<uint64_t>&& aDuration)
+    : EncodedAudioChunkData(std::move(aBuffer), aType, aTimestamp,
+                            std::move(aDuration)),
+      mParent(aParent) {}  // The data members live in the base class.
+
+EncodedAudioChunk::EncodedAudioChunk(nsIGlobalObject* aParent,
+                                     const EncodedAudioChunkData& aData)
+    : EncodedAudioChunkData(aData), mParent(aParent) {}  // Copies aData.
+
+nsIGlobalObject* EncodedAudioChunk::GetParentObject() const {
+  AssertIsOnOwningThread();
+
+  return mParent.get();  // The global handed to the constructor.
+}
+
+JSObject* EncodedAudioChunk::WrapObject(JSContext* aCx,
+                                        JS::Handle<JSObject*> aGivenProto) {
+  AssertIsOnOwningThread();  // Wrapping is asserted to be owning-thread only.
+
+  return EncodedAudioChunk_Binding::Wrap(aCx, this, aGivenProto);
+}
+
+// https://w3c.github.io/webcodecs/#encodedaudiochunk-constructors
+/* static */
+already_AddRefed<EncodedAudioChunk> EncodedAudioChunk::Constructor(
+    const GlobalObject& aGlobal, const EncodedAudioChunkInit& aInit,
+    ErrorResult& aRv) {
+  nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(aGlobal.GetAsSupports());
+  if (!global) {
+    aRv.Throw(NS_ERROR_FAILURE);
+    return nullptr;
+  }
+
+  auto buffer = ProcessTypedArrays(
+      aInit.mData,
+      [&](const Span<uint8_t>& aData,
+          JS::AutoCheckCannotGC&&) -> RefPtr<MediaAlignedByteBuffer> {
+        // Make sure it's in uint32_t's range.
+        CheckedUint32 byteLength(aData.Length());
+        if (!byteLength.isValid()) {
+          aRv.Throw(NS_ERROR_INVALID_ARG);
+          return nullptr;
+        }
+        if (aData.Length() == 0) {
+          LOGW("Buffer for constructing EncodedAudioChunk is empty!");
+        }
+        RefPtr<MediaAlignedByteBuffer> buf = MakeRefPtr<MediaAlignedByteBuffer>(
+            aData.Elements(), aData.Length());
+
+        // Instead of checking *buf, size comparison is used to allow
+        // constructing a zero-sized EncodedAudioChunk.
+        if (!buf || buf->Size() != aData.Length()) {
+          aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
+          return nullptr;
+        }
+        return buf;
+      });
+  return aRv.Failed()  // On failure skip the ctor: it asserts a buffer.
+             ? nullptr
+             : do_AddRef(new EncodedAudioChunk(
+                   global, buffer.forget(), aInit.mType, aInit.mTimestamp,
+                   OptionalToMaybe(aInit.mDuration)));
+}
+
+EncodedAudioChunkType EncodedAudioChunk::Type() const {
+  AssertIsOnOwningThread();
+
+  return mType;  // Stored by the EncodedAudioChunkData constructor.
+}
+
+int64_t EncodedAudioChunk::Timestamp() const {
+  AssertIsOnOwningThread();
+
+  return mTimestamp;  // TakeData() interprets this value as microseconds.
+}
+
+Nullable<uint64_t> EncodedAudioChunk::GetDuration() const {
+  AssertIsOnOwningThread();
+  return MaybeToNullable(mDuration);  // Maybe<> -> Nullable<> for the caller.
+}
+
+uint32_t EncodedAudioChunk::ByteLength() const {
+  AssertIsOnOwningThread();
+  MOZ_ASSERT(mBuffer);
+  // The constructor asserts that the length fits in a uint32_t.
+  return static_cast<uint32_t>(mBuffer->Length());
+}
+
+// https://w3c.github.io/webcodecs/#dom-encodedaudiochunk-copyto
+void EncodedAudioChunk::CopyTo(
+    const MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDestination,
+    ErrorResult& aRv) {
+  AssertIsOnOwningThread();
+
+  ProcessTypedArraysFixed(aDestination, [&](const Span<uint8_t>& aData) {
+    if (mBuffer->Size() > aData.size_bytes()) {
+      aRv.ThrowTypeError(
+          "Destination ArrayBuffer smaller than source EncodedAudioChunk");
+      return;
+    }
+    // The destination is large enough: copy the chunk's bytes into it.
+    PodCopy(aData.data(), mBuffer->Data(), mBuffer->Size());
+  });
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-deserialization-steps
+/* static */
+JSObject* EncodedAudioChunk::ReadStructuredClone(
+    JSContext* aCx, nsIGlobalObject* aGlobal, JSStructuredCloneReader* aReader,
+    const EncodedAudioChunkData& aData) {
+  JS::Rooted<JS::Value> value(aCx, JS::NullValue());
+  // To avoid a rooting hazard error from returning a raw JSObject* before
+  // running the RefPtr destructor, RefPtr needs to be destructed before
+  // returning the raw JSObject*, which is why the RefPtr<EncodedAudioChunk> is
+  // created in the scope below. Otherwise, the static analysis infers the
+  // RefPtr cannot be safely destructed while the unrooted return JSObject* is
+  // on the stack.
+  {
+    auto frame = MakeRefPtr<EncodedAudioChunk>(aGlobal, aData);
+    if (!GetOrCreateDOMReflector(aCx, frame, &value) || !value.isObject()) {
+      return nullptr;
+    }
+  }
+  return value.toObjectOrNull();  // value.isObject() was checked above.
+}
+
+// https://w3c.github.io/webcodecs/#ref-for-serialization-steps
+bool EncodedAudioChunk::WriteStructuredClone(
+    JSStructuredCloneWriter* aWriter, StructuredCloneHolder* aHolder) const {
+  AssertIsOnOwningThread();
+
+  // Index the chunk and send that index to the receiver.
+  const uint32_t index =
+      static_cast<uint32_t>(aHolder->EncodedAudioChunks().Length());
+  // The serialization is limited to the same process scope so it's ok to
+  // serialize a reference instead of a copy.
+  aHolder->EncodedAudioChunks().AppendElement(EncodedAudioChunkData(*this));
+  return !NS_WARN_IF(
+      !JS_WriteUint32Pair(aWriter, SCTAG_DOM_ENCODEDAUDIOCHUNK, index));
+}
+
+#undef LOGW
+#undef LOGE
+#undef LOG_INTERNAL
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webcodecs/EncodedAudioChunk.h b/dom/media/webcodecs/EncodedAudioChunk.h
new file mode 100644
index 0000000000..53c059495a
--- /dev/null
+++ b/dom/media/webcodecs/EncodedAudioChunk.h
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_EncodedAudioChunk_h
+#define mozilla_dom_EncodedAudioChunk_h
+
+#include "js/TypeDecls.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/dom/BindingDeclarations.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsWrapperCache.h"
+
+class nsIGlobalObject;
+
+namespace mozilla {
+
+class MediaAlignedByteBuffer;
+class MediaRawData;
+
+namespace dom {
+
+class MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer;
+class OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer;
+class StructuredCloneHolder;
+
+enum class EncodedAudioChunkType : uint8_t;
+struct EncodedAudioChunkInit;
+
+}  // namespace dom
+}  // namespace mozilla
+
+namespace mozilla::dom {
+
+class EncodedAudioChunkData {
+ public:
+  EncodedAudioChunkData(already_AddRefed<MediaAlignedByteBuffer> aBuffer,
+                        const EncodedAudioChunkType& aType, int64_t aTimestamp,
+                        Maybe<uint64_t>&& aDuration);
+  EncodedAudioChunkData(const EncodedAudioChunkData& aData) = default;
+  ~EncodedAudioChunkData() = default;
+
+  UniquePtr<EncodedAudioChunkData> Clone() const;
+  already_AddRefed<MediaRawData> TakeData();
+
+ protected:
+  // mBuffer's byte length is guaranteed to be smaller than UINT32_MAX.
+  RefPtr<MediaAlignedByteBuffer> mBuffer;  // Copy ctor shares it.
+  EncodedAudioChunkType mType;
+  int64_t mTimestamp;
+  Maybe<uint64_t> mDuration;  // Nothing() if no duration was given.
+};
+
+class EncodedAudioChunk final : public EncodedAudioChunkData,
+                                public nsISupports,
+                                public nsWrapperCache {
+ public:
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+  NS_DECL_CYCLE_COLLECTION_WRAPPERCACHE_CLASS(EncodedAudioChunk)
+
+ public:
+  EncodedAudioChunk(nsIGlobalObject* aParent,
+                    already_AddRefed<MediaAlignedByteBuffer> aBuffer,
+                    const EncodedAudioChunkType& aType, int64_t aTimestamp,
+                    Maybe<uint64_t>&& aDuration);
+
+  EncodedAudioChunk(nsIGlobalObject* aParent,
+                    const EncodedAudioChunkData& aData);
+
+ protected:
+  ~EncodedAudioChunk() = default;  // Protected because the class is refcounted.
+
+ public:
+  nsIGlobalObject* GetParentObject() const;
+
+  JSObject* WrapObject(JSContext* aCx,
+                       JS::Handle<JSObject*> aGivenProto) override;
+
+  static already_AddRefed<EncodedAudioChunk> Constructor(
+      const GlobalObject& aGlobal, const EncodedAudioChunkInit& aInit,
+      ErrorResult& aRv);
+
+  EncodedAudioChunkType Type() const;
+
+  int64_t Timestamp() const;
+
+  Nullable<uint64_t> GetDuration() const;
+
+  uint32_t ByteLength() const;
+
+  void CopyTo(
+      const MaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDestination,
+      ErrorResult& aRv);
+
+  // [Serializable] implementations: {Read, Write}StructuredClone
+  static JSObject* ReadStructuredClone(JSContext* aCx, nsIGlobalObject* aGlobal,
+                                       JSStructuredCloneReader* aReader,
+                                       const EncodedAudioChunkData& aData);
+
+  bool WriteStructuredClone(JSStructuredCloneWriter* aWriter,
+                            StructuredCloneHolder* aHolder) const;
+
+ private:
+  // EncodedAudioChunk can run on either the main thread or a worker thread.
+  void AssertIsOnOwningThread() const {
+    NS_ASSERT_OWNINGTHREAD(EncodedAudioChunk);
+  }
+
+  nsCOMPtr<nsIGlobalObject> mParent;
+};
+
+}  // namespace mozilla::dom
+
+#endif  // mozilla_dom_EncodedAudioChunk_h
diff --git a/dom/media/webcodecs/VideoDecoder.cpp b/dom/media/webcodecs/VideoDecoder.cpp
index 47ca5bb459..18855e5cea 100644
--- a/dom/media/webcodecs/VideoDecoder.cpp
+++ b/dom/media/webcodecs/VideoDecoder.cpp
@@ -15,11 +15,8 @@
 #include "MediaData.h"
 #include "VideoUtils.h"
 #include "mozilla/Assertions.h"
-#include "mozilla/CheckedInt.h"
-#include "mozilla/DebugOnly.h"
 #include "mozilla/Logging.h"
 #include "mozilla/Maybe.h"
-#include "mozilla/StaticPrefs_dom.h"
 #include "mozilla/Try.h"
 #include "mozilla/Unused.h"
 #include "mozilla/dom/EncodedVideoChunk.h"
@@ -31,7 +28,6 @@
 #include "mozilla/dom/WebCodecsUtils.h"
 #include "nsPrintfCString.h"
 #include "nsReadableUtils.h"
-#include "nsThreadUtils.h"
 
 #ifdef XP_MACOSX
 #  include "MacIOSurfaceImage.h"
@@ -97,9 +93,6 @@ VideoColorSpaceInit VideoColorSpaceInternal::ToColorSpaceInit() const {
   return init;
 };
 
-static Result<RefPtr<MediaByteBuffer>, nsresult> GetExtraData(
-    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aBuffer);
-
 VideoDecoderConfigInternal::VideoDecoderConfigInternal(
     const nsAString& aCodec, Maybe<uint32_t>&& aCodedHeight,
     Maybe<uint32_t>&& aCodedWidth, Maybe<VideoColorSpaceInternal>&& aColorSpace,
@@ -129,7 +122,7 @@ UniquePtr<VideoDecoderConfigInternal> VideoDecoderConfigInternal::Create(
 
   Maybe<RefPtr<MediaByteBuffer>> description;
   if (aConfig.mDescription.WasPassed()) {
-    auto rv = GetExtraData(aConfig.mDescription.Value());
+    auto rv = GetExtraDataFromArrayBuffer(aConfig.mDescription.Value());
     if (rv.isErr()) {  // Invalid description data.
       LOGE(
           "Failed to create VideoDecoderConfigInternal due to invalid "
@@ -168,12 +161,10 @@ nsString VideoDecoderConfigInternal::ToString() const {
   if (mColorSpace.isSome()) {
     rv.AppendPrintf("colorspace %s", "todo");
   }
-  if (mDescription.isSome()) {
+  if (mDescription.isSome() && mDescription.value()) {
     rv.AppendPrintf("extradata: %zu bytes", mDescription.value()->Length());
   }
-  rv.AppendPrintf(
-      "hw accel: %s",
-      HardwareAccelerationValues::GetString(mHardwareAcceleration).data());
+  rv.AppendPrintf("hw accel: %s", GetEnumString(mHardwareAcceleration).get());
   if (mOptimizeForLatency.isSome()) {
     rv.AppendPrintf("optimize for latency: %s",
                     mOptimizeForLatency.value() ? "true" : "false");
@@ -217,24 +208,6 @@ static nsTArray<nsCString> GuessMIMETypes(const MIMECreateParam& aParam) {
   return types;
 }
 
-static bool IsSupportedCodec(const nsAString& aCodec) {
-  // H265 is unsupported.
-  if (!IsAV1CodecString(aCodec) && !IsVP9CodecString(aCodec) &&
-      !IsVP8CodecString(aCodec) && !IsH264CodecString(aCodec)) {
-    return false;
-  }
-
-  // Gecko allows codec string starts with vp9 or av1 but Webcodecs requires to
-  // starts with av01 and vp09.
-  // https://www.w3.org/TR/webcodecs-codec-registry/#video-codec-registry
-  if (StringBeginsWith(aCodec, u"vp9"_ns) ||
-      StringBeginsWith(aCodec, u"av1"_ns)) {
-    return false;
-  }
-
-  return true;
-}
-
 // https://w3c.github.io/webcodecs/#check-configuration-support
 template <typename Config>
 static bool CanDecode(const Config& aConfig) {
@@ -243,12 +216,7 @@ static bool CanDecode(const Config& aConfig) {
   if (IsOnAndroid()) {
     return false;
   }
-  if (!IsSupportedCodec(param.mParsedCodec)) {
-    return false;
-  }
-  if (IsOnMacOS() && IsH264CodecString(param.mParsedCodec) &&
-      !StaticPrefs::dom_media_webcodecs_force_osx_h264_enabled()) {
-    // This will be fixed in Bug 1846796.
+  if (!IsSupportedVideoCodec(param.mParsedCodec)) {
     return false;
   }
   // TODO: Instead of calling CanHandleContainerType with the guessed the
@@ -284,18 +252,9 @@ static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
   return {};
 }
 
-static Result<RefPtr<MediaByteBuffer>, nsresult> GetExtraData(
-    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aBuffer) {
-  RefPtr<MediaByteBuffer> data = MakeRefPtr<MediaByteBuffer>();
-  if (!AppendTypedArrayDataTo(aBuffer, *data)) {
-    return Err(NS_ERROR_OUT_OF_MEMORY);
-  }
-  return data->Length() > 0 ? data : nullptr;
-}
-
 static Result<Ok, nsresult> CloneConfiguration(
     RootedDictionary<VideoDecoderConfig>& aDest, JSContext* aCx,
-    const VideoDecoderConfig& aConfig) {
+    const VideoDecoderConfig& aConfig, ErrorResult& aRv) {
   DebugOnly<nsCString> str;
   MOZ_ASSERT(VideoDecoderTraits::Validate(aConfig, str));
 
@@ -312,7 +271,7 @@ static Result<Ok, nsresult> CloneConfiguration(
   if (aConfig.mDescription.WasPassed()) {
     aDest.mDescription.Construct();
     MOZ_TRY(CloneBuffer(aCx, aDest.mDescription.Value(),
-                        aConfig.mDescription.Value()));
+                        aConfig.mDescription.Value(), aRv));
   }
   if (aConfig.mDisplayAspectHeight.WasPassed()) {
     aDest.mDisplayAspectHeight.Construct(aConfig.mDisplayAspectHeight.Value());
@@ -334,8 +293,10 @@ static Maybe<VideoPixelFormat> GuessPixelFormat(layers::Image* aImage) {
     // DMABUFSurfaceImage?
     if (aImage->AsPlanarYCbCrImage() || aImage->AsNVImage()) {
       const ImageUtils imageUtils(aImage);
+      Maybe<dom::ImageBitmapFormat> format = imageUtils.GetFormat();
       Maybe<VideoPixelFormat> f =
-          ImageBitmapFormatToVideoPixelFormat(imageUtils.GetFormat());
+          format.isSome() ? ImageBitmapFormatToVideoPixelFormat(format.value())
+                          : Nothing();
 
       // ImageBitmapFormat cannot distinguish YUV420 or YUV420A.
       bool hasAlpha = aImage->AsPlanarYCbCrImage() &&
@@ -391,8 +352,6 @@ static VideoColorSpaceInternal GuessColorSpace(
     // Make an educated guess based on the coefficients.
     colorSpace.mPrimaries = colorSpace.mMatrix.map([](const auto& aMatrix) {
       switch (aMatrix) {
-        case VideoMatrixCoefficients::EndGuard_:
-          MOZ_CRASH("This should not happen");
         case VideoMatrixCoefficients::Bt2020_ncl:
           return VideoColorPrimaries::Bt2020;
         case VideoMatrixCoefficients::Rgb:
@@ -529,9 +488,6 @@ static VideoColorSpaceInternal GuessColorSpace(layers::Image* aImage) {
             case VideoMatrixCoefficients::Bt2020_ncl:
               colorSpace.mPrimaries = Some(VideoColorPrimaries::Bt2020);
               break;
-            case VideoMatrixCoefficients::EndGuard_:
-              MOZ_ASSERT_UNREACHABLE("bad enum value");
-              break;
           };
         }
       }
@@ -794,6 +750,21 @@ bool VideoDecoderTraits::Validate(const VideoDecoderConfig& aConfig,
     return false;
   }
 
+  bool detached =
+      aConfig.mDescription.WasPassed() &&
+      (aConfig.mDescription.Value().IsArrayBuffer()
+           ? JS::ArrayBuffer::fromObject(
+                 aConfig.mDescription.Value().GetAsArrayBuffer().Obj())
+                 .isDetached()
+           : JS::ArrayBufferView::fromObject(
+                 aConfig.mDescription.Value().GetAsArrayBufferView().Obj())
+                 .isDetached());
+
+  if (detached) {
+    LOGE("description is detached.");
+    return false;
+  }
+
   return true;
 }
 
@@ -828,9 +799,7 @@ VideoDecoder::VideoDecoder(nsIGlobalObject* aParent,
   LOG("VideoDecoder %p ctor", this);
 }
 
-VideoDecoder::~VideoDecoder() {
-  LOG("VideoDecoder %p dtor", this);
-}
+VideoDecoder::~VideoDecoder() { LOG("VideoDecoder %p dtor", this); }
 
 JSObject* VideoDecoder::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
@@ -877,21 +846,17 @@ already_AddRefed<Promise> VideoDecoder::IsConfigSupported(
   nsCString errorMessage;
   if (!VideoDecoderTraits::Validate(aConfig, errorMessage)) {
     p->MaybeRejectWithTypeError(nsPrintfCString(
-        "VideoDecoderConfig is invalid: %s", errorMessage.get()));
+        "IsConfigSupported: config is invalid: %s", errorMessage.get()));
     return p.forget();
   }
 
-  // TODO: Move the following works to another thread to unblock the current
-  // thread, as what spec suggests.
-
   RootedDictionary<VideoDecoderConfig> config(aGlobal.Context());
-  auto r = CloneConfiguration(config, aGlobal.Context(), aConfig);
+  auto r = CloneConfiguration(config, aGlobal.Context(), aConfig, aRv);
   if (r.isErr()) {
-    nsresult e = r.unwrapErr();
-    LOGE("Failed to clone VideoDecoderConfig. Error: 0x%08" PRIx32,
-         static_cast<uint32_t>(e));
-    p->MaybeRejectWithTypeError("Failed to clone VideoDecoderConfig");
-    aRv.Throw(e);
+    // This can only be an OOM: all members to clone are known to be valid
+    // because this is checked by ::Validate above.
+    MOZ_ASSERT(r.inspectErr() == NS_ERROR_OUT_OF_MEMORY &&
+               aRv.ErrorCodeIs(NS_ERROR_OUT_OF_MEMORY));
     return p.forget();
   }
 
diff --git a/dom/media/webcodecs/VideoEncoder.cpp b/dom/media/webcodecs/VideoEncoder.cpp
index 0e71417cb0..f593f70c77 100644
--- a/dom/media/webcodecs/VideoEncoder.cpp
+++ b/dom/media/webcodecs/VideoEncoder.cpp
@@ -101,8 +101,7 @@ VideoEncoderConfigInternal::VideoEncoderConfigInternal(
       mBitrateMode(aConfig.mBitrateMode),
       mLatencyMode(aConfig.mLatencyMode),
       mContentHint(aConfig.mContentHint),
-      mAvc(aConfig.mAvc) {
-}
+      mAvc(aConfig.mAvc) {}
 
 VideoEncoderConfigInternal::VideoEncoderConfigInternal(
     const VideoEncoderConfig& aConfig)
@@ -119,8 +118,7 @@ VideoEncoderConfigInternal::VideoEncoderConfigInternal(
       mBitrateMode(aConfig.mBitrateMode),
       mLatencyMode(aConfig.mLatencyMode),
       mContentHint(OptionalToMaybe(aConfig.mContentHint)),
-      mAvc(OptionalToMaybe(aConfig.mAvc)) {
-}
+      mAvc(OptionalToMaybe(aConfig.mAvc)) {}
 
 nsString VideoEncoderConfigInternal::ToString() const {
   nsString rv;
@@ -137,26 +135,20 @@ nsString VideoEncoderConfigInternal::ToString() const {
   if (mFramerate.isSome()) {
     rv.AppendPrintf(", %lfHz", mFramerate.value());
   }
-  rv.AppendPrintf(
-      " hw: %s",
-      HardwareAccelerationValues::GetString(mHardwareAcceleration).data());
-  rv.AppendPrintf(", alpha: %s", AlphaOptionValues::GetString(mAlpha).data());
+  rv.AppendPrintf(" hw: %s", GetEnumString(mHardwareAcceleration).get());
+  rv.AppendPrintf(", alpha: %s", GetEnumString(mAlpha).get());
   if (mScalabilityMode.isSome()) {
     rv.AppendPrintf(", scalability mode: %s",
                     NS_ConvertUTF16toUTF8(mScalabilityMode.value()).get());
   }
-  rv.AppendPrintf(
-      ", bitrate mode: %s",
-      VideoEncoderBitrateModeValues::GetString(mBitrateMode).data());
-  rv.AppendPrintf(", latency mode: %s",
-                  LatencyModeValues::GetString(mLatencyMode).data());
+  rv.AppendPrintf(", bitrate mode: %s", GetEnumString(mBitrateMode).get());
+  rv.AppendPrintf(", latency mode: %s", GetEnumString(mLatencyMode).get());
   if (mContentHint.isSome()) {
     rv.AppendPrintf(", content hint: %s",
                     NS_ConvertUTF16toUTF8(mContentHint.value()).get());
   }
   if (mAvc.isSome()) {
-    rv.AppendPrintf(", avc-specific: %s",
-                    AvcBitstreamFormatValues::GetString(mAvc->mFormat).data());
+    rv.AppendPrintf(", avc-specific: %s", GetEnumString(mAvc->mFormat).get());
   }
 
   return rv;
@@ -238,43 +230,57 @@ EncoderConfig VideoEncoderConfigInternal::ToEncoderConfig() const {
     if (ExtractH264CodecDetails(mCodec, profile, constraints, level)) {
       if (profile == H264_PROFILE_BASE || profile == H264_PROFILE_MAIN ||
           profile == H264_PROFILE_EXTENDED || profile == H264_PROFILE_HIGH) {
-        specific.emplace(
-            H264Specific(static_cast<H264_PROFILE>(profile), static_cast<H264_LEVEL>(level), format));
+        specific.emplace(H264Specific(static_cast<H264_PROFILE>(profile),
+                                      static_cast<H264_LEVEL>(level), format));
       }
     }
   }
-  // Only for vp9, not vp8
-  if (codecType == CodecType::VP9) {
-    uint8_t profile, level, bitdepth, chromasubsampling;
-    mozilla::VideoColorSpace colorspace;
-    DebugOnly<bool> rv = ExtractVPXCodecDetails(
-        mCodec, profile, level, bitdepth, chromasubsampling, colorspace);
-#ifdef DEBUG
-    if (!rv) {
-      LOGE("Error extracting VPX codec details, non fatal");
-    }
-#endif
-    specific.emplace(VP9Specific());
-  }
+  uint8_t numTemporalLayers = 1;
   MediaDataEncoder::ScalabilityMode scalabilityMode;
   if (mScalabilityMode) {
     if (mScalabilityMode->EqualsLiteral("L1T2")) {
       scalabilityMode = MediaDataEncoder::ScalabilityMode::L1T2;
+      numTemporalLayers = 2;
     } else if (mScalabilityMode->EqualsLiteral("L1T3")) {
       scalabilityMode = MediaDataEncoder::ScalabilityMode::L1T3;
+      numTemporalLayers = 3;
     } else {
       scalabilityMode = MediaDataEncoder::ScalabilityMode::None;
     }
   } else {
     scalabilityMode = MediaDataEncoder::ScalabilityMode::None;
   }
-  return EncoderConfig(
-      codecType, {mWidth, mHeight}, usage, ImageBitmapFormat::RGBA32, ImageBitmapFormat::RGBA32,
-      AssertedCast<uint8_t>(mFramerate.refOr(0.f)), 0, mBitrate.refOr(0),
-      mBitrateMode == VideoEncoderBitrateMode::Constant
-          ? MediaDataEncoder::BitrateMode::Constant
-          : MediaDataEncoder::BitrateMode::Variable,
-      hwPref, scalabilityMode, specific);
+  // Only for vp9, not vp8
+  if (codecType == CodecType::VP9) {
+    uint8_t profile, level, bitdepth, chromasubsampling;
+    mozilla::VideoColorSpace colorspace;
+    DebugOnly<bool> rv = ExtractVPXCodecDetails(
+        mCodec, profile, level, bitdepth, chromasubsampling, colorspace);
+#ifdef DEBUG
+    if (!rv) {
+      LOGE("Error extracting VPX codec details, non fatal");
+    }
+#endif
+    specific.emplace(VP9Specific(
+        VPXComplexity::Normal, /* Complexity */
+        true,                  /* Resilience */
+        numTemporalLayers,     /* Number of temporal layers */
+        true,                  /* Denoising */
+        false,                 /* Auto resize */
+        false,                 /* Frame dropping */
+        true,                  /* Adaptive Qp */
+        1,                     /* Number of spatial layers */
+        false                  /* Flexible */
+        ));
+  }
+  return EncoderConfig(codecType, {mWidth, mHeight}, usage,
+                       ImageBitmapFormat::RGBA32, ImageBitmapFormat::RGBA32,
+                       AssertedCast<uint8_t>(mFramerate.refOr(0.f)), 0,
+                       mBitrate.refOr(0),
+                       mBitrateMode == VideoEncoderBitrateMode::Constant
+                           ? MediaDataEncoder::BitrateMode::Constant
+                           : MediaDataEncoder::BitrateMode::Variable,
+                       hwPref, scalabilityMode, specific);
 }
 already_AddRefed<WebCodecsConfigurationChangeList>
 VideoEncoderConfigInternal::Diff(
@@ -326,27 +332,6 @@ VideoEncoderConfigInternal::Diff(
   return list.forget();
 }
 
-/*
- * The followings are helpers for VideoEncoder methods
- */
-static bool IsEncodeSupportedCodec(const nsAString& aCodec) {
-  LOG("IsEncodeSupported: %s", NS_ConvertUTF16toUTF8(aCodec).get());
-  if (!IsVP9CodecString(aCodec) && !IsVP8CodecString(aCodec) &&
-      !IsH264CodecString(aCodec) && !IsAV1CodecString(aCodec)) {
-    return false;
-  }
-
-  // Gecko allows codec string starts with vp9 or av1 but Webcodecs requires to
-  // starts with av01 and vp09.
-  // https://www.w3.org/TR/webcodecs-codec-registry/#video-codec-registry
-  if (StringBeginsWith(aCodec, u"vp9"_ns) ||
-      StringBeginsWith(aCodec, u"av1"_ns)) {
-    return false;
-  }
-
-  return true;
-}
-
 // https://w3c.github.io/webcodecs/#check-configuration-support
 static bool CanEncode(const RefPtr<VideoEncoderConfigInternal>& aConfig) {
   auto parsedCodecString =
@@ -355,7 +340,7 @@ static bool CanEncode(const RefPtr<VideoEncoderConfigInternal>& aConfig) {
   if (IsOnAndroid()) {
     return false;
   }
-  if (!IsEncodeSupportedCodec(parsedCodecString)) {
+  if (!IsSupportedVideoCodec(parsedCodecString)) {
     return false;
   }
 
diff --git a/dom/media/webcodecs/VideoFrame.cpp b/dom/media/webcodecs/VideoFrame.cpp
index 602bc95c29..bea5611e39 100644
--- a/dom/media/webcodecs/VideoFrame.cpp
+++ b/dom/media/webcodecs/VideoFrame.cpp
@@ -598,8 +598,6 @@ static bool IsYUVFormat(const VideoPixelFormat& aFormat) {
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return false;
-    case VideoPixelFormat::EndGuard_:
-      MOZ_ASSERT_UNREACHABLE("unsupported format");
   }
   return false;
 }
@@ -641,8 +639,6 @@ static VideoColorSpaceInit PickColorSpace(
       colorSpace.mPrimaries.SetValue(VideoColorPrimaries::Bt709);
       colorSpace.mTransfer.SetValue(VideoTransferCharacteristics::Iec61966_2_1);
       break;
-    case VideoPixelFormat::EndGuard_:
-      MOZ_ASSERT_UNREACHABLE("unsupported format");
   }
 
   return colorSpace;
@@ -881,8 +877,6 @@ static Result<RefPtr<layers::Image>, nsCString> CreateImageFromBuffer(
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return CreateRGBAImageFromBuffer(aFormat, aSize, aBuffer);
-    case VideoPixelFormat::EndGuard_:
-      MOZ_ASSERT_UNREACHABLE("unsupported format");
   }
   return Err(nsCString("Invalid image format"));
 }
@@ -1390,8 +1384,9 @@ already_AddRefed<VideoFrame> VideoFrame::Constructor(
   }
 
   const ImageUtils imageUtils(image);
+  Maybe<dom::ImageBitmapFormat> f = imageUtils.GetFormat();
   Maybe<VideoPixelFormat> format =
-      ImageBitmapFormatToVideoPixelFormat(imageUtils.GetFormat());
+      f.isSome() ? ImageBitmapFormatToVideoPixelFormat(f.value()) : Nothing();
 
   // TODO: Retrive/infer the duration, and colorspace.
   auto r = InitializeFrameFromOtherFrame(
@@ -1789,15 +1784,13 @@ nsCString VideoFrame::ToString() const {
     return rv;
   }
 
-  rv.AppendPrintf(
-      "VideoFrame ts: %" PRId64
-      ", %s, coded[%dx%d] visible[%dx%d], display[%dx%d] color: %s",
-      mTimestamp,
-      dom::VideoPixelFormatValues::GetString(mResource->mFormat->PixelFormat())
-          .data(),
-      mCodedSize.width, mCodedSize.height, mVisibleRect.width,
-      mVisibleRect.height, mDisplaySize.width, mDisplaySize.height,
-      ColorSpaceInitToString(mColorSpace).get());
+  rv.AppendPrintf("VideoFrame ts: %" PRId64
+                  ", %s, coded[%dx%d] visible[%dx%d], display[%dx%d] color: %s",
+                  mTimestamp,
+                  dom::GetEnumString(mResource->mFormat->PixelFormat()).get(),
+                  mCodedSize.width, mCodedSize.height, mVisibleRect.width,
+                  mVisibleRect.height, mDisplaySize.width, mDisplaySize.height,
+                  ColorSpaceInitToString(mColorSpace).get());
 
   if (mDuration) {
     rv.AppendPrintf(" dur: %" PRId64, mDuration.value());
@@ -2032,8 +2025,6 @@ gfx::SurfaceFormat VideoFrame::Format::ToSurfaceFormat() const {
     case VideoPixelFormat::BGRX:
       format = gfx::SurfaceFormat::B8G8R8X8;
       break;
-    case VideoPixelFormat::EndGuard_:
-      MOZ_ASSERT_UNREACHABLE("unsupported format");
   }
   return format;
 }
@@ -2056,8 +2047,6 @@ void VideoFrame::Format::MakeOpaque() {
     case VideoPixelFormat::RGBX:
     case VideoPixelFormat::BGRX:
       return;
-    case VideoPixelFormat::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported format");
 }
@@ -2077,8 +2066,6 @@ nsTArray<VideoFrame::Format::Plane> VideoFrame::Format::Planes() const {
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return {Plane::RGBA};
-    case VideoPixelFormat::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported format");
   return {};
@@ -2125,8 +2112,6 @@ uint32_t VideoFrame::Format::SampleBytes(const Plane& aPlane) const {
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return 4;  // 8 bits/sample, 32 bits/pixel
-    case VideoPixelFormat::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported format");
   return 0;
@@ -2154,7 +2139,6 @@ gfx::IntSize VideoFrame::Format::SampleSize(const Plane& aPlane) const {
         case VideoPixelFormat::RGBX:
         case VideoPixelFormat::BGRA:
         case VideoPixelFormat::BGRX:
-        case VideoPixelFormat::EndGuard_:
           MOZ_ASSERT_UNREACHABLE("invalid format");
           return {0, 0};
       }
@@ -2177,8 +2161,6 @@ bool VideoFrame::Format::IsValidSize(const gfx::IntSize& aSize) const {
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return true;
-    case VideoPixelFormat::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported format");
   return false;
@@ -2205,8 +2187,6 @@ size_t VideoFrame::Format::SampleCount(const gfx::IntSize& aSize) const {
     case VideoPixelFormat::BGRA:
     case VideoPixelFormat::BGRX:
       return (count * 4).value();
-    case VideoPixelFormat::EndGuard_:
-      break;
   }
 
   MOZ_ASSERT_UNREACHABLE("unsupported format");
@@ -2252,8 +2232,6 @@ uint32_t VideoFrame::Resource::Stride(const Format::Plane& aPlane) const {
         case VideoPixelFormat::BGRA:
         case VideoPixelFormat::BGRX:
           return (width * mFormat->SampleBytes(aPlane)).value();
-        case VideoPixelFormat::EndGuard_:
-          MOZ_ASSERT_UNREACHABLE("invalid format");
       }
       return 0;
     case Format::Plane::U:  // and UV
@@ -2269,7 +2247,6 @@ uint32_t VideoFrame::Resource::Stride(const Format::Plane& aPlane) const {
         case VideoPixelFormat::RGBX:
         case VideoPixelFormat::BGRA:
         case VideoPixelFormat::BGRX:
-        case VideoPixelFormat::EndGuard_:
           MOZ_ASSERT_UNREACHABLE("invalid format");
       }
       return 0;
diff --git a/dom/media/webcodecs/WebCodecsUtils.cpp b/dom/media/webcodecs/WebCodecsUtils.cpp
index 1e03f616db..3507aba440 100644
--- a/dom/media/webcodecs/WebCodecsUtils.cpp
+++ b/dom/media/webcodecs/WebCodecsUtils.cpp
@@ -37,7 +37,7 @@ std::atomic<WebCodecsId> sNextId = 0;
 namespace mozilla::dom {
 
 /*
- * The followings are helpers for VideoDecoder methods
+ * The following are helpers for AudioDecoder and VideoDecoder methods
  */
 
 nsTArray<nsCString> GuessContainers(const nsAString& aCodec) {
@@ -57,6 +57,29 @@ nsTArray<nsCString> GuessContainers(const nsAString& aCodec) {
     return {"mp4"_ns, "3gpp"_ns, "3gpp2"_ns, "3gp2"_ns};
   }
 
+  if (IsAACCodecString(aCodec)) {
+    return {"adts"_ns, "mp4"_ns};
+  }
+
+  if (aCodec.EqualsLiteral("vorbis") || aCodec.EqualsLiteral("opus")) {
+    return {"ogg"_ns};
+  }
+
+  if (aCodec.EqualsLiteral("flac")) {
+    return {"flac"_ns};
+  }
+
+  if (aCodec.EqualsLiteral("mp3")) {
+    return {"mp3"_ns};
+  }
+
+  if (aCodec.EqualsLiteral("ulaw") || aCodec.EqualsLiteral("alaw") ||
+      aCodec.EqualsLiteral("pcm-u8") || aCodec.EqualsLiteral("pcm-s16") ||
+      aCodec.EqualsLiteral("pcm-s24") || aCodec.EqualsLiteral("pcm-s32") ||
+      aCodec.EqualsLiteral("pcm-f32")) {
+    return {"x-wav"_ns};
+  }
+
   return {};
 }
 
@@ -106,7 +129,8 @@ static std::tuple<JS::ArrayBufferOrView, size_t, size_t> GetArrayBufferInfo(
 Result<Ok, nsresult> CloneBuffer(
     JSContext* aCx,
     OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDest,
-    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aSrc) {
+    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aSrc,
+    ErrorResult& aRv) {
   std::tuple<JS::ArrayBufferOrView, size_t, size_t> info =
       GetArrayBufferInfo(aCx, aSrc);
   JS::Rooted<JS::ArrayBufferOrView> abov(aCx);
@@ -121,6 +145,8 @@ Result<Ok, nsresult> CloneBuffer(
   JS::Rooted<JSObject*> cloned(aCx,
                                JS::ArrayBufferClone(aCx, obj, offset, len));
   if (NS_WARN_IF(!cloned)) {
+    aRv.MightThrowJSException();
+    aRv.StealExceptionFromJSContext(aCx);
     return Err(NS_ERROR_OUT_OF_MEMORY);
   }
 
@@ -151,8 +177,6 @@ gfx::YUVColorSpace ToColorSpace(VideoMatrixCoefficients aMatrix) {
       return gfx::YUVColorSpace::BT601;
     case VideoMatrixCoefficients::Bt2020_ncl:
       return gfx::YUVColorSpace::BT2020;
-    case VideoMatrixCoefficients::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported VideoMatrixCoefficients");
   return gfx::YUVColorSpace::Default;
@@ -171,8 +195,7 @@ gfx::TransferFunction ToTransferFunction(
     case VideoTransferCharacteristics::Hlg:
       return gfx::TransferFunction::HLG;
     case VideoTransferCharacteristics::Linear:
-    case VideoTransferCharacteristics::EndGuard_:
-      break;
+      return gfx::TransferFunction::Default;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported VideoTransferCharacteristics");
   return gfx::TransferFunction::Default;
@@ -190,8 +213,6 @@ gfx::ColorSpace2 ToPrimaries(VideoColorPrimaries aPrimaries) {
       return gfx::ColorSpace2::BT2020;
     case VideoColorPrimaries::Smpte432:
       return gfx::ColorSpace2::DISPLAY_P3;
-    case VideoColorPrimaries::EndGuard_:
-      break;
   }
   MOZ_ASSERT_UNREACHABLE("unsupported VideoTransferCharacteristics");
   return gfx::ColorSpace2::UNKNOWN;
@@ -300,13 +321,6 @@ Maybe<VideoPixelFormat> ImageBitmapFormatToVideoPixelFormat(
   return Nothing();
 }
 
-Result<RefPtr<MediaByteBuffer>, nsresult> GetExtraDataFromArrayBuffer(
-    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aBuffer) {
-  RefPtr<MediaByteBuffer> data = MakeRefPtr<MediaByteBuffer>();
-  Unused << AppendTypedArrayDataTo(aBuffer, *data);
-  return data->Length() > 0 ? data : nullptr;
-}
-
 bool IsOnAndroid() {
 #if defined(ANDROID)
   return true;
@@ -364,15 +378,13 @@ struct ConfigurationChangeToString {
   }
   nsCString operator()(
       const HardwareAccelerationChange& aHardwareAccelerationChange) {
-    return nsPrintfCString("HW acceleration: %s",
-                           dom::HardwareAccelerationValues::GetString(
-                               aHardwareAccelerationChange.get())
-                               .data());
+    return nsPrintfCString(
+        "HW acceleration: %s",
+        dom::GetEnumString(aHardwareAccelerationChange.get()).get());
   }
   nsCString operator()(const AlphaChange& aAlphaChange) {
-    return nsPrintfCString(
-        "Alpha: %s",
-        dom::AlphaOptionValues::GetString(aAlphaChange.get()).data());
+    return nsPrintfCString("Alpha: %s",
+                           dom::GetEnumString(aAlphaChange.get()).get());
   }
   nsCString operator()(const ScalabilityModeChange& aScalabilityModeChange) {
     if (aScalabilityModeChange.get().isNothing()) {
@@ -383,15 +395,12 @@ struct ConfigurationChangeToString {
         NS_ConvertUTF16toUTF8(aScalabilityModeChange.get().value()).get());
   }
   nsCString operator()(const BitrateModeChange& aBitrateModeChange) {
-    return nsPrintfCString(
-        "Bitrate mode: %s",
-        dom::VideoEncoderBitrateModeValues::GetString(aBitrateModeChange.get())
-            .data());
+    return nsPrintfCString("Bitrate mode: %s",
+                           dom::GetEnumString(aBitrateModeChange.get()).get());
   }
   nsCString operator()(const LatencyModeChange& aLatencyModeChange) {
-    return nsPrintfCString(
-        "Latency mode: %s",
-        dom::LatencyModeValues::GetString(aLatencyModeChange.get()).data());
+    return nsPrintfCString("Latency mode: %s",
+                           dom::GetEnumString(aLatencyModeChange.get()).get());
   }
   nsCString operator()(const ContentHintChange& aContentHintChange) {
     return nsPrintfCString("Content hint: %s",
@@ -489,9 +498,6 @@ WebCodecsConfigurationChangeList::ToPEMChangeList() const {
   return rv.forget();
 }
 
-#define ENUM_TO_STRING(enumType, enumValue) \
-  enumType##Values::GetString(enumValue).data()
-
 nsCString ColorSpaceInitToString(
     const dom::VideoColorSpaceInit& aColorSpaceInit) {
   nsCString rv("VideoColorSpace");
@@ -502,18 +508,15 @@ nsCString ColorSpaceInitToString(
   }
   if (!aColorSpaceInit.mMatrix.IsNull()) {
     rv.AppendPrintf(" matrix: %s",
-                    ENUM_TO_STRING(dom::VideoMatrixCoefficients,
-                                   aColorSpaceInit.mMatrix.Value()));
+                    GetEnumString(aColorSpaceInit.mMatrix.Value()).get());
   }
   if (!aColorSpaceInit.mTransfer.IsNull()) {
     rv.AppendPrintf(" transfer: %s",
-                    ENUM_TO_STRING(dom::VideoTransferCharacteristics,
-                                   aColorSpaceInit.mTransfer.Value()));
+                    GetEnumString(aColorSpaceInit.mTransfer.Value()).get());
   }
   if (!aColorSpaceInit.mPrimaries.IsNull()) {
     rv.AppendPrintf(" primaries: %s",
-                    ENUM_TO_STRING(dom::VideoColorPrimaries,
-                                   aColorSpaceInit.mPrimaries.Value()));
+                    GetEnumString(aColorSpaceInit.mPrimaries.Value()).get());
   }
 
   return rv;
@@ -575,4 +578,32 @@ nsString ConfigToString(const VideoDecoderConfig& aConfig) {
   return internal->ToString();
 }
 
-};  // namespace mozilla::dom
+Result<RefPtr<MediaByteBuffer>, nsresult> GetExtraDataFromArrayBuffer(
+    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aBuffer) {
+  RefPtr<MediaByteBuffer> data = MakeRefPtr<MediaByteBuffer>();
+  if (!AppendTypedArrayDataTo(aBuffer, *data)) {  // failure is reported as OOM
+    return Err(NS_ERROR_OUT_OF_MEMORY);
+  }
+  return data->Length() > 0 ? data : nullptr;  // no bytes -> null, not an Err
+}
+
+bool IsSupportedVideoCodec(const nsAString& aCodec) {
+  LOG("IsSupportedVideoCodec: %s", NS_ConvertUTF16toUTF8(aCodec).get());
+  // For VP8, the only accepted codec string is the literal "vp8".
+  if (!IsVP9CodecString(aCodec) && !IsH264CodecString(aCodec) &&
+      !IsAV1CodecString(aCodec) && !aCodec.EqualsLiteral("vp8")) {
+    return false;
+  }
+
+  // Gecko accepts codec strings starting with "vp9" or "av1", but WebCodecs
+  // requires them to start with "vp09" and "av01" respectively:
+  // https://w3c.github.io/webcodecs/codec_registry.html.
+  if (StringBeginsWith(aCodec, u"vp9"_ns) ||
+      StringBeginsWith(aCodec, u"av1"_ns)) {
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace mozilla::dom
diff --git a/dom/media/webcodecs/WebCodecsUtils.h b/dom/media/webcodecs/WebCodecsUtils.h
index 7c0e6b6bbc..196c57421d 100644
--- a/dom/media/webcodecs/WebCodecsUtils.h
+++ b/dom/media/webcodecs/WebCodecsUtils.h
@@ -8,6 +8,7 @@
 #define MOZILLA_DOM_WEBCODECS_WEBCODECSUTILS_H
 
 #include "ErrorList.h"
+#include "MediaData.h"
 #include "js/TypeDecls.h"
 #include "mozilla/Maybe.h"
 #include "mozilla/MozPromise.h"
@@ -86,7 +87,11 @@ Nullable<T> MaybeToNullable(const Maybe<T>& aOptional) {
 Result<Ok, nsresult> CloneBuffer(
     JSContext* aCx,
     OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aDest,
-    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aSrc);
+    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aSrc,
+    ErrorResult& aRv);
+
+Result<RefPtr<MediaByteBuffer>, nsresult> GetExtraDataFromArrayBuffer(
+    const OwningMaybeSharedArrayBufferViewOrMaybeSharedArrayBuffer& aBuffer);
 
 /*
  * The following are utilities to convert between VideoColorSpace values to
@@ -232,6 +237,8 @@ Maybe<CodecType> CodecStringToCodecType(const nsAString& aCodecString);
 
 nsString ConfigToString(const VideoDecoderConfig& aConfig);
 
+bool IsSupportedVideoCodec(const nsAString& aCodec);
+
 }  // namespace dom
 
 }  // namespace mozilla
diff --git a/dom/media/webcodecs/moz.build b/dom/media/webcodecs/moz.build
index 267a822286..ddb5aad5cb 100644
--- a/dom/media/webcodecs/moz.build
+++ b/dom/media/webcodecs/moz.build
@@ -21,8 +21,11 @@ EXPORTS.mozilla += [
 ]
 
 EXPORTS.mozilla.dom += [
+    "AudioData.h",
+    "AudioDecoder.h",
     "DecoderTemplate.h",
     "DecoderTypes.h",
+    "EncodedAudioChunk.h",
     "EncodedVideoChunk.h",
     "EncoderAgent.h",
     "EncoderTemplate.h",
@@ -35,8 +38,11 @@ EXPORTS.mozilla.dom += [
 ]
 
 UNIFIED_SOURCES += [
+    "AudioData.cpp",
+    "AudioDecoder.cpp",
     "DecoderAgent.cpp",
     "DecoderTemplate.cpp",
+    "EncodedAudioChunk.cpp",
     "EncodedVideoChunk.cpp",
     "EncoderAgent.cpp",
     "EncoderTemplate.cpp",