Adding upstream version 115.8.0esr. [upstream/115.8.0esr]

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:47:29 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:47:29 +0000
commit: 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree: a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /dom/media/mp4
parent: Initial commit. (diff)
download: firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz
firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip
25 files changed, 5457 insertions, 0 deletions
diff --git a/dom/media/mp4/Atom.h b/dom/media/mp4/Atom.h
new file mode 100644
index 0000000000..f008dfe148
--- /dev/null
+++ b/dom/media/mp4/Atom.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ATOM_H_
+#define ATOM_H_
+
+namespace mozilla {
+
+class Atom {
+ public:
+  Atom() : mValid(false) {}
+  virtual bool IsValid() { return mValid; }
+
+ protected:
+  bool mValid;
+};
+
+}  // namespace mozilla
+
+#endif  // ATOM_H_
diff --git a/dom/media/mp4/AtomType.h b/dom/media/mp4/AtomType.h
new file mode 100644
index 0000000000..dcecde845d
--- /dev/null
+++ b/dom/media/mp4/AtomType.h
@@ -0,0 +1,29 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ATOM_TYPE_H_
+#define ATOM_TYPE_H_
+
+#include <stdint.h>
+#include "mozilla/EndianUtils.h"
+
+namespace mozilla {
+
+class AtomType {
+ public:
+  AtomType() : mType(0) {}
+  MOZ_IMPLICIT AtomType(uint32_t aType) : mType(aType) {}
+  MOZ_IMPLICIT AtomType(const char* aType)
+      : mType(BigEndian::readUint32(aType)) {}
+  bool operator==(const AtomType& aType) const { return mType == aType.mType; }
+  bool operator!() const { return !mType; }
+
+ private:
+  uint32_t mType;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/Box.cpp b/dom/media/mp4/Box.cpp
new file mode 100644
index 0000000000..334ba3e3f8
--- /dev/null
+++ b/dom/media/mp4/Box.cpp
@@ -0,0 +1,230 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Box.h"
+#include "ByteStream.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Unused.h"
+#include <algorithm>
+
+namespace mozilla {
+
+// Limit reads to 32MiB max.
+// static
+const uint64_t Box::kMAX_BOX_READ = 32 * 1024 * 1024;
+
+// Number of bytes separating the start of a box's body from its first child
+// box, for a box of type |aType|. Types not listed here yield 0.
+static uint32_t BoxOffset(AtomType aType) {
+  // Extra bytes at the start of a FullBox body.
+  const uint32_t kFullBoxPrefix = 4;
+  const bool isAudioEntry =
+      aType == AtomType("mp4a") || aType == AtomType("enca");
+  const bool isVideoEntry =
+      aType == AtomType("mp4v") || aType == AtomType("encv");
+
+  if (isAudioEntry) {
+    return 28;  // AudioSampleEntry; ISO 14496-12, section 8.16
+  }
+  if (isVideoEntry) {
+    return 78;  // VideoSampleEntry; ISO 14496-12, section 8.16
+  }
+  // SampleDescriptionBox (ISO 14496-12, section 8.16) is a FullBox and holds
+  // a |count| member before its child boxes.
+  return aType == AtomType("stsd") ? kFullBoxPrefix + 4 : 0;
+}
+
+Box::Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent)
+    : mContext(aContext), mParent(aParent) {
+  uint8_t header[8];
+
+  if (aOffset > INT64_MAX - sizeof(header)) {
+    return;
+  }
+
+  MediaByteRange headerRange(aOffset, aOffset + sizeof(header));
+  if (mParent && !mParent->mRange.Contains(headerRange)) {
+    return;
+  }
+
+  const MediaByteRange* byteRange;
+  for (int i = 0;; i++) {
+    if (i == mContext->mByteRanges.Length()) {
+      return;
+    }
+
+    byteRange = static_cast<const MediaByteRange*>(&mContext->mByteRanges[i]);
+    if (byteRange->Contains(headerRange)) {
+      break;
+    }
+  }
+
+  size_t bytes;
+  if (!mContext->mSource->CachedReadAt(aOffset, header, sizeof(header),
+                                       &bytes) ||
+      bytes != sizeof(header)) {
+    return;
+  }
+
+  uint64_t size = BigEndian::readUint32(header);
+  if (size == 1) {
+    uint8_t bigLength[8];
+    if (aOffset > INT64_MAX - sizeof(header) - sizeof(bigLength)) {
+      return;
+    }
+    MediaByteRange bigLengthRange(headerRange.mEnd,
+                                  headerRange.mEnd + sizeof(bigLength));
+    if ((mParent && !mParent->mRange.Contains(bigLengthRange)) ||
+        !byteRange->Contains(bigLengthRange) ||
+        !mContext->mSource->CachedReadAt(aOffset + sizeof(header), bigLength,
+                                         sizeof(bigLength), &bytes) ||
+        bytes != sizeof(bigLength)) {
+      return;
+    }
+    size = BigEndian::readUint64(bigLength);
+    mBodyOffset = bigLengthRange.mEnd;
+  } else if (size == 0) {
+    // box extends to end of file.
+    size = mContext->mByteRanges.LastInterval().mEnd - aOffset;
+    mBodyOffset = headerRange.mEnd;
+  } else {
+    mBodyOffset = headerRange.mEnd;
+  }
+
+  if (size > INT64_MAX) {
+    return;
+  }
+  int64_t end = static_cast<int64_t>(aOffset) + static_cast<int64_t>(size);
+  if (end < static_cast<int64_t>(aOffset)) {
+    // Overflowed.
+    return;
+  }
+
+  mType = BigEndian::readUint32(&header[4]);
+  mChildOffset = mBodyOffset + BoxOffset(mType);
+
+  MediaByteRange boxRange(aOffset, end);
+  if (mChildOffset > boxRange.mEnd ||
+      (mParent && !mParent->mRange.Contains(boxRange)) ||
+      !byteRange->Contains(boxRange)) {
+    return;
+  }
+
+  mRange = boxRange;
+}
+
+Box::Box()
+    : mContext(nullptr), mBodyOffset(0), mChildOffset(0), mParent(nullptr) {}
+
+Box Box::Next() const {
+  MOZ_ASSERT(IsAvailable());
+  return Box(mContext, mRange.mEnd, mParent);
+}
+
+Box Box::FirstChild() const {
+  MOZ_ASSERT(IsAvailable());
+  if (mChildOffset == mRange.mEnd) {
+    return Box();
+  }
+  return Box(mContext, mChildOffset, this);
+}
+
+nsTArray<uint8_t> Box::ReadCompleteBox() const {
+  const size_t boxSize = mRange.mEnd - mRange.mStart;
+  nsTArray<uint8_t> buffer(boxSize);
+  buffer.SetLength(boxSize);
+  size_t got = 0;
+  const bool readOk = mContext->mSource->CachedReadAt(
+      mRange.mStart, buffer.Elements(), boxSize, &got);
+  if (readOk && got == boxSize) {
+    return buffer;
+  }
+  // A failed or short read: byte ranges are being reported incorrectly.
+  NS_WARNING("Read failed in mozilla::Box::ReadCompleteBox()");
+  return nsTArray<uint8_t>(0);
+}
+
+nsTArray<uint8_t> Box::Read() const {
+  nsTArray<uint8_t> out;
+  Unused << Read(&out, mRange);
+  return out;
+}
+
+bool Box::Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const {
+  // Bytes between the first child offset and the end of the requested range.
+  const uint64_t span = aRange.mEnd - mChildOffset;
+  int64_t length;
+  const bool lengthKnown = mContext->mSource->Length(&length);
+  // When the source reports no length (e.g. the HTTP server gave us none),
+  // limit the read to kMAX_BOX_READ max.
+  length = lengthKnown ? span : std::min(span, kMAX_BOX_READ);
+  aDest->SetLength(length);
+
+  size_t bytes;
+  const bool readOk = mContext->mSource->CachedReadAt(
+      mChildOffset, aDest->Elements(), aDest->Length(), &bytes);
+  if (readOk && bytes == aDest->Length()) {
+    return true;
+  }
+  // Byte ranges are being reported incorrectly
+  NS_WARNING("Read failed in mozilla::Box::Read()");
+  aDest->Clear();
+  return false;
+}
+
+ByteSlice Box::ReadAsSlice() {
+  const ByteSlice kNoData{nullptr, 0};
+  if (!mContext || mRange.IsEmpty()) {
+    return kNoData;
+  }
+
+  // Bytes from the first child offset up to the end of this box.
+  const uint64_t span = mRange.mEnd - mChildOffset;
+  int64_t length;
+  const bool lengthKnown = mContext->mSource->Length(&length);
+  // When the source reports no length (e.g. the HTTP server gave us none),
+  // limit the read to kMAX_BOX_READ max.
+  length = lengthKnown ? span : std::min(span, kMAX_BOX_READ);
+
+  // Prefer direct access to the underlying storage of the ByteStream.
+  if (const uint8_t* direct =
+          mContext->mSource->GetContiguousAccess(mChildOffset, length)) {
+    return ByteSlice{direct, size_t(length)};
+  }
+
+  // Otherwise copy the bytes into memory handed out by the context allocator.
+  uint8_t* copy = mContext->mAllocator.Allocate(size_t(length));
+  size_t bytes;
+  if (mContext->mSource->CachedReadAt(mChildOffset, copy, length, &bytes) &&
+      bytes == length) {
+    return ByteSlice{copy, size_t(length)};
+  }
+  // Byte ranges are being reported incorrectly
+  NS_WARNING("Read failed in mozilla::Box::ReadAsSlice()");
+  return kNoData;
+}
+
+const size_t BLOCK_CAPACITY = 16 * 1024;
+
+uint8_t* BumpAllocator::Allocate(size_t aNumBytes) {
+  if (aNumBytes <= BLOCK_CAPACITY) {
+    // Serve small requests from the first buffer that still has room.
+    for (nsTArray<uint8_t>& block : mBuffers) {
+      const size_t used = block.Length();
+      if (used + aNumBytes < BLOCK_CAPACITY) {
+        block.SetLength(used + aNumBytes);
+        return block.Elements() + used;
+      }
+    }
+  }
+  // Nothing fits: append a buffer created with max(request, block) bytes.
+  const size_t capacity = std::max(aNumBytes, BLOCK_CAPACITY);
+  mBuffers.AppendElement(nsTArray<uint8_t>(capacity));
+  mBuffers.LastElement().SetLength(aNumBytes);
+  return mBuffers.LastElement().Elements();
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/Box.h b/dom/media/mp4/Box.h
new file mode 100644
index 0000000000..e63bfbcc90
--- /dev/null
+++ b/dom/media/mp4/Box.h
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BOX_H_
+#define BOX_H_
+
+#include <stdint.h>
+#include "nsTArray.h"
+#include "MediaResource.h"
+#include "mozilla/EndianUtils.h"
+#include "AtomType.h"
+#include "BufferReader.h"
+
+namespace mozilla {
+class ByteStream;
+
+class BumpAllocator {
+ public:
+  uint8_t* Allocate(size_t aNumBytes);
+
+ private:
+  nsTArray<nsTArray<uint8_t>> mBuffers;
+};
+
+class BoxContext {
+ public:
+  BoxContext(ByteStream* aSource, const MediaByteRangeSet& aByteRanges)
+      : mSource(aSource), mByteRanges(aByteRanges) {}
+
+  RefPtr<ByteStream> mSource;
+  const MediaByteRangeSet& mByteRanges;
+  BumpAllocator mAllocator;
+};
+
+struct ByteSlice {
+  const uint8_t* mBytes;
+  size_t mSize;
+};
+
+class Box {
+ public:
+  Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent = nullptr);
+  Box();
+
+  bool IsAvailable() const { return !mRange.IsEmpty(); }
+  uint64_t Offset() const { return mRange.mStart; }
+  uint64_t Length() const { return mRange.mEnd - mRange.mStart; }
+  uint64_t NextOffset() const { return mRange.mEnd; }
+  const MediaByteRange& Range() const { return mRange; }
+  const Box* Parent() const { return mParent; }
+  bool IsType(const char* aType) const { return mType == AtomType(aType); }
+
+  Box Next() const;
+  Box FirstChild() const;
+  // Reads the box contents, excluding the header.
+  nsTArray<uint8_t> Read() const;
+
+  // Reads the complete box; its header and body.
+  nsTArray<uint8_t> ReadCompleteBox() const;
+
+  // Reads from the content of the box, excluding header.
+  bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const;
+
+  static const uint64_t kMAX_BOX_READ;
+
+  // Returns a slice, pointing to the data of this box. The lifetime of
+  // the memory this slice points to matches the box's context's lifetime.
+  ByteSlice ReadAsSlice();
+
+ private:
+  bool Contains(MediaByteRange aRange) const;
+  BoxContext* mContext;
+  mozilla::MediaByteRange mRange;
+  uint64_t mBodyOffset;
+  uint64_t mChildOffset;
+  AtomType mType;
+  const Box* mParent;
+};
+
+// BoxReader serves box data through a BufferReader. The box data is
+// stored either in the box's context's bump allocator, or in the ByteStream
+// itself if the ByteStream provides it through GetContiguousAccess().
+// NOTE: The data the BoxReader reads may be stored in the Box's BoxContext.
+// Ensure that the BoxReader doesn't outlive the BoxContext!
+class MOZ_RAII BoxReader {
+ public:
+  explicit BoxReader(Box& aBox)
+      : mData(aBox.ReadAsSlice()), mReader(mData.mBytes, mData.mSize) {}
+  BufferReader* operator->() { return &mReader; }
+
+ private:
+  ByteSlice mData;
+  BufferReader mReader;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/BufferStream.cpp b/dom/media/mp4/BufferStream.cpp
new file mode 100644
index 0000000000..c2fa40cb8a
--- /dev/null
+++ b/dom/media/mp4/BufferStream.cpp
@@ -0,0 +1,59 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BufferStream.h"
+#include "MediaData.h"
+#include "MediaResource.h"
+#include <algorithm>
+
+namespace mozilla {
+
+BufferStream::BufferStream()
+    : mStartOffset(0), mData(new mozilla::MediaByteBuffer) {}
+
+BufferStream::BufferStream(mozilla::MediaByteBuffer* aBuffer)
+    : mStartOffset(0), mData(aBuffer) {}
+
+BufferStream::~BufferStream() = default;
+
+/*virtual*/
+bool BufferStream::ReadAt(int64_t aOffset, void* aData, size_t aLength,
+                          size_t* aBytesRead) {
+  const auto endOffset = mStartOffset + mData->Length();  // end of mData
+  const bool inRange = aOffset >= mStartOffset && aOffset <= endOffset;
+  if (inRange) {
+    *aBytesRead = std::min(aLength, size_t(endOffset - aOffset));
+    memcpy(aData, mData->Elements() + aOffset - mStartOffset, *aBytesRead);
+  }
+  return inRange;
+}
+
+/*virtual*/
+bool BufferStream::CachedReadAt(int64_t aOffset, void* aData, size_t aLength,
+                                size_t* aBytesRead) {
+  return ReadAt(aOffset, aData, aLength, aBytesRead);
+}
+
+/*virtual*/
+bool BufferStream::Length(int64_t* aLength) {
+  *aLength = mStartOffset + mData->Length();
+  return true;
+}
+
+/* virtual */
+void BufferStream::DiscardBefore(int64_t aOffset) {
+  if (aOffset > mStartOffset) {
+    mData->RemoveElementsAt(0, aOffset - mStartOffset);
+    mStartOffset = aOffset;
+  }
+}
+
+bool BufferStream::AppendBytes(const uint8_t* aData, size_t aLength) {
+  return mData->AppendElements(aData, aLength, fallible);
+}
+
+MediaByteRange BufferStream::GetByteRange() {
+  return MediaByteRange(mStartOffset, mStartOffset + mData->Length());
+}
+}  // namespace mozilla
diff --git a/dom/media/mp4/BufferStream.h b/dom/media/mp4/BufferStream.h
new file mode 100644
index 0000000000..fb817b5916
--- /dev/null
+++ b/dom/media/mp4/BufferStream.h
@@ -0,0 +1,45 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BUFFER_STREAM_H_
+#define BUFFER_STREAM_H_
+
+#include "ByteStream.h"
+#include "nsTArray.h"
+#include "MediaResource.h"
+
+namespace mozilla {
+class MediaByteBuffer;
+
+DDLoggedTypeDeclNameAndBase(BufferStream, ByteStream);
+
+class BufferStream : public ByteStream,
+                     public mozilla::DecoderDoctorLifeLogger<BufferStream> {
+ public:
+  /* BufferStream keeps a reference to aBuffer (through a RefPtr) and does
+   * not copy it. The default constructor starts from a newly created buffer.
+   */
+  BufferStream();
+  explicit BufferStream(mozilla::MediaByteBuffer* aBuffer);
+
+  virtual bool ReadAt(int64_t aOffset, void* aData, size_t aLength,
+                      size_t* aBytesRead) override;
+  virtual bool CachedReadAt(int64_t aOffset, void* aData, size_t aLength,
+                            size_t* aBytesRead) override;
+  virtual bool Length(int64_t* aLength) override;
+
+  virtual void DiscardBefore(int64_t aOffset) override;
+
+  bool AppendBytes(const uint8_t* aData, size_t aLength);
+
+  mozilla::MediaByteRange GetByteRange();
+
+ private:
+  ~BufferStream();
+  int64_t mStartOffset;
+  RefPtr<mozilla::MediaByteBuffer> mData;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/ByteStream.h b/dom/media/mp4/ByteStream.h
new file mode 100644
index 0000000000..0f733dfb97
--- /dev/null
+++ b/dom/media/mp4/ByteStream.h
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef STREAM_H_
+#define STREAM_H_
+
+#include "DecoderDoctorLogger.h"
+#include "nsISupportsImpl.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclName(ByteStream);
+
+class ByteStream : public DecoderDoctorLifeLogger<ByteStream> {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ByteStream);
+
+  virtual bool ReadAt(int64_t offset, void* data, size_t size,
+                      size_t* bytes_read) = 0;
+  virtual bool CachedReadAt(int64_t offset, void* data, size_t size,
+                            size_t* bytes_read) = 0;
+  virtual bool Length(int64_t* size) = 0;
+
+  virtual void DiscardBefore(int64_t offset) {}
+
+  // If this ByteStream's underlying storage of media is in-memory, this
+  // function returns a pointer to the in-memory storage of data at offset.
+  // Note that even if a ByteStream stores data in memory, it may not be
+  // stored contiguously, in which case this returns nullptr.
+  virtual const uint8_t* GetContiguousAccess(int64_t aOffset, size_t aSize) {
+    return nullptr;
+  }
+
+ protected:
+  virtual ~ByteStream() = default;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/DecoderData.cpp b/dom/media/mp4/DecoderData.cpp
new file mode 100644
index 0000000000..b7c9c86954
--- /dev/null
+++ b/dom/media/mp4/DecoderData.cpp
@@ -0,0 +1,357 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Adts.h"
+#include "AnnexB.h"
+#include "BufferReader.h"
+#include "DecoderData.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Telemetry.h"
+#include "VideoUtils.h"
+#include "MP4Metadata.h"
+#include "mozilla/Logging.h"
+
+// OpusDecoder header is really needed only by MP4 in rust
+#include "OpusDecoder.h"
+#include "mp4parse.h"
+
+#define LOG(...) \
+  MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
+
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+
+mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate(
+    const uint8_t* aData, size_t aLength) {
+  BufferReader reader(aData, aLength);
+  while (reader.Remaining()) {
+    PsshInfo psshInfo;
+    if (!reader.ReadArray(psshInfo.uuid, 16)) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+
+    if (!reader.CanReadType<uint32_t>()) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+    auto length = reader.ReadType<uint32_t>();
+
+    if (!reader.ReadArray(psshInfo.data, length)) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+    pssh.AppendElement(std::move(psshInfo));
+  }
+  return mozilla::Ok();
+}
+
+static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig,
+                                            const Mp4parseSinfInfo& aSinf) {
+  if (aSinf.is_encrypted == 0) return NS_OK;  // unencrypted: nothing to do
+  auto& crypto = aConfig.mCrypto;
+  if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC) {
+    crypto.mCryptoScheme = CryptoScheme::Cenc;
+  } else if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS) {
+    crypto.mCryptoScheme = CryptoScheme::Cbcs;
+  } else {
+    // Any other scheme is rejected before the config is touched.
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL(
+            "Unsupported encryption scheme encountered aSinf.scheme_type=%d",
+            static_cast<int>(aSinf.scheme_type)));
+  }
+  crypto.mIVSize = aSinf.iv_size;
+  crypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length);
+  crypto.mCryptByteBlock = aSinf.crypt_byte_block;
+  crypto.mSkipByteBlock = aSinf.skip_byte_block;
+  crypto.mConstantIV.AppendElements(aSinf.constant_iv.data,
+                                    aSinf.constant_iv.length);
+  return NS_OK;
+}
+
+// Verify various information shared by Mp4ParseTrackAudioInfo and
+// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an
+// appropriate MediaResult indicating if the info is valid or not.
+// This verifies:
+// - That we have a sample_info_count > 0 (valid tracks should have at least one
+//   sample description entry)
+// - That only a single codec is used across all sample infos, as we don't
+//   handle multiple.
+// - If more than one sample information structures contain crypto info. This
+//   case is not fatal (we don't return an error), but does record telemetry
+//   to help judge if we need more handling in gecko for multiple crypto.
+//
+// Telemetry is also recorded on the above. As of writing, the
+// telemetry is recorded to give us early warning if MP4s exist that we're not
+// handling. Note, if adding new checks and telemetry to this function,
+// telemetry should be recorded before returning to ensure it is gathered.
+template <typename Mp4ParseTrackAudioOrVideoInfo>
+static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry(
+    Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) {
+  Telemetry::Accumulate(
+      Telemetry::MEDIA_MP4_PARSE_NUM_SAMPLE_DESCRIPTION_ENTRIES,
+      audioOrVideoInfo->sample_info_count);
+
+  bool hasMultipleCodecs = false;
+  uint32_t cryptoCount = 0;
+  for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) {
+    // Entry 0 is only read inside the loop, i.e. never for an empty track.
+    if (audioOrVideoInfo->sample_info[i].codec_type !=
+        audioOrVideoInfo->sample_info[0].codec_type) {
+      hasMultipleCodecs = true;
+    }
+    // Count the sample infos that carry encryption info.
+    if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) {
+      cryptoCount += 1;
+    }
+  }
+
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CODECS,
+      hasMultipleCodecs);
+
+  // Accumulate if we have multiple (2 or more) crypto entries.
+  // TODO(1715283): rework this to count number of crypto entries + gather
+  // richer data.
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CRYPTO,
+      cryptoCount >= 2);
+
+  if (audioOrVideoInfo->sample_info_count == 0) {
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Got 0 sample info while verifying track."));
+  }
+
+  if (hasMultipleCodecs) {
+    // Different codecs in a single track. We don't handle this.
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Multiple codecs encountered while verifying track."));
+  }
+
+  return NS_OK;
+}
+
+// Fills in this audio track's info from the mp4parse track and audio
+// structures; aIndices (may be null) is used to derive the AAC media length.
+MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack,
+                                 const Mp4parseTrackAudioInfo* aAudio,
+                                 const IndiceWrapper* aIndices) {
+  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  Mp4parseCodec codecType = aAudio->sample_info[0].codec_type;
+  for (uint32_t i = 0; i < aAudio->sample_info_count; i++) {
+    if (aAudio->sample_info[i].protected_data.is_encrypted) {
+      auto rv = UpdateTrackProtectedInfo(*this,
+                                         aAudio->sample_info[i].protected_data);
+      NS_ENSURE_SUCCESS(rv, rv);
+      break;
+    }
+  }
+
+  // We assume the first sample info is representative of the entire track.
+  // This code will need updating should that ever not hold, e.g. to handle
+  // different codecs or different numbers of channels in a single track.
+  Mp4parseByteData mp4ParseSampleCodecSpecific =
+      aAudio->sample_info[0].codec_specific_config;
+  Mp4parseByteData extraData = aAudio->sample_info[0].extra_data;
+  MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(),
+             "Should have no codec specific data yet");
+  if (codecType == MP4PARSE_CODEC_OPUS) {
+    mMimeType = "audio/opus"_ns;
+    OpusCodecSpecificData opusCodecSpecificData{};
+    // The Opus decoder expects the container's codec delay or pre-skip value,
+    // in microseconds, as a 64-bit int at the start of the config blob.
+    if (mp4ParseSampleCodecSpecific.data &&
+        mp4ParseSampleCodecSpecific.length >= 12) {
+      uint16_t preskip = mozilla::LittleEndian::readUint16(
+          mp4ParseSampleCodecSpecific.data + 10);
+      opusCodecSpecificData.mContainerCodecDelayMicroSeconds =
+          mozilla::FramesToUsecs(preskip, 48000).value();
+      LOG("Opus stream in MP4 container, %" PRId64
+          " microseconds of encoder delay (%" PRIu16 ").",
+          opusCodecSpecificData.mContainerCodecDelayMicroSeconds, preskip);
+    } else {
+      // This file will error later as it will be rejected by the opus decoder.
+      opusCodecSpecificData.mContainerCodecDelayMicroSeconds = 0;
+    }
+    opusCodecSpecificData.mHeadersBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(opusCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_AAC) {
+    mMimeType = "audio/mp4a-latm"_ns;
+    int64_t codecDelayUS = aTrack->media_time;
+    double USECS_PER_S = 1e6;
+    // We can't use mozilla::UsecsToFrames here because we need to round, and it
+    // floors.
+    uint32_t encoderDelayFrameCount = 0;
+    if (codecDelayUS > 0) {
+      encoderDelayFrameCount = static_cast<uint32_t>(
+          std::lround(static_cast<double>(codecDelayUS) *
+                      aAudio->sample_info->sample_rate / USECS_PER_S));
+      LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.",
+          encoderDelayFrameCount);
+    }
+
+    uint64_t mediaFrameCount = 0;
+    // Pass the padding number, in frames, to the AAC decoder as well.
+    if (aIndices) {
+      MP4SampleIndex::Indice firstIndice = {0};
+      MP4SampleIndex::Indice lastIndice = {0};
+      // Both entries are used below, so both lookups must succeed.
+      bool ok = aIndices->GetIndice(0, firstIndice) &&
+                aIndices->GetIndice(aIndices->Length() - 1, lastIndice);
+      if (ok) {
+        if (firstIndice.start_composition > lastIndice.end_composition) {
+          return MediaResult(
+              NS_ERROR_DOM_MEDIA_METADATA_ERR,
+              RESULT_DETAIL("Inconsistent start and end time in index"));
+        }
+        // The `end_composition` of the very last index entry gives the media
+        // duration, excluding decoder delay and padding. Hand the span to the
+        // decoder so that trimming can be done properly.
+        mediaFrameCount =
+            lastIndice.end_composition - firstIndice.start_composition;
+        LOG("AAC stream in MP4 container, total media duration is %" PRIu64
+            " frames",
+            mediaFrameCount);
+      } else {
+        LOG("AAC stream in MP4 container, couldn't determine total media time");
+      }
+    }
+
+    AacCodecSpecificData aacCodecSpecificData{};
+
+    aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount;
+    aacCodecSpecificData.mMediaFrameCount = mediaFrameCount;
+
+    // codec specific data is used to store the DecoderConfigDescriptor.
+    aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    // extra data stores the ES_Descriptor.
+    aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements(
+        extraData.data, extraData.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(aacCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_FLAC) {
+    MOZ_ASSERT(extraData.length == 0,
+               "FLAC doesn't expect extra data so doesn't handle it!");
+    mMimeType = "audio/flac"_ns;
+    FlacCodecSpecificData flacCodecSpecificData{};
+    flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(flacCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_MP3) {
+    // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4
+    // specific box, which the rust parser recognizes). However, we don't
+    // handle any such data here.
+    mMimeType = "audio/mpeg"_ns;
+    // TODO(bug 1705812): parse the encoder delay values from the mp4.
+    mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}};
+  }
+
+  mRate = aAudio->sample_info[0].sample_rate;
+  mChannels = aAudio->sample_info[0].channels;
+  mBitDepth = aAudio->sample_info[0].bit_depth;
+  mExtendedProfile =
+      AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile);
+  if (aTrack->duration > TimeUnit::MaxTicks()) {
+    mDuration = TimeUnit::FromInfinity();
+  } else {
+    mDuration =
+        TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale);
+  }
+  mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale);
+  mTrackId = aTrack->track_id;
+
+  // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT.
+  if (aAudio->sample_info[0].profile <= 4) {
+    mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile);
+  }
+
+  if (mCodecSpecificConfig.is<NoCodecSpecificData>()) {
+    // Handle codecs that are not explicitly handled above.
+    MOZ_ASSERT(
+        extraData.length == 0,
+        "Codecs that use extra data should be explicitly handled already");
+    AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob;
+    // No codec specific metadata set, use the generic form.
+    codecSpecificBinaryBlob.mBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)};
+  }
+
+  return NS_OK;
+}
+
+bool MP4AudioInfo::IsValid() const {
+  return mChannels > 0 && mRate > 0 &&
+         // Accept any mime type here, but if it's aac, validate the profile.
+         (!mMimeType.EqualsLiteral("audio/mp4a-latm") || mProfile > 0 ||
+          mExtendedProfile > 0);
+}
+
+MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track,
+                                 const Mp4parseTrackVideoInfo* video) {
+  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  Mp4parseCodec codecType = video->sample_info[0].codec_type;
+  for (uint32_t i = 0; i < video->sample_info_count; i++) {
+    if (video->sample_info[i].protected_data.is_encrypted) {
+      auto rv =
+          UpdateTrackProtectedInfo(*this, video->sample_info[i].protected_data);
+      NS_ENSURE_SUCCESS(rv, rv);
+      break;
+    }
+  }
+
+  // We assume that the members of the first sample info are representative of
+  // the entire track. This code will need to be updated should this assumption
+  // ever not hold. E.g. if we need to handle different codecs in a single
+  // track, or if we have different numbers or channels in a single track.
+  if (codecType == MP4PARSE_CODEC_AVC) {
+    mMimeType = "video/avc"_ns;
+  } else if (codecType == MP4PARSE_CODEC_VP9) {
+    mMimeType = "video/vp9"_ns;
+  } else if (codecType == MP4PARSE_CODEC_AV1) {
+    mMimeType = "video/av1"_ns;
+  } else if (codecType == MP4PARSE_CODEC_MP4V) {
+    mMimeType = "video/mp4v-es"_ns;
+  }
+  mTrackId = track->track_id;
+  if (track->duration > TimeUnit::MaxTicks()) {
+    mDuration = TimeUnit::FromInfinity();
+  } else {
+    mDuration =
+        TimeUnit(AssertedCast<int64_t>(track->duration), track->time_scale);
+  }
+  mMediaTime = TimeUnit(track->media_time, track->time_scale);
+  mDisplay.width = AssertedCast<int32_t>(video->display_width);
+  mDisplay.height = AssertedCast<int32_t>(video->display_height);
+  mImage.width = video->sample_info[0].image_width;
+  mImage.height = video->sample_info[0].image_height;
+  mRotation = ToSupportedRotation(video->rotation);
+  Mp4parseByteData extraData = video->sample_info[0].extra_data;
+  // If length is 0 we append nothing
+  mExtraData->AppendElements(extraData.data, extraData.length);
+  return NS_OK;
+}
+
+bool MP4VideoInfo::IsValid() const {
+  return (mDisplay.width > 0 && mDisplay.height > 0) ||
+         (mImage.width > 0 && mImage.height > 0);
+}
+
+}  // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/mp4/DecoderData.h b/dom/media/mp4/DecoderData.h
new file mode 100644
index 0000000000..a8d38d0abc
--- /dev/null
+++ b/dom/media/mp4/DecoderData.h
@@ -0,0 +1,76 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DECODER_DATA_H_
+#define DECODER_DATA_H_
+
+#include "MediaInfo.h"
+#include "MediaResult.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Result.h"
+#include "mozilla/Types.h"
+#include "mozilla/Vector.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "mp4parse.h"
+
+namespace mozilla {
+
+class IndiceWrapper;
+class MP4Demuxer;
+
+struct PsshInfo {  // uuid/data byte-array pair; copy construction is deleted.
+  PsshInfo() = default;
+  PsshInfo(const PsshInfo& aOther) = delete;
+  PsshInfo(PsshInfo&& aOther) = default;
+
+  nsTArray<uint8_t> uuid;  // presumably the protection system ID — confirm
+  nsTArray<uint8_t> data;  // bytes MP4Demuxer::Init() appends to its init data
+
+  bool operator==(const PsshInfo& aOther) const {  // element-wise equality
+    return uuid == aOther.uuid && data == aOther.data;
+  }
+};
+
+class CryptoFile {  // Non-copyable holder of PsshInfo entries plus a flag.
+ public:
+  CryptoFile() : valid(false) {}
+  CryptoFile(const CryptoFile& aCryptoFile) = delete;
+
+  void Update(const uint8_t* aData, size_t aLength) {
+    valid = DoUpdate(aData, aLength).isOk();  // the error code is dropped
+  }
+
+  bool valid;  // set by Update(): whether DoUpdate() returned Ok
+  nsTArray<PsshInfo> pssh;  // presumably populated by DoUpdate() — confirm
+
+ private:
+  mozilla::Result<mozilla::Ok, nsresult> DoUpdate(const uint8_t* aData,
+                                                  size_t aLength);
+};
+
+class MP4AudioInfo : public mozilla::AudioInfo {
+ public:
+  MP4AudioInfo() = default;
+
+  MediaResult Update(const Mp4parseTrackInfo* aTrack,
+                     const Mp4parseTrackAudioInfo* aAudio,
+                     const IndiceWrapper* aIndices);  // takes mp4parse structs
+
+  virtual bool IsValid() const override;  // overrides the base-class check
+};
+
+class MP4VideoInfo : public mozilla::VideoInfo {
+ public:
+  MP4VideoInfo() = default;
+
+  MediaResult Update(const Mp4parseTrackInfo* track,
+                     const Mp4parseTrackVideoInfo* video);  // uses sample_info[0]
+
+  virtual bool IsValid() const override;  // display or image size is non-empty
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Decoder.cpp b/dom/media/mp4/MP4Decoder.cpp
new file mode 100644
index 0000000000..7e2fdf63d9
--- /dev/null
+++ b/dom/media/mp4/MP4Decoder.cpp
@@ -0,0 +1,222 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MP4Decoder.h"
+#include "H264.h"
+#include "VPXDecoder.h"
+#ifdef MOZ_AV1
+#  include "AOMDecoder.h"
+#endif
+#include "MP4Demuxer.h"
+#include "MediaContainerType.h"
+#include "PDMFactory.h"
+#include "PlatformDecoderModule.h"
+#include "VideoUtils.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/gfx/Tools.h"
+#include "nsMimeTypes.h"
+#include "nsReadableUtils.h"
+
+namespace mozilla {
+
+static bool IsWhitelistedH264Codec(const nsAString& aCodec) {
+  uint8_t h264Profile = 0, h264Constraint = 0, h264Level = 0;
+  if (!ExtractH264CodecDetails(aCodec, h264Profile, h264Constraint,
+                               h264Level)) {
+    return false;  // The codec details could not be extracted.
+  }
+
+  // We have no documentation of what each platform can play, so what WMF
+  // documents is used as the baseline for all of them:
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd797815%28v=vs.85%29.aspx
+  // i.e. Baseline, Main and High profiles up to level 5.1. The level cap is
+  // rather arbitrary, so it is stretched to 5.2 and the decoder is left to
+  // deal with any error. Extended profile is accepted too because some
+  // Extended compliant bitstreams are also Baseline compliant.
+  const bool levelInRange =
+      h264Level >= H264_LEVEL_1 && h264Level <= H264_LEVEL_5_2;
+  const bool profileAllowed =
+      h264Profile == H264_PROFILE_BASE || h264Profile == H264_PROFILE_MAIN ||
+      h264Profile == H264_PROFILE_EXTENDED || h264Profile == H264_PROFILE_HIGH;
+  return levelInRange && profileAllowed;
+}
+
+static bool IsTypeValid(const MediaContainerType& aType) {
+  // Accept only the MP4 MIME types seen on the web; these are deliberately
+  // distinct from the internal types our demuxers etc. output.
+  const auto& mime = aType.Type();
+  return mime == MEDIAMIMETYPE("audio/mp4") ||
+         mime == MEDIAMIMETYPE("audio/x-m4a") ||
+         mime == MEDIAMIMETYPE("video/mp4") ||
+         mime == MEDIAMIMETYPE("video/quicktime") ||
+         mime == MEDIAMIMETYPE("video/x-m4v");
+}
+
+/* static */  // Builds one TrackInfo per codec listed in aType's codecs.
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+    const MediaContainerType& aType, MediaResult& aError) {
+  nsTArray<UniquePtr<TrackInfo>> tracks;
+
+  if (!IsTypeValid(aType)) {
+    aError = MediaResult(
+        NS_ERROR_DOM_MEDIA_FATAL_ERR,
+        RESULT_DETAIL("Invalid type:%s", aType.Type().AsString().get()));
+    return tracks;
+  }
+
+  aError = NS_OK;
+
+  const MediaCodecs& codecs = aType.ExtendedType().Codecs();
+  if (codecs.IsEmpty()) {
+    return tracks;  // No codecs listed: empty result, aError stays NS_OK.
+  }
+  // H.264 codec strings are only honoured for the video container types.
+  const bool isVideo = aType.Type() == MEDIAMIMETYPE("video/mp4") ||
+                       aType.Type() == MEDIAMIMETYPE("video/quicktime") ||
+                       aType.Type() == MEDIAMIMETYPE("video/x-m4v");
+
+  for (const auto& codec : codecs.Range()) {
+    if (IsAACCodecString(codec)) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/mp4a-latm"_ns, aType));
+      continue;
+    }
+    if (codec.EqualsLiteral("mp3")) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/mpeg"_ns, aType));
+      continue;
+    }
+    // The valid codecs parameter value with mp4 MIME types should be "Opus" and
+    // "fLaC", but "opus" and "flac" are acceptable due to historical reasons.
+    if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("Opus") ||
+        codec.EqualsLiteral("flac") || codec.EqualsLiteral("fLaC")) {
+      NS_ConvertUTF16toUTF8 c(codec);
+      ToLowerCase(c);  // The MIME type always uses the lower-case spelling.
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/"_ns + c, aType));
+      continue;
+    }
+    if (IsVP9CodecString(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/vp9"_ns, aType);
+      VPXDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+#ifdef MOZ_AV1
+    if (StaticPrefs::media_av1_enabled() && IsAV1CodecString(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/av1"_ns, aType);
+      AOMDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+#endif
+    if (isVideo && IsWhitelistedH264Codec(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/avc"_ns, aType);
+      uint8_t profile = 0, constraint = 0, level = 0;
+      MOZ_ALWAYS_TRUE(
+          ExtractH264CodecDetails(codec, profile, constraint, level));
+      uint32_t width = aType.ExtendedType().GetWidth().refOr(1280);
+      uint32_t height = aType.ExtendedType().GetHeight().refOr(720);
+      trackInfo->GetAsVideoInfo()->mExtraData =
+          H264::CreateExtraData(profile, constraint, level, {width, height});
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+    // Unknown codec: flag the error but keep processing the remaining codecs.
+    aError = MediaResult(
+        NS_ERROR_DOM_MEDIA_FATAL_ERR,
+        RESULT_DETAIL("Unknown codec:%s", NS_ConvertUTF16toUTF8(codec).get()));
+  }
+  return tracks;
+}
+
+/* static */
+bool MP4Decoder::IsSupportedType(const MediaContainerType& aType,
+                                 DecoderDoctorDiagnostics* aDiagnostics) {
+  if (!IsEnabled()) {
+    return false;
+  }
+
+  MediaResult rv = NS_OK;
+  auto tracks = GetTracksInfo(aType, rv);
+  if (NS_FAILED(rv)) {
+    return false;  // Invalid container type or an unknown codec was listed.
+  }
+
+  if (!tracks.IsEmpty()) {
+    // Codecs were listed explicitly: every one of them must be supported.
+    RefPtr<PDMFactory> platform = new PDMFactory();
+    for (const auto& track : tracks) {
+      if (!track ||
+          platform->Supports(SupportDecoderParams(*track), aDiagnostics) ==
+              media::DecodeSupport::Unsupported) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // We have only container info so try to guess the content type.
+  // Assume AAC for the audio types, H.264 (plus AV1 if enabled) otherwise.
+  if (aType.Type() == MEDIAMIMETYPE("audio/mp4") ||
+      aType.Type() == MEDIAMIMETYPE("audio/x-m4a")) {
+    tracks.AppendElement(
+        CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+            "audio/mp4a-latm"_ns, aType));
+  } else {
+    tracks.AppendElement(
+        CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+            "video/avc"_ns, aType));
+    if (StaticPrefs::media_av1_enabled()) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/av1"_ns, aType));
+    }
+  }
+
+  // Guessed tracks: it is enough for any single one of them to be supported.
+  RefPtr<PDMFactory> platform = new PDMFactory();
+  for (const auto& track : tracks) {
+    if (track &&
+        platform->Supports(SupportDecoderParams(*track), aDiagnostics) !=
+            media::DecodeSupport::Unsupported) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/* static */  // True for either of the two MIME strings treated as H.264.
+bool MP4Decoder::IsH264(const nsACString& aMimeType) {
+  const bool isContainerType = aMimeType.EqualsLiteral("video/mp4");
+  return isContainerType || aMimeType.EqualsLiteral("video/avc");
+}
+
+/* static */  // True only for the single MIME string treated as AAC.
+bool MP4Decoder::IsAAC(const nsACString& aMimeType) {
+  return aMimeType.EqualsLiteral("audio/mp4a-latm");
+}
+
+/* static */  // Reflects the value of StaticPrefs::media_mp4_enabled().
+bool MP4Decoder::IsEnabled() { return StaticPrefs::media_mp4_enabled(); }
+
+/* static */  // Convenience overload for callers that ignore the error.
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+    const MediaContainerType& aType) {
+  MediaResult rv = NS_OK;  // Filled in by the callee, then discarded.
+  return GetTracksInfo(aType, rv);
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/MP4Decoder.h b/dom/media/mp4/MP4Decoder.h
new file mode 100644
index 0000000000..07b085929b
--- /dev/null
+++ b/dom/media/mp4/MP4Decoder.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#if !defined(MP4Decoder_h_)
+#  define MP4Decoder_h_
+
+#  include "mozilla/UniquePtr.h"
+#  include "nsStringFwd.h"
+#  include "nsTArray.h"
+
+namespace mozilla {
+
+class MediaContainerType;
+class MediaResult;
+class DecoderDoctorDiagnostics;
+class TrackInfo;
+
+// Decoder that uses a bundled MP4 demuxer and platform decoders to play MP4.
+class MP4Decoder {
+ public:
+  // Returns true if aContainerType is an MP4 type that we think we can render
+  // with a platform decoder backend.
+  // If provided, codecs are checked for support.
+  static bool IsSupportedType(const MediaContainerType& aContainerType,
+                              DecoderDoctorDiagnostics* aDiagnostics);
+
+  // Return true if aMimeType is one of the strings used by our demuxers to
+  // identify H264. Does not parse general content type strings, i.e. white
+  // space matters.
+  static bool IsH264(const nsACString& aMimeType);
+
+  // Return true if aMimeType is one of the strings used by our demuxers to
+  // identify AAC. Does not parse general content type strings, i.e. white
+  // space matters.
+  static bool IsAAC(const nsACString& aMimeType);
+
+  // Returns true if the MP4 backend is preffed on.
+  static bool IsEnabled();
+  // Returns the tracks described by aType's codecs; errors are not reported.
+  static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+      const MediaContainerType& aType);
+
+ private:
+  static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+      const MediaContainerType& aType, MediaResult& aError);  // sets aError
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Demuxer.cpp b/dom/media/mp4/MP4Demuxer.cpp
new file mode 100644
index 0000000000..f8b9e12810
--- /dev/null
+++ b/dom/media/mp4/MP4Demuxer.cpp
@@ -0,0 +1,620 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <limits>
+#include <stdint.h>
+
+#include "MP4Demuxer.h"
+
+#include "AnnexB.h"
+#include "BufferStream.h"
+#include "H264.h"
+#include "MP4Decoder.h"
+#include "MP4Metadata.h"
+#include "MoofParser.h"
+#include "ResourceStream.h"
+#include "TimeUnits.h"
+#include "VPXDecoder.h"
+#include "mozilla/Span.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Telemetry.h"
+#include "nsPrintfCString.h"
+#include "SampleIterator.h"
+
+extern mozilla::LazyLogModule gMediaDemuxerLog;
+mozilla::LogModule* GetDemuxerLog() { return gMediaDemuxerLog; }  // log accessor
+
+#define LOG(arg, ...)                                                 \
+  DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \
+            __func__, ##__VA_ARGS__)
+
+namespace mozilla {
+
+using TimeUnit = media::TimeUnit;
+using TimeInterval = media::TimeInterval;
+using TimeIntervals = media::TimeIntervals;
+
+DDLoggedTypeDeclNameAndBase(MP4TrackDemuxer, MediaTrackDemuxer);
+
+class MP4TrackDemuxer : public MediaTrackDemuxer,
+                        public DecoderDoctorLifeLogger<MP4TrackDemuxer> {
+ public:
+  MP4TrackDemuxer(MediaResource* aResource, UniquePtr<TrackInfo>&& aInfo,
+                  const IndiceWrapper& aIndices, uint32_t aTimeScale);
+
+  UniquePtr<TrackInfo> GetInfo() const override;
+
+  RefPtr<SeekPromise> Seek(const TimeUnit& aTime) override;
+
+  RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override;
+
+  void Reset() override;
+
+  nsresult GetNextRandomAccessPoint(TimeUnit* aTime) override;
+
+  RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint(
+      const TimeUnit& aTimeThreshold) override;
+
+  TimeIntervals GetBuffered() override;
+
+  void NotifyDataRemoved();
+  void NotifyDataArrived();
+
+ private:
+  already_AddRefed<MediaRawData> GetNextSample();
+  void EnsureUpToDateIndex();
+  void SetNextKeyFrameTime();
+  RefPtr<MediaResource> mResource;
+  RefPtr<ResourceStream> mStream;
+  UniquePtr<TrackInfo> mInfo;
+  RefPtr<MP4SampleIndex> mIndex;
+  UniquePtr<SampleIterator> mIterator;
+  Maybe<TimeUnit> mNextKeyframeTime;  // Nothing when no next keyframe is known
+  // Keyframe found by Seek() or SkipToNextRandomAccessPoint(), not yet
+  // returned by GetSamples().
+  RefPtr<MediaRawData> mQueuedSample;
+  bool mNeedReIndex;  // set by NotifyDataArrived(), cleared after re-indexing
+  enum CodecType { kH264, kVP9, kAAC, kOther } mType = kOther;
+};
+
+MP4Demuxer::MP4Demuxer(MediaResource* aResource)
+    : mResource(aResource),
+      mStream(new ResourceStream(aResource)),
+      mIsSeekable(false) {  // updated by Init() from the parsed metadata
+  DDLINKCHILD("resource", aResource);
+  DDLINKCHILD("stream", mStream.get());
+}
+
+RefPtr<MP4Demuxer::InitPromise> MP4Demuxer::Init() {
+  AutoPinned<ResourceStream> stream(mStream);
+  // Parses the MP4 metadata and creates one track demuxer per usable track.
+  // 'result' will capture the first warning, if any.
+  MediaResult result{NS_OK};
+
+  MP4Metadata::ResultAndByteBuffer initData = MP4Metadata::Metadata(stream);
+  if (!initData.Ref()) {
+    return InitPromise::CreateAndReject(
+        NS_FAILED(initData.Result())
+            ? std::move(initData.Result())
+            : MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 metadata or OOM")),
+        __func__);
+  } else if (NS_FAILED(initData.Result()) && result == NS_OK) {
+    result = std::move(initData.Result());
+  }
+
+  RefPtr<BufferStream> bufferstream = new BufferStream(initData.Ref());
+
+  MP4Metadata metadata{bufferstream};
+  DDLINKCHILD("metadata", &metadata);
+  nsresult rv = metadata.Parse();
+  if (NS_FAILED(rv)) {
+    return InitPromise::CreateAndReject(
+        MediaResult(rv, RESULT_DETAIL("Parse MP4 metadata failed")), __func__);
+  }
+
+  auto audioTrackCount = metadata.GetNumberTracks(TrackInfo::kAudioTrack);
+  if (audioTrackCount.Ref() == MP4Metadata::NumberTracksError()) {
+    if (StaticPrefs::media_playback_warnings_as_errors()) {
+      return InitPromise::CreateAndReject(
+          MediaResult(
+              NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+              RESULT_DETAIL("Invalid audio track (%s)",
+                            audioTrackCount.Result().Description().get())),
+          __func__);
+    }
+    audioTrackCount.Ref() = 0;  // Treat the error as "no audio tracks".
+  }
+
+  auto videoTrackCount = metadata.GetNumberTracks(TrackInfo::kVideoTrack);
+  if (videoTrackCount.Ref() == MP4Metadata::NumberTracksError()) {
+    if (StaticPrefs::media_playback_warnings_as_errors()) {
+      return InitPromise::CreateAndReject(
+          MediaResult(
+              NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+              RESULT_DETAIL("Invalid video track (%s)",
+                            videoTrackCount.Result().Description().get())),
+          __func__);
+    }
+    videoTrackCount.Ref() = 0;  // Treat the error as "no video tracks".
+  }
+
+  if (audioTrackCount.Ref() == 0 && videoTrackCount.Ref() == 0) {
+    return InitPromise::CreateAndReject(
+        MediaResult(
+            NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+            RESULT_DETAIL("No MP4 audio (%s) or video (%s) tracks",
+                          audioTrackCount.Result().Description().get(),
+                          videoTrackCount.Result().Description().get())),
+        __func__);
+  }
+
+  if (NS_FAILED(audioTrackCount.Result()) && result == NS_OK) {
+    result = std::move(audioTrackCount.Result());
+  }
+  if (NS_FAILED(videoTrackCount.Result()) && result == NS_OK) {
+    result = std::move(videoTrackCount.Result());
+  }
+
+  if (audioTrackCount.Ref() != 0) {
+    for (size_t i = 0; i < audioTrackCount.Ref(); i++) {
+      MP4Metadata::ResultAndTrackInfo info =
+          metadata.GetTrackInfo(TrackInfo::kAudioTrack, i);
+      if (!info.Ref()) {
+        if (StaticPrefs::media_playback_warnings_as_errors()) {
+          return InitPromise::CreateAndReject(
+              MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 audio track (%s)",
+                                        info.Result().Description().get())),
+              __func__);
+        }
+        if (result == NS_OK) {
+          result =
+              MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 audio track (%s)",
+                                        info.Result().Description().get()));
+        }
+        continue;
+      } else if (NS_FAILED(info.Result()) && result == NS_OK) {
+        result = std::move(info.Result());
+      }
+      MP4Metadata::ResultAndIndice indices =
+          metadata.GetTrackIndice(info.Ref()->mTrackId);
+      if (!indices.Ref()) {
+        if (NS_FAILED(indices.Result()) && result == NS_OK) {
+          result = std::move(indices.Result());
+        }
+        continue;  // No usable index for this track: skip it.
+      }
+      RefPtr<MP4TrackDemuxer> demuxer =
+          new MP4TrackDemuxer(mResource, std::move(info.Ref()),
+                              *indices.Ref().get(), info.Ref()->mTimeScale);
+      DDLINKCHILD("audio demuxer", demuxer.get());
+      mAudioDemuxers.AppendElement(std::move(demuxer));
+    }
+  }
+
+  if (videoTrackCount.Ref() != 0) {
+    for (size_t i = 0; i < videoTrackCount.Ref(); i++) {
+      MP4Metadata::ResultAndTrackInfo info =
+          metadata.GetTrackInfo(TrackInfo::kVideoTrack, i);
+      if (!info.Ref()) {
+        if (StaticPrefs::media_playback_warnings_as_errors()) {
+          return InitPromise::CreateAndReject(
+              MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 video track (%s)",
+                                        info.Result().Description().get())),
+              __func__);
+        }
+        if (result == NS_OK) {
+          result =
+              MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 video track (%s)",
+                                        info.Result().Description().get()));
+        }
+        continue;
+      } else if (NS_FAILED(info.Result()) && result == NS_OK) {
+        result = std::move(info.Result());
+      }
+      MP4Metadata::ResultAndIndice indices =
+          metadata.GetTrackIndice(info.Ref()->mTrackId);
+      if (!indices.Ref()) {
+        if (NS_FAILED(indices.Result()) && result == NS_OK) {
+          result = std::move(indices.Result());
+        }
+        continue;  // No usable index for this track: skip it.
+      }
+      RefPtr<MP4TrackDemuxer> demuxer =
+          new MP4TrackDemuxer(mResource, std::move(info.Ref()),
+                              *indices.Ref().get(), info.Ref()->mTimeScale);
+      DDLINKCHILD("video demuxer", demuxer.get());
+      mVideoDemuxers.AppendElement(std::move(demuxer));
+    }
+  }
+
+  MP4Metadata::ResultAndCryptoFile cryptoFile = metadata.Crypto();
+  if (NS_FAILED(cryptoFile.Result()) && result == NS_OK) {
+    result = std::move(cryptoFile.Result());
+  }
+  MOZ_ASSERT(cryptoFile.Ref());
+  if (cryptoFile.Ref()->valid) {
+    const nsTArray<PsshInfo>& psshs = cryptoFile.Ref()->pssh;
+    for (uint32_t i = 0; i < psshs.Length(); i++) {
+      mCryptoInitData.AppendElements(psshs[i].data);
+    }
+  }
+
+  mIsSeekable = metadata.CanSeek();
+  // Resolve even when a warning was recorded; 'result' carries it to the caller.
+  return InitPromise::CreateAndResolve(result, __func__);
+}
+
+uint32_t MP4Demuxer::GetNumberTracks(TrackInfo::TrackType aType) const {
+  // Only audio and video tracks are exposed; any other type reports zero.
+  if (aType == TrackInfo::kAudioTrack) {
+    return uint32_t(mAudioDemuxers.Length());
+  }
+  if (aType == TrackInfo::kVideoTrack) {
+    return uint32_t(mVideoDemuxers.Length());
+  }
+  return 0;
+}
+
+already_AddRefed<MediaTrackDemuxer> MP4Demuxer::GetTrackDemuxer(
+    TrackInfo::TrackType aType, uint32_t aTrackNumber) {
+  // Audio and video each have their own list of track demuxers. An index
+  // past the end of the matching list, or any other track type, yields
+  // nullptr.
+  RefPtr<MediaTrackDemuxer> picked;
+  if (aType == TrackInfo::kAudioTrack) {
+    if (aTrackNumber < uint32_t(mAudioDemuxers.Length())) {
+      picked = mAudioDemuxers[aTrackNumber];
+    }
+  } else if (aType == TrackInfo::kVideoTrack) {
+    if (aTrackNumber < uint32_t(mVideoDemuxers.Length())) {
+      picked = mVideoDemuxers[aTrackNumber];
+    }
+  }
+  return picked.forget();
+}
+
+bool MP4Demuxer::IsSeekable() const { return mIsSeekable; }  // set by Init()
+
+void MP4Demuxer::NotifyDataArrived() {  // forwarded to every track demuxer
+  for (const auto& audioTrack : mAudioDemuxers) {
+    audioTrack->NotifyDataArrived();
+  }
+  for (const auto& videoTrack : mVideoDemuxers) {
+    videoTrack->NotifyDataArrived();
+  }
+}
+
+void MP4Demuxer::NotifyDataRemoved() {  // forwarded to every track demuxer
+  for (const auto& audioTrack : mAudioDemuxers) {
+    audioTrack->NotifyDataRemoved();
+  }
+  for (const auto& videoTrack : mVideoDemuxers) {
+    videoTrack->NotifyDataRemoved();
+  }
+}
+
+UniquePtr<EncryptionInfo> MP4Demuxer::GetCrypto() {
+  if (mCryptoInitData.IsEmpty()) {
+    return nullptr;  // Init() collected no crypto init data.
+  }
+  UniquePtr<EncryptionInfo> info(new EncryptionInfo{});
+  info->AddInitData(u"cenc"_ns, mCryptoInitData);
+  return info;
+}
+
+MP4TrackDemuxer::MP4TrackDemuxer(MediaResource* aResource,
+                                 UniquePtr<TrackInfo>&& aInfo,
+                                 const IndiceWrapper& aIndices,
+                                 uint32_t aTimeScale)
+    : mResource(aResource),
+      mStream(new ResourceStream(aResource)),
+      mInfo(std::move(aInfo)),
+      mIndex(new MP4SampleIndex(aIndices, mStream, mInfo->mTrackId,
+                                mInfo->IsAudio(), aTimeScale)),
+      mIterator(MakeUnique<SampleIterator>(mIndex)),
+      mNeedReIndex(true) {
+  EnsureUpToDateIndex();  // Force update of index
+  // Classify the track by MIME type; mType is left untouched if none match.
+  VideoInfo* videoInfo = mInfo->GetAsVideoInfo();
+  AudioInfo* audioInfo = mInfo->GetAsAudioInfo();
+  if (videoInfo && MP4Decoder::IsH264(mInfo->mMimeType)) {
+    mType = kH264;
+    RefPtr<MediaByteBuffer> extraData = videoInfo->mExtraData;
+    SPSData spsdata;  // When sane, SPS dimensions replace the existing ones.
+    if (H264::DecodeSPSFromExtraData(extraData, spsdata) &&
+        spsdata.pic_width > 0 && spsdata.pic_height > 0 &&
+        H264::EnsureSPSIsSane(spsdata)) {
+      videoInfo->mImage.width = spsdata.pic_width;
+      videoInfo->mImage.height = spsdata.pic_height;
+      videoInfo->mDisplay.width = spsdata.display_width;
+      videoInfo->mDisplay.height = spsdata.display_height;
+    }
+  } else if (videoInfo && VPXDecoder::IsVP9(mInfo->mMimeType)) {
+    mType = kVP9;
+  } else if (audioInfo && MP4Decoder::IsAAC(mInfo->mMimeType)) {
+    mType = kAAC;
+  }
+}
+
+UniquePtr<TrackInfo> MP4TrackDemuxer::GetInfo() const { return mInfo->Clone(); }  // returns a copy
+
+void MP4TrackDemuxer::EnsureUpToDateIndex() {
+  if (!mNeedReIndex) {
+    return;  // Nothing was flagged since the last index update.
+  }
+  AutoPinned<MediaResource> resource(mResource);
+  MediaByteRangeSet byteRanges;
+  nsresult rv = resource->GetCachedRanges(byteRanges);
+  if (NS_FAILED(rv)) {
+    return;  // mNeedReIndex stays set, so the next call tries again.
+  }
+  mIndex->UpdateMoofIndex(byteRanges);
+  mNeedReIndex = false;
+}
+
+RefPtr<MP4TrackDemuxer::SeekPromise> MP4TrackDemuxer::Seek(
+    const TimeUnit& aTime) {
+  auto seekTime = aTime;
+  mQueuedSample = nullptr;
+
+  mIterator->Seek(seekTime);
+
+  // Consume samples until a non-empty keyframe is found; its time is the
+  // time we actually seeked to. That keyframe is queued for GetSamples().
+  do {
+    RefPtr<MediaRawData> sample = GetNextSample();
+    if (!sample) {
+      return SeekPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                                          __func__);
+    }
+    if (!sample->Size()) {
+      // This sample can't be decoded, continue searching.
+      continue;
+    }
+    if (sample->mKeyframe) {
+      MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime());
+      mQueuedSample = sample;
+      seekTime = mQueuedSample->mTime;
+    }
+  } while (!mQueuedSample);
+
+  SetNextKeyFrameTime();
+
+  return SeekPromise::CreateAndResolve(seekTime, __func__);
+}
+
+already_AddRefed<MediaRawData> MP4TrackDemuxer::GetNextSample() {
+  RefPtr<MediaRawData> sample = mIterator->GetNext();
+  if (!sample) {
+    return nullptr;  // The iterator has no further sample.
+  }
+  if (mInfo->GetAsVideoInfo()) {
+    sample->mExtraData = mInfo->GetAsVideoInfo()->mExtraData;
+    if (mType == kH264 && !sample->mCrypto.IsEncrypted()) {
+      H264::FrameType type = H264::GetFrameType(sample);
+      switch (type) {
+        case H264::FrameType::I_FRAME:
+          [[fallthrough]];
+        case H264::FrameType::OTHER: {
+          bool keyframe = type == H264::FrameType::I_FRAME;
+          if (sample->mKeyframe != keyframe) {
+            NS_WARNING(nsPrintfCString("Frame incorrectly marked as %skeyframe "
+                                       "@ pts:%" PRId64 " dur:%" PRId64
+                                       " dts:%" PRId64,
+                                       keyframe ? "" : "non-",
+                                       sample->mTime.ToMicroseconds(),
+                                       sample->mDuration.ToMicroseconds(),
+                                       sample->mTimecode.ToMicroseconds())
+                           .get());
+            sample->mKeyframe = keyframe;  // Trust the bitstream's frame type.
+          }
+          break;
+        }
+        case H264::FrameType::INVALID:
+          NS_WARNING(nsPrintfCString("Invalid H264 frame @ pts:%" PRId64
+                                     " dur:%" PRId64 " dts:%" PRId64,
+                                     sample->mTime.ToMicroseconds(),
+                                     sample->mDuration.ToMicroseconds(),
+                                     sample->mTimecode.ToMicroseconds())
+                         .get());
+          // We could reject the sample now, however demuxer errors are fatal.
+          // So we keep the invalid frame, relying on the H264 decoder to
+          // handle the error later.
+          // TODO: make demuxer errors non-fatal.
+          break;
+      }
+    } else if (mType == kVP9 && !sample->mCrypto.IsEncrypted()) {
+      bool keyframe = VPXDecoder::IsKeyframe(
+          Span<const uint8_t>(sample->Data(), sample->Size()),
+          VPXDecoder::Codec::VP9);
+      if (sample->mKeyframe != keyframe) {
+        NS_WARNING(nsPrintfCString(
+                       "Frame incorrectly marked as %skeyframe "
+                       "@ pts:%" PRId64 " dur:%" PRId64 " dts:%" PRId64,
+                       keyframe ? "" : "non-", sample->mTime.ToMicroseconds(),
+                       sample->mDuration.ToMicroseconds(),
+                       sample->mTimecode.ToMicroseconds())
+                       .get());
+        sample->mKeyframe = keyframe;  // Trust the bitstream's frame type.
+      }
+    }
+  }
+
+  // Adjust trimming information if needed.
+  if (mInfo->GetAsAudioInfo()) {
+    AudioInfo* info = mInfo->GetAsAudioInfo();
+    TimeUnit originalPts = sample->mTime;
+    TimeUnit originalEnd = sample->GetEndTime();
+    if (sample->mTime.IsNegative()) {
+      // Clamp the start to zero, keep only the non-negative part of the
+      // duration, and remember the untouched window.
+      sample->mTime = TimeUnit::Zero(originalPts);
+      sample->mDuration = std::max(TimeUnit::Zero(sample->mTime),
+                                   originalPts + sample->mDuration);
+      sample->mOriginalPresentationWindow =
+          Some(TimeInterval{originalPts, originalEnd});
+    }
+    // The demuxer only knows the presentation time of the packet, not the
+    // actual number of samples that will be decoded from this packet.
+    // However we need to trim the last packet to the correct duration.
+    // Find the actual size of the decoded packet to know how many samples to
+    // trim. This only works because the packet sizes are constant.
+    TimeUnit totalMediaDurationIncludingTrimming =
+        info->mDuration - info->mMediaTime;
+    if (mType == kAAC &&
+        sample->GetEndTime() >= totalMediaDurationIncludingTrimming &&
+        totalMediaDurationIncludingTrimming.IsPositive()) {
+      // Seek backward a bit.
+      mIterator->Seek(sample->mTime - sample->mDuration);
+      RefPtr<MediaRawData> previousSample = mIterator->GetNext();
+      if (previousSample) {
+        TimeInterval fullPacketDuration{previousSample->mTime,
+                                        previousSample->GetEndTime()};
+        sample->mOriginalPresentationWindow = Some(TimeInterval{
+            originalPts, originalPts + fullPacketDuration.Length()});
+      }
+      // Seek back so we're back at the original location -- there's no packet
+      // left anyway.
+      mIterator->Seek(sample->mTime);
+      RefPtr<MediaRawData> dummy = mIterator->GetNext();  // fetched, then dropped
+    }
+  }
+
+  if (MOZ_LOG_TEST(GetDemuxerLog(), LogLevel::Verbose)) {
+    bool isAudio = mInfo->GetAsAudioInfo();
+    TimeUnit originalStart = TimeUnit::Invalid();
+    TimeUnit originalEnd = TimeUnit::Invalid();
+    if (sample->mOriginalPresentationWindow) {
+      originalStart = sample->mOriginalPresentationWindow->mStart;
+      originalEnd = sample->mOriginalPresentationWindow->mEnd;
+    }
+    LOG("%s packet demuxed (track id: %d): [%s,%s], duration: %s (original "
+        "time: [%s,%s])",
+        isAudio ? "Audio" : "Video", mInfo->mTrackId,
+        sample->mTime.ToString().get(), sample->GetEndTime().ToString().get(),
+        sample->mDuration.ToString().get(), originalStart.ToString().get(),
+        originalEnd.ToString().get());
+  }
+
+  return sample.forget();
+}
+
+RefPtr<MP4TrackDemuxer::SamplesPromise> MP4TrackDemuxer::GetSamples(
+    int32_t aNumSamples) {
+  EnsureUpToDateIndex();
+  RefPtr<SamplesHolder> samples = new SamplesHolder;
+  if (!aNumSamples) {  // Asking for zero samples is treated as an error.
+    return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                                           __func__);
+  }
+  // A sample queued by Seek()/SkipToNextRandomAccessPoint() is returned first.
+  if (mQueuedSample) {
+    NS_ASSERTION(mQueuedSample->mKeyframe, "mQueuedSample must be a keyframe");
+    samples->AppendSample(mQueuedSample);
+    mQueuedSample = nullptr;
+    aNumSamples--;
+  }
+  RefPtr<MediaRawData> sample;
+  while (aNumSamples && (sample = GetNextSample())) {
+    if (!sample->Size()) {
+      continue;  // Empty samples are dropped and not counted.
+    }
+    MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime());
+    samples->AppendSample(sample);
+    aNumSamples--;
+  }
+
+  if (samples->GetSamples().IsEmpty()) {
+    return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                                           __func__);
+  }
+  // Refresh the cached next-keyframe time once it is unknown or was reached.
+  if (mNextKeyframeTime.isNothing() ||
+      samples->GetSamples().LastElement()->mTime >= mNextKeyframeTime.value()) {
+    SetNextKeyFrameTime();
+  }
+  return SamplesPromise::CreateAndResolve(samples, __func__);
+}
+
+void MP4TrackDemuxer::SetNextKeyFrameTime() {
+  const TimeUnit upcoming = mIterator->GetNextKeyframeTime();
+  mNextKeyframeTime.reset();
+  if (upcoming.IsValid()) {  // an invalid time leaves the cache empty
+    mNextKeyframeTime.emplace(upcoming);
+  }
+}
+
+void MP4TrackDemuxer::Reset() {
+  mQueuedSample = nullptr;  // Drop any keyframe queued by a previous seek/skip.
+  // TODO: verify this
+  mIterator->Seek(TimeUnit::FromNegativeInfinity());
+  SetNextKeyFrameTime();
+}
+
+nsresult MP4TrackDemuxer::GetNextRandomAccessPoint(TimeUnit* aTime) {
+  // Reports the cached time of the next keyframe through aTime.
+  // When no next keyframe is known, +infinity is reported instead.
+  // This function always succeeds.
+  // aTime must point to a writable TimeUnit.
+  *aTime = mNextKeyframeTime.isSome() ? mNextKeyframeTime.value()
+                                      : TimeUnit::FromInfinity();
+  return NS_OK;
+}
+
+RefPtr<MP4TrackDemuxer::SkipAccessPointPromise>
+MP4TrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) {
+  mQueuedSample = nullptr;
+  // Loop until we reach the first keyframe at or after the threshold.
+  uint32_t parsed = 0;  // samples consumed; reported on success and failure
+  bool found = false;
+  RefPtr<MediaRawData> sample;
+  while (!found && (sample = GetNextSample())) {
+    parsed++;
+    MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime());
+    if (sample->mKeyframe && sample->mTime >= aTimeThreshold) {
+      found = true;
+      mQueuedSample = sample;  // Returned first by the next GetSamples().
+    }
+  }
+  SetNextKeyFrameTime();
+  if (found) {
+    return SkipAccessPointPromise::CreateAndResolve(parsed, __func__);
+  }
+  SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed);
+  return SkipAccessPointPromise::CreateAndReject(std::move(failure), __func__);
+}
+
+TimeIntervals MP4TrackDemuxer::GetBuffered() {
+  EnsureUpToDateIndex();
+  AutoPinned<MediaResource> resource(mResource);
+  MediaByteRangeSet byteRanges;
+  nsresult rv = resource->GetCachedRanges(byteRanges);
+  // If the cached ranges cannot be read, report nothing as buffered.
+  if (NS_FAILED(rv)) {
+    return TimeIntervals();
+  }
+  // Translate the cached byte ranges into time ranges via the sample index.
+  return mIndex->ConvertByteRangesToTimeRanges(byteRanges);
+}
+
+void MP4TrackDemuxer::NotifyDataArrived() { mNeedReIndex = true; }  // re-index lazily
+
+void MP4TrackDemuxer::NotifyDataRemoved() {
+  AutoPinned<MediaResource> resource(mResource);
+  MediaByteRangeSet byteRanges;
+  nsresult rv = resource->GetCachedRanges(byteRanges);
+  if (NS_FAILED(rv)) {
+    return;  // Index and mNeedReIndex are left untouched on failure.
+  }
+  mIndex->UpdateMoofIndex(byteRanges, true /* can evict */);
+  mNeedReIndex = false;  // The index was just rebuilt from current ranges.
+}
+
+}  // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/mp4/MP4Demuxer.h b/dom/media/mp4/MP4Demuxer.h
new file mode 100644
index 0000000000..22fa5b137f
--- /dev/null
+++ b/dom/media/mp4/MP4Demuxer.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if !defined(MP4Demuxer_h_)
+#  define MP4Demuxer_h_
+
+#  include "mozilla/Maybe.h"
+#  include "mozilla/Monitor.h"
+#  include "MediaDataDemuxer.h"
+#  include "MediaResource.h"
+
+namespace mozilla {
+class MP4TrackDemuxer;
+class ResourceStream;
+
+DDLoggedTypeDeclNameAndBase(MP4Demuxer, MediaDataDemuxer);
+
+class MP4Demuxer : public MediaDataDemuxer,
+                   public DecoderDoctorLifeLogger<MP4Demuxer> {
+ public:
+  explicit MP4Demuxer(MediaResource* aResource);
+  // Every public method below overrides a MediaDataDemuxer virtual.
+  RefPtr<InitPromise> Init() override;
+
+  uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override;
+
+  already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(
+      TrackInfo::TrackType aType, uint32_t aTrackNumber) override;
+
+  bool IsSeekable() const override;
+
+  UniquePtr<EncryptionInfo> GetCrypto() override;
+
+  void NotifyDataArrived() override;
+
+  void NotifyDataRemoved() override;
+
+ private:
+  RefPtr<MediaResource> mResource;
+  RefPtr<ResourceStream> mStream;
+  // NOTE(review): presumably one track demuxer per audio/video track — confirm.
+  AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mAudioDemuxers;
+  AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mVideoDemuxers;
+  nsTArray<uint8_t> mCryptoInitData;
+  bool mIsSeekable;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Interval.h b/dom/media/mp4/MP4Interval.h
new file mode 100644
index 0000000000..70e6daeadd
--- /dev/null
+++ b/dom/media/mp4/MP4Interval.h
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef INTERVAL_H_
+#define INTERVAL_H_
+
+#include "nsTArray.h"
+#include <algorithm>
+#include <nsString.h>
+
+namespace mozilla {
+
+// An interval [start, end] over T, with helpers for arrays of intervals.
+template <typename T>
+struct MP4Interval {
+  MP4Interval() : start{}, end{} {}
+  MP4Interval(T aStart, T aEnd) : start(aStart), end(aEnd) {
+    MOZ_ASSERT(aStart <= aEnd);
+  }
+  T Length() const { return end - start; }
+  MP4Interval Intersection(const MP4Interval& aOther) const {
+    T s = start > aOther.start ? start : aOther.start;
+    T e = end < aOther.end ? end : aOther.end;
+    if (s > e) {
+      return MP4Interval();  // Disjoint: report a default-constructed interval.
+    }
+    return MP4Interval(s, e);
+  }
+  bool Contains(const MP4Interval& aOther) const {
+    return aOther.start >= start && aOther.end <= end;
+  }
+  bool operator==(const MP4Interval& aOther) const {
+    return start == aOther.start && end == aOther.end;
+  }
+  bool operator!=(const MP4Interval& aOther) const {
+    return !(*this == aOther);
+  }
+  bool IsNull() const { return end == start; }
+  MP4Interval Extents(const MP4Interval& aOther) const {
+    if (IsNull()) {  // A null interval contributes nothing to the extents.
+      return aOther;
+    }
+    return MP4Interval(std::min(start, aOther.start),
+                       std::max(end, aOther.end));
+  }
+
+  T start;
+  T end;
+  // Formats the interval as "[start, end]"; requires T to offer ToString().
+  nsCString ToString() {
+    return nsPrintfCString("[%s, %s]", start.ToString().get(),
+                           end.ToString().get());
+  }
+  // Appends, or extends the last element when it ends at the new start.
+  static void SemiNormalAppend(nsTArray<MP4Interval<T>>& aIntervals,
+                               MP4Interval<T> aMP4Interval) {
+    if (!aIntervals.IsEmpty() &&
+        aIntervals.LastElement().end == aMP4Interval.start) {
+      aIntervals.LastElement().end = aMP4Interval.end;
+    } else {
+      aIntervals.AppendElement(aMP4Interval);
+    }
+  }
+  // Writes the sorted, merged form of aIntervals into the empty *aNormalized.
+  static void Normalize(const nsTArray<MP4Interval<T>>& aIntervals,
+                        nsTArray<MP4Interval<T>>* aNormalized) {
+    if (!aNormalized || !aIntervals.Length()) {
+      MOZ_ASSERT(aNormalized);
+      return;
+    }
+    MOZ_ASSERT(aNormalized->IsEmpty());
+
+    nsTArray<MP4Interval<T>> sorted = aIntervals.Clone();
+    sorted.Sort(Compare());
+
+    MP4Interval<T> current = sorted[0];
+    for (size_t i = 1; i < sorted.Length(); i++) {
+      MOZ_ASSERT(sorted[i].start <= sorted[i].end);
+      if (current.Contains(sorted[i])) {
+        continue;
+      }
+      if (current.end >= sorted[i].start) {
+        current.end = sorted[i].end;
+      } else {
+        aNormalized->AppendElement(current);
+        current = sorted[i];
+      }
+    }
+    aNormalized->AppendElement(current);
+  }
+  // Appends the overlaps of two normalized arrays to *aIntersection.
+  static void Intersection(const nsTArray<MP4Interval<T>>& a0,
+                           const nsTArray<MP4Interval<T>>& a1,
+                           nsTArray<MP4Interval<T>>* aIntersection) {
+    MOZ_ASSERT(IsNormalized(a0));
+    MOZ_ASSERT(IsNormalized(a1));
+    size_t i0 = 0;
+    size_t i1 = 0;
+    while (i0 < a0.Length() && i1 < a1.Length()) {
+      MP4Interval i = a0[i0].Intersection(a1[i1]);
+      if (i.Length()) {
+        aIntersection->AppendElement(i);
+      }
+      // Step past whichever interval ends first; the asserts check sorting.
+      if (a0[i0].end < a1[i1].end) {
+        i0++;
+        MOZ_ASSERT(i0 == a0.Length() || a0[i0 - 1].start < a0[i0].start);
+      } else {
+        i1++;
+        MOZ_ASSERT(i1 == a1.Length() || a1[i1 - 1].start < a1[i1].start);
+      }
+    }
+  }
+  // True when every interval ends strictly before the next one starts.
+  static bool IsNormalized(const nsTArray<MP4Interval<T>>& aIntervals) {
+    for (size_t i = 1; i < aIntervals.Length(); i++) {
+      if (aIntervals[i - 1].end >= aIntervals[i].start) {
+        return false;
+      }
+    }
+    return true;
+  }
+  // Comparator passed to nsTArray::Sort(); LessThan orders by start only.
+  struct Compare {
+    bool Equals(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const {
+      return a0.start == a1.start && a0.end == a1.end;
+    }
+
+    bool LessThan(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const {
+      return a0.start < a1.start;
+    }
+  };
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Metadata.cpp b/dom/media/mp4/MP4Metadata.cpp
new file mode 100644
index 0000000000..88b4fb5c39
--- /dev/null
+++ b/dom/media/mp4/MP4Metadata.cpp
@@ -0,0 +1,507 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/UniquePtr.h"
+#include "VideoUtils.h"
+#include "MoofParser.h"
+#include "MP4Metadata.h"
+#include "ByteStream.h"
+#include "mp4parse.h"
+
+#include <limits>
+#include <stdint.h>
+#include <vector>
+
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+LazyLogModule gMP4MetadataLog("MP4Metadata");
+
+IndiceWrapper::IndiceWrapper(Mp4parseByteData& aRustIndice) {
+  mIndice.indices = aRustIndice.indices;  // Pointer is copied, not the table.
+  mIndice.length = aRustIndice.length;
+  mIndice.data = nullptr;  // aRustIndice.data is deliberately not carried over.
+}
+
+size_t IndiceWrapper::Length() const { return mIndice.length; }  // Entry count.
+
+// Copies entry aIndex into aIndice; logs and returns false if out of range.
+bool IndiceWrapper::GetIndice(size_t aIndex,
+                              MP4SampleIndex::Indice& aIndice) const {
+  if (aIndex >= mIndice.length) {
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Error, ("Index overflow in indice"));
+    return false;
+  }
+  const Mp4parseIndice& entry = mIndice.indices[aIndex];
+  aIndice.start_offset = entry.start_offset;
+  aIndice.end_offset = entry.end_offset;
+  aIndice.start_composition = entry.start_composition;
+  aIndice.end_composition = entry.end_composition;
+  aIndice.start_decode = entry.start_decode;
+  aIndice.sync = entry.sync;
+  return true;
+}
+
+// Name of the track type for log output; "unknown" for any other type.
+static const char* TrackTypeToString(mozilla::TrackInfo::TrackType aType) {
+  if (aType == mozilla::TrackInfo::kAudioTrack) {
+    return "audio";
+  }
+  if (aType == mozilla::TrackInfo::kVideoTrack) {
+    return "video";
+  }
+  return "unknown";
+}
+
+bool StreamAdaptor::Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read) {
+  if (!mOffset.isValid()) {  // An earlier advance overflowed the offset.
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Error,
+            ("Overflow in source stream offset"));
+    return false;
+  }
+  if (!mSource->ReadAt(mOffset.value(), buffer, size, bytes_read)) {
+    return false;  // The offset stays where it was.
+  }
+  mOffset += *bytes_read;  // The next call continues after these bytes.
+  return true;
+}
+
+// Read callback handed to the rust parser; userdata is the StreamAdaptor.
+// Returns the number of bytes read, or -1 when the adaptor's Read() fails.
+static intptr_t read_source(uint8_t* buffer, uintptr_t size, void* userdata) {
+  MOZ_ASSERT(buffer);
+  MOZ_ASSERT(userdata);
+
+  auto* adaptor = static_cast<StreamAdaptor*>(userdata);
+  size_t count = 0;
+  if (adaptor->Read(buffer, size, &count)) {
+    return count;
+  }
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, ("Error reading source data"));
+  return -1;
+}
+
+MP4Metadata::MP4Metadata(ByteStream* aSource)
+    : mSource(aSource), mSourceAdaptor(aSource) {  // Both refer to aSource.
+  DDLINKCHILD("source", aSource);
+}
+
+MP4Metadata::~MP4Metadata() = default;  // mParser is freed by FreeMP4Parser.
+
+// Creates the rust parser on top of mSourceAdaptor. On success the parser is
+// stored in mParser and UpdateCrypto() runs; on failure the status maps to
+// NS_ERROR_OUT_OF_MEMORY (OOM) or NS_ERROR_DOM_MEDIA_METADATA_ERR.
+nsresult MP4Metadata::Parse() {
+  Mp4parseIo io = {read_source, &mSourceAdaptor};
+  Mp4parseParser* rawParser = nullptr;
+  const Mp4parseStatus status = mp4parse_new(&io, &rawParser);
+  if (status != MP4PARSE_STATUS_OK || !rawParser) {
+    MOZ_ASSERT(!mParser);
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Debug,
+            ("Parse failed, return code %d\n", status));
+    return status == MP4PARSE_STATUS_OOM ? NS_ERROR_OUT_OF_MEMORY
+                                         : NS_ERROR_DOM_MEDIA_METADATA_ERR;
+  }
+  mParser.reset(rawParser);
+  MOZ_ASSERT(mParser);
+  UpdateCrypto();
+  return NS_OK;
+}
+
+// Forwards the PSSH bytes reported by the parser to mCrypto. mCrypto is left
+// untouched when the query fails or reports zero bytes; the length is only
+// inspected after the query succeeded.
+void MP4Metadata::UpdateCrypto() {
+  Mp4parsePsshInfo pssh = {};
+  const auto status = mp4parse_get_pssh_info(mParser.get(), &pssh);
+  const bool hasData = status == MP4PARSE_STATUS_OK && pssh.data.length != 0;
+  if (!hasData) {
+    return;
+  }
+  mCrypto.Update(pssh.data.data, pssh.data.length);
+}
+
+// True when both name the same kind of track; only audio and video can match.
+bool TrackTypeEqual(TrackInfo::TrackType aLHS, Mp4parseTrackType aRHS) {
+  if (aLHS == TrackInfo::kAudioTrack) {
+    return aRHS == MP4PARSE_TRACK_TYPE_AUDIO;
+  }
+  if (aLHS == TrackInfo::kVideoTrack) {
+    return aRHS == MP4PARSE_TRACK_TYPE_VIDEO;
+  }
+  return false;
+}
+
+// Counts the tracks of aType that have sample info with a recognized codec.
+// A failed track count, or a track with zero sample infos, yields a failure
+// result paired with NumberTracksError().
+MP4Metadata::ResultAndTrackCount MP4Metadata::GetNumberTracks(
+    mozilla::TrackInfo::TrackType aType) const {
+  uint32_t trackCount;
+  auto status = mp4parse_get_track_count(mParser.get(), &trackCount);
+  if (status != MP4PARSE_STATUS_OK) {
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+            ("rust parser error %d counting tracks", status));
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Rust parser error %d", status)),
+            MP4Metadata::NumberTracksError()};
+  }
+
+  uint32_t matching = 0;
+  for (uint32_t index = 0; index < trackCount; ++index) {
+    Mp4parseTrackInfo trackInfo;
+    status = mp4parse_get_track_info(mParser.get(), index, &trackInfo);
+    if (status != MP4PARSE_STATUS_OK) {
+      continue;
+    }
+
+    // The codec of the first sample info is assumed to be representative of
+    // the whole track; tracks whose codec is not recognized are skipped.
+    Mp4parseCodec firstCodec = MP4PARSE_CODEC_UNKNOWN;
+    if (trackInfo.track_type == MP4PARSE_TRACK_TYPE_AUDIO) {
+      Mp4parseTrackAudioInfo audio;
+      status = mp4parse_get_track_audio_info(mParser.get(), index, &audio);
+      if (status != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_audio_info returned error %d", status));
+        continue;
+      }
+      MOZ_DIAGNOSTIC_ASSERT(audio.sample_info_count > 0,
+                            "Must have at least one audio sample info");
+      if (audio.sample_info_count == 0) {
+        return {
+            MediaResult(
+                NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                RESULT_DETAIL(
+                    "Got 0 audio sample info while checking number tracks")),
+            MP4Metadata::NumberTracksError()};
+      }
+      firstCodec = audio.sample_info[0].codec_type;
+    } else if (trackInfo.track_type == MP4PARSE_TRACK_TYPE_VIDEO) {
+      Mp4parseTrackVideoInfo video;
+      status = mp4parse_get_track_video_info(mParser.get(), index, &video);
+      if (status != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_video_info returned error %d", status));
+        continue;
+      }
+      MOZ_DIAGNOSTIC_ASSERT(video.sample_info_count > 0,
+                            "Must have at least one video sample info");
+      if (video.sample_info_count == 0) {
+        return {
+            MediaResult(
+                NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                RESULT_DETAIL(
+                    "Got 0 video sample info while checking number tracks")),
+            MP4Metadata::NumberTracksError()};
+      }
+      firstCodec = video.sample_info[0].codec_type;
+    } else {
+      // Only audio and video are supported
+      continue;
+    }
+    if (firstCodec == MP4PARSE_CODEC_UNKNOWN) {
+      continue;
+    }
+    if (TrackTypeEqual(aType, trackInfo.track_type)) {
+      ++matching;
+    }
+  }
+
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Info,
+          ("%s tracks found: %u", TrackTypeToString(aType), matching));
+  return {NS_OK, matching};
+}
+
+// Maps aTrackNumber, an index among the tracks of aType, to mp4parse's
+// global track index. Nothing() if the count fails or no such track exists.
+// NOTE(review): unlike GetNumberTracks(), tracks with an unrecognized codec
+// are counted here — confirm the two indexings are meant to differ.
+Maybe<uint32_t> MP4Metadata::TrackTypeToGlobalTrackIndex(
+    mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const {
+  uint32_t trackCount;
+  if (mp4parse_get_track_count(mParser.get(), &trackCount) !=
+      MP4PARSE_STATUS_OK) {
+    return Nothing();
+  }
+
+  uint32_t seenOfType = 0;
+  for (uint32_t global = 0; global < trackCount; ++global) {
+    Mp4parseTrackInfo trackInfo;
+    const auto status =
+        mp4parse_get_track_info(mParser.get(), global, &trackInfo);
+    if (status != MP4PARSE_STATUS_OK ||
+        !TrackTypeEqual(aType, trackInfo.track_type)) {
+      continue;
+    }
+    if (seenOfType == aTrackNumber) {
+      return Some(global);
+    }
+    ++seenOfType;
+  }
+
+  return Nothing();
+}
+
+// Builds the TrackInfo for the aTrackNumber-th track of aType. The per-type
+// number is first mapped to mp4parse's global index, then the generic track
+// info and the audio/video specific info are fetched and passed to
+// MP4AudioInfo/MP4VideoInfo::Update(). Any failure, or a resulting info
+// that does not validate, yields NS_ERROR_DOM_MEDIA_METADATA_ERR with a
+// null TrackInfo.
+MP4Metadata::ResultAndTrackInfo MP4Metadata::GetTrackInfo(
+    mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const {
+  Maybe<uint32_t> trackIndex = TrackTypeToGlobalTrackIndex(aType, aTrackNumber);
+  if (trackIndex.isNothing()) {
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("No %s tracks", TrackTypeToStr(aType))),
+            nullptr};
+  }
+
+  Mp4parseTrackInfo info;
+  auto rv = mp4parse_get_track_info(mParser.get(), trackIndex.value(), &info);
+  if (rv != MP4PARSE_STATUS_OK) {
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+            ("mp4parse_get_track_info returned %d", rv));
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Cannot find %s track #%zu",
+                                      TrackTypeToStr(aType), aTrackNumber)),
+            nullptr};
+  }
+  // Debug builds only: log the codec of the track's first sample info.
+#ifdef DEBUG
+  bool haveSampleInfo = false;
+  const char* codecString = "unrecognized";
+  Mp4parseCodec codecType = MP4PARSE_CODEC_UNKNOWN;
+  if (info.track_type == MP4PARSE_TRACK_TYPE_AUDIO) {
+    Mp4parseTrackAudioInfo audio;
+    auto rv = mp4parse_get_track_audio_info(mParser.get(), trackIndex.value(),
+                                            &audio);
+    if (rv == MP4PARSE_STATUS_OK && audio.sample_info_count > 0) {
+      codecType = audio.sample_info[0].codec_type;
+      haveSampleInfo = true;
+    }
+  } else if (info.track_type == MP4PARSE_TRACK_TYPE_VIDEO) {
+    Mp4parseTrackVideoInfo video;
+    auto rv = mp4parse_get_track_video_info(mParser.get(), trackIndex.value(),
+                                            &video);
+    if (rv == MP4PARSE_STATUS_OK && video.sample_info_count > 0) {
+      codecType = video.sample_info[0].codec_type;
+      haveSampleInfo = true;
+    }
+  }
+  if (haveSampleInfo) {
+    switch (codecType) {
+      case MP4PARSE_CODEC_UNKNOWN:
+        codecString = "unknown";
+        break;
+      case MP4PARSE_CODEC_AAC:
+        codecString = "aac";
+        break;
+      case MP4PARSE_CODEC_OPUS:
+        codecString = "opus";
+        break;
+      case MP4PARSE_CODEC_FLAC:
+        codecString = "flac";
+        break;
+      case MP4PARSE_CODEC_ALAC:
+        codecString = "alac";
+        break;
+      case MP4PARSE_CODEC_H263:
+        codecString = "h.263";
+        break;
+      case MP4PARSE_CODEC_AVC:
+        codecString = "h.264";
+        break;
+      case MP4PARSE_CODEC_VP9:
+        codecString = "vp9";
+        break;
+      case MP4PARSE_CODEC_AV1:
+        codecString = "av1";
+        break;
+      case MP4PARSE_CODEC_MP3:
+        codecString = "mp3";
+        break;
+      case MP4PARSE_CODEC_MP4V:
+        codecString = "mp4v";
+        break;
+      case MP4PARSE_CODEC_JPEG:
+        codecString = "jpeg";
+        break;
+      case MP4PARSE_CODEC_AC3:
+        codecString = "ac-3";
+        break;
+      case MP4PARSE_CODEC_EC3:
+        codecString = "ec-3";
+        break;
+    }
+  }
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Debug,
+          ("track codec %s (%u)\n", codecString, codecType));
+#endif
+
+  // `info` (fetched and checked above) already carries the generic track
+  // data, so the parser is not asked for it a second time. The track's own
+  // time scale is applied to whichever TrackInfo gets built below.
+  uint32_t timeScale = info.time_scale;
+
+  // This specialization interface is wild.
+  UniquePtr<mozilla::TrackInfo> e;
+  switch (aType) {
+    case TrackInfo::TrackType::kAudioTrack: {
+      Mp4parseTrackAudioInfo audio;
+      auto rv = mp4parse_get_track_audio_info(mParser.get(), trackIndex.value(),
+                                              &audio);
+      if (rv != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_audio_info returned error %d", rv));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL("Cannot parse %s track #%zu",
+                                          TrackTypeToStr(aType), aTrackNumber)),
+                nullptr};
+      }
+
+      auto indices = GetTrackIndice(info.track_id);
+      if (!indices.Ref()) {
+        // non fatal
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("Can't get index table for audio track, duration might be "
+                 "slightly incorrect"));
+      }
+      auto track = mozilla::MakeUnique<MP4AudioInfo>();
+      MediaResult updateStatus =
+          track->Update(&info, &audio, indices.Ref().get());
+      if (NS_FAILED(updateStatus)) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("Updating audio track failed with %s",
+                 updateStatus.Message().get()));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL(
+                                "Failed to update %s track #%zu with error: %s",
+                                TrackTypeToStr(aType), aTrackNumber,
+                                updateStatus.Message().get())),
+                nullptr};
+      }
+      e = std::move(track);
+    } break;
+    case TrackInfo::TrackType::kVideoTrack: {
+      Mp4parseTrackVideoInfo video;
+      auto rv = mp4parse_get_track_video_info(mParser.get(), trackIndex.value(),
+                                              &video);
+      if (rv != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_video_info returned error %d", rv));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL("Cannot parse %s track #%zu",
+                                          TrackTypeToStr(aType), aTrackNumber)),
+                nullptr};
+      }
+      auto track = mozilla::MakeUnique<MP4VideoInfo>();
+      MediaResult updateStatus = track->Update(&info, &video);
+      if (NS_FAILED(updateStatus)) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("Updating video track failed with %s",
+                 updateStatus.Message().get()));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL(
+                                "Failed to update %s track #%zu with error: %s",
+                                TrackTypeToStr(aType), aTrackNumber,
+                                updateStatus.Message().get())),
+                nullptr};
+      }
+      e = std::move(track);
+    } break;
+    default:
+      MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+              ("unhandled track type %d", aType));
+      return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                          RESULT_DETAIL("Cannot handle %s track #%zu",
+                                        TrackTypeToStr(aType), aTrackNumber)),
+              nullptr};
+  }
+
+  // Each case above either returned or assigned `e` from MakeUnique.
+  e->mTimeScale = timeScale;
+
+  // No duration in track, use fragment_duration.
+  if (!e->mDuration.IsPositive()) {
+    Mp4parseFragmentInfo fragmentInfo;
+    auto rv = mp4parse_get_fragment_info(mParser.get(), &fragmentInfo);
+    if (rv == MP4PARSE_STATUS_OK) {
+      // This doesn't use the time scale of the track, but the time scale
+      // indicated in the mvhd box
+      e->mDuration = TimeUnit(fragmentInfo.fragment_duration,
+                              AssertedCast<int64_t>(fragmentInfo.time_scale));
+    }
+  }
+
+  if (e->IsValid()) {
+    return {NS_OK, std::move(e)};
+  }
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Debug, ("TrackInfo didn't validate"));
+
+  return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Invalid %s track #%zu",
+                                    TrackTypeToStr(aType), aTrackNumber)),
+          nullptr};
+}
+
+bool MP4Metadata::CanSeek() const { return true; }  // Unconditionally true.
+
+MP4Metadata::ResultAndCryptoFile MP4Metadata::Crypto() const {
+  return {NS_OK, &mCrypto};  // Always NS_OK; the pointer refers to the member.
+}
+
+// Wraps the sample index table of track aTrackId. For fragmented tracks the
+// table is not queried and the wrapper has length 0. Parser failures produce
+// NS_ERROR_DOM_MEDIA_METADATA_ERR with a null wrapper.
+MP4Metadata::ResultAndIndice MP4Metadata::GetTrackIndice(
+    uint32_t aTrackId) const {
+  uint8_t isFragmented = false;
+  auto status = mp4parse_is_fragmented(mParser.get(), aTrackId, &isFragmented);
+  if (status != MP4PARSE_STATUS_OK) {
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Cannot parse whether track id %u is "
+                                      "fragmented, mp4parse_error=%d",
+                                      aTrackId, int(status))),
+            nullptr};
+  }
+
+  Mp4parseByteData rawTable = {};  // Stays zeroed for fragmented tracks.
+  if (!isFragmented) {
+    status = mp4parse_get_indice_table(mParser.get(), aTrackId, &rawTable);
+    if (status != MP4PARSE_STATUS_OK) {
+      return {
+          MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Cannot parse index table in track id %u, "
+                                    "mp4parse_error=%d",
+                                    aTrackId, int(status))),
+          nullptr};
+    }
+  }
+
+  auto wrapper = mozilla::MakeUnique<IndiceWrapper>(rawTable);
+  return {NS_OK, std::move(wrapper)};
+}
+
+/*static*/ MP4Metadata::ResultAndByteBuffer MP4Metadata::Metadata(
+    ByteStream* aSource) {
+  // A temporary MoofParser over all tracks supplies the metadata buffer.
+  auto moofParser = mozilla::MakeUnique<MoofParser>(
+      aSource, AsVariant(ParseAllTracks{}), false);
+  if (RefPtr<mozilla::MediaByteBuffer> bytes = moofParser->Metadata()) {
+    return {NS_OK, std::move(bytes)};
+  }
+  return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Cannot parse metadata")),
+          nullptr};
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/MP4Metadata.h b/dom/media/mp4/MP4Metadata.h
new file mode 100644
index 0000000000..e900fbedc3
--- /dev/null
+++ b/dom/media/mp4/MP4Metadata.h
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP4METADATA_H_
+#define MP4METADATA_H_
+
+#include <type_traits>
+
+#include "mozilla/UniquePtr.h"
+#include "DecoderData.h"
+#include "MediaData.h"
+#include "MediaInfo.h"
+#include "MediaResult.h"
+#include "ByteStream.h"
+#include "mp4parse.h"
+#include "SampleIterator.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclName(MP4Metadata);
+
+// The memory behind mIndice.indices is owned by the rust mp4 parser, so an
+// instance of this class MUST NOT outlive the parser it was created from.
+class IndiceWrapper {
+ public:
+  size_t Length() const;
+  // Copies entry aIndex into aIndice; returns false if aIndex is out of range.
+  bool GetIndice(size_t aIndex, MP4SampleIndex::Indice& aIndice) const;
+  // Copies only the length and the indices pointer out of aRustIndice.
+  explicit IndiceWrapper(Mp4parseByteData& aRustIndice);
+
+ protected:
+  Mp4parseByteData mIndice;
+};
+
+struct FreeMP4Parser {  // Deleter type used by MP4Metadata::mParser.
+  void operator()(Mp4parseParser* aPtr) { mp4parse_free(aPtr); }
+};
+
+// Wraps a ByteStream and remembers the offset of the next sequential read.
+class StreamAdaptor {
+ public:
+  explicit StreamAdaptor(ByteStream* aSource) : mSource(aSource), mOffset(0) {}
+
+  ~StreamAdaptor() = default;
+  // Reads at the current offset and, on success, advances it by *bytes_read.
+  bool Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read);
+
+ private:
+  ByteStream* mSource;  // Raw pointer; the adaptor holds no reference itself.
+  CheckedInt<size_t> mOffset;  // Once invalid (overflow), Read() fails.
+};
+
+class MP4Metadata : public DecoderDoctorLifeLogger<MP4Metadata> {
+ public:
+  explicit MP4Metadata(ByteStream* aSource);
+  ~MP4Metadata();
+
+  // Pairs a MediaResult with a value of type T (stored as std::decay_t<T>).
+  template <typename T>
+  class ResultAndType {
+   public:
+    template <typename M2, typename T2>
+    ResultAndType(M2&& aM, T2&& aT)
+        : mResult(std::forward<M2>(aM)), mT(std::forward<T2>(aT)) {}
+    ResultAndType(const ResultAndType&) = default;
+    ResultAndType& operator=(const ResultAndType&) = default;
+    ResultAndType(ResultAndType&&) = default;
+    ResultAndType& operator=(ResultAndType&&) = default;
+    // Both accessors hand out mutable references to the stored members.
+    mozilla::MediaResult& Result() { return mResult; }
+    T& Ref() { return mT; }
+
+   private:
+    mozilla::MediaResult mResult;
+    std::decay_t<T> mT;
+  };
+  // Returns the buffer produced by a MoofParser over aSource; null on failure.
+  using ResultAndByteBuffer = ResultAndType<RefPtr<mozilla::MediaByteBuffer>>;
+  static ResultAndByteBuffer Metadata(ByteStream* aSource);
+  // Count value that accompanies a failed GetNumberTracks() result.
+  static constexpr uint32_t NumberTracksError() { return UINT32_MAX; }
+  using ResultAndTrackCount = ResultAndType<uint32_t>;
+  ResultAndTrackCount GetNumberTracks(
+      mozilla::TrackInfo::TrackType aType) const;
+  // aTrackNumber indexes the tracks of aType; the TrackInfo is null on failure.
+  using ResultAndTrackInfo =
+      ResultAndType<mozilla::UniquePtr<mozilla::TrackInfo>>;
+  ResultAndTrackInfo GetTrackInfo(mozilla::TrackInfo::TrackType aType,
+                                  size_t aTrackNumber) const;
+
+  bool CanSeek() const;
+  // Always NS_OK; the pointer refers to this object's mCrypto.
+  using ResultAndCryptoFile = ResultAndType<const CryptoFile*>;
+  ResultAndCryptoFile Crypto() const;
+  // The returned wrapper borrows parser-owned memory (see IndiceWrapper).
+  using ResultAndIndice = ResultAndType<mozilla::UniquePtr<IndiceWrapper>>;
+  ResultAndIndice GetTrackIndice(uint32_t aTrackId) const;
+  // Creates mParser from the source adaptor, then calls UpdateCrypto().
+  nsresult Parse();
+
+ private:
+  void UpdateCrypto();
+  Maybe<uint32_t> TrackTypeToGlobalTrackIndex(
+      mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const;
+
+  CryptoFile mCrypto;
+  RefPtr<ByteStream> mSource;
+  StreamAdaptor mSourceAdaptor;
+  mozilla::UniquePtr<Mp4parseParser, FreeMP4Parser> mParser;
+};
+
+}  // namespace mozilla
+
+#endif  // MP4METADATA_H_
diff --git a/dom/media/mp4/MoofParser.cpp b/dom/media/mp4/MoofParser.cpp
new file mode 100644
index 0000000000..71d3939502
--- /dev/null
+++ b/dom/media/mp4/MoofParser.cpp
@@ -0,0 +1,1286 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MoofParser.h"
+#include "Box.h"
+#include "SinfParser.h"
+#include <limits>
+#include "MP4Interval.h"
+
+#include "mozilla/CheckedInt.h"
+#include "mozilla/HelperMacros.h"
+#include "mozilla/Logging.h"
+
+// Logging helpers for this file. When MOZ_FMP4 is defined, each macro forwards
+// to MOZ_LOG on the log module returned by GetDemuxerLog(), at Error, Warning
+// or Debug level respectively. The message is prefixed with the stringified
+// |name| argument, the value of |this| and the current function name, so the
+// macros can only be used where |this| is available. Without MOZ_FMP4 all
+// three macros expand to nothing.
+#if defined(MOZ_FMP4)
+extern mozilla::LogModule* GetDemuxerLog();
+
+#  define LOG_ERROR(name, arg, ...)                \
+    MOZ_LOG(                                       \
+        GetDemuxerLog(), mozilla::LogLevel::Error, \
+        (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+#  define LOG_WARN(name, arg, ...)                   \
+    MOZ_LOG(                                         \
+        GetDemuxerLog(), mozilla::LogLevel::Warning, \
+        (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+#  define LOG_DEBUG(name, arg, ...)                \
+    MOZ_LOG(                                       \
+        GetDemuxerLog(), mozilla::LogLevel::Debug, \
+        (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+
+#else
+#  define LOG_ERROR(...)
+#  define LOG_WARN(...)
+#  define LOG_DEBUG(...)
+#endif
+
+namespace mozilla {
+
+using TimeUnit = media::TimeUnit;
+
+const uint32_t kKeyIdSize = 16;
+
+// Convenience overload: pairs |aByteRanges| with mSource in a BoxContext and
+// delegates to the BoxContext overload, returning its result.
+bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges) {
+  BoxContext boxContext(mSource, aByteRanges);
+  const bool foundMoof = RebuildFragmentedIndex(boxContext);
+  return foundMoof;
+}
+
+// Rebuilds the fragment index, optionally evicting old entries first.
+//
+// On entry |*aCanEvict| says whether eviction is allowed. If it is, and more
+// than one moof is indexed, every moof and media range except the last is
+// dropped. On return |*aCanEvict| says whether an eviction actually happened.
+// The return value is that of the byte-range overload.
+bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges,
+                                        bool* aCanEvict) {
+  MOZ_ASSERT(aCanEvict);
+  const bool evicting = *aCanEvict && mMoofs.Length() > 1;
+  if (evicting) {
+    MOZ_ASSERT(mMoofs.Length() == mMediaRanges.Length());
+    // Keep only the most recent entry of each array.
+    mMoofs.RemoveElementsAt(0, mMoofs.Length() - 1);
+    mMediaRanges.RemoveElementsAt(0, mMediaRanges.Length() - 1);
+  }
+  *aCanEvict = evicting;
+  return RebuildFragmentedIndex(aByteRanges);
+}
+
+// Walks the boxes available in |aContext|, starting at mOffset, and updates
+// the fragment index:
+//  - the first "moov" seen while mInitRange is still empty sets mInitRange
+//    and is parsed via ParseMoov;
+//  - each "moof" is parsed into a Moof and appended to mMoofs, with its range
+//    appended to mMediaRanges;
+//  - an "mdat" that intersects the last moof's sample data range widens that
+//    moof's entry in mMediaRanges to cover the mdat.
+// mOffset is advanced past every box that was processed. Returns true if at
+// least one moof was appended during this call.
+bool MoofParser::RebuildFragmentedIndex(BoxContext& aContext) {
+  LOG_DEBUG(
+      Moof,
+      "Starting, mTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      mTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      mTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : mTrackParseMode.as<uint32_t>());
+  bool foundValidMoof = false;
+
+  for (Box box(&aContext, mOffset); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("moov") && mInitRange.IsEmpty()) {
+      // The init range runs from the start of the resource to the end of the
+      // moov box.
+      mInitRange = MediaByteRange(0, box.Range().mEnd);
+      ParseMoov(box);
+    } else if (box.IsType("moof")) {
+      Moof moof(box, mTrackParseMode, mTrex, mMvhd, mMdhd, mEdts, mSinf,
+                &mLastDecodeTime, mIsAudio, mTracksEndCts);
+
+      if (!moof.IsValid() && !box.Next().IsAvailable()) {
+        // Moof isn't valid abort search for now.
+        // Breaking here skips the mOffset update below, so the next call
+        // starts again from this moof.
+        LOG_WARN(Moof,
+                 "Could not find valid moof, moof may not be complete yet.");
+        break;
+      }
+
+      if (!mMoofs.IsEmpty()) {
+        // Stitch time ranges together in the case of a (hopefully small) time
+        // range gap between moofs.
+        mMoofs.LastElement().FixRounding(moof);
+      }
+
+      mMediaRanges.AppendElement(moof.mRange);
+      mMoofs.AppendElement(std::move(moof));
+      foundValidMoof = true;
+    } else if (box.IsType("mdat") && !Moofs().IsEmpty()) {
+      // Check if we have all our data from last moof.
+      Moof& moof = Moofs().LastElement();
+      media::Interval<int64_t> datarange(moof.mMdatRange.mStart,
+                                         moof.mMdatRange.mEnd, 0);
+      media::Interval<int64_t> mdat(box.Range().mStart, box.Range().mEnd, 0);
+      if (datarange.Intersects(mdat)) {
+        mMediaRanges.LastElement() =
+            mMediaRanges.LastElement().Span(box.Range());
+      }
+    }
+    mOffset = box.NextOffset();
+  }
+  MOZ_ASSERT(mTrackParseMode.is<ParseAllTracks>() ||
+                 mTrex.mTrackId == mTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, mTrex should have the same track id "
+             "as the track being parsed.");
+  LOG_DEBUG(Moof, "Done, foundValidMoof=%s.",
+            foundValidMoof ? "true" : "false");
+  return foundValidMoof;
+}
+
+// Returns the byte range of the first indexed moof, or a default-constructed
+// range when no moof has been indexed.
+MediaByteRange MoofParser::FirstCompleteMediaHeader() {
+  return Moofs().IsEmpty() ? MediaByteRange() : Moofs()[0].mRange;
+}
+
+// Returns the first media range that fully contains the sample data range
+// (mMdatRange) of its moof, or a default-constructed range if there is none.
+MediaByteRange MoofParser::FirstCompleteMediaSegment() {
+  for (uint32_t idx = 0; idx < mMediaRanges.Length(); ++idx) {
+    const bool complete = mMediaRanges[idx].Contains(Moofs()[idx].mMdatRange);
+    if (!complete) {
+      continue;
+    }
+    return mMediaRanges[idx];
+  }
+  return MediaByteRange();
+}
+
+DDLoggedTypeDeclNameAndBase(BlockingStream, ByteStream);
+
+// ByteStream wrapper that forwards every request to the wrapped stream.
+// CachedReadAt() is deliberately routed to the wrapped stream's ReadAt()
+// rather than to its CachedReadAt().
+class BlockingStream : public ByteStream,
+                       public DecoderDoctorLifeLogger<BlockingStream> {
+ public:
+  explicit BlockingStream(ByteStream* aStream) : mStream(aStream) {
+    DDLINKCHILD("stream", aStream);
+  }
+
+  // Plain read: forwarded unchanged.
+  bool ReadAt(int64_t offset, void* data, size_t size,
+              size_t* bytes_read) override {
+    const bool ok = mStream->ReadAt(offset, data, size, bytes_read);
+    return ok;
+  }
+
+  // Cached read: served through ReadAt() of the wrapped stream.
+  bool CachedReadAt(int64_t offset, void* data, size_t size,
+                    size_t* bytes_read) override {
+    const bool ok = mStream->ReadAt(offset, data, size, bytes_read);
+    return ok;
+  }
+
+  // Length query: forwarded unchanged.
+  bool Length(int64_t* size) override {
+    const bool ok = mStream->Length(size);
+    return ok;
+  }
+
+ private:
+  RefPtr<ByteStream> mStream;
+};
+
+// Scans forward from mOffset through a BlockingStream over mSource. For each
+// "moof" box found, the index is rebuilt over the bytes from the current
+// mOffset up to the end of that moof. Returns true as soon as one rebuild
+// reports a moof, false if the scan runs out of boxes first.
+bool MoofParser::BlockingReadNextMoof() {
+  LOG_DEBUG(Moof, "Starting.");
+  // If the source cannot report its length, treat it as unbounded.
+  int64_t sourceLength = std::numeric_limits<int64_t>::max();
+  mSource->Length(&sourceLength);
+  RefPtr<BlockingStream> blocking = new BlockingStream(mSource);
+  MediaByteRangeSet wholeSource(MediaByteRange(0, sourceLength));
+
+  BoxContext scanContext(blocking, wholeSource);
+  for (Box candidate(&scanContext, mOffset); candidate.IsAvailable();
+       candidate = candidate.Next()) {
+    if (!candidate.IsType("moof")) {
+      continue;
+    }
+    MediaByteRangeSet moofBytes(
+        MediaByteRange(mOffset, candidate.Range().mEnd));
+    BoxContext moofContext(blocking, moofBytes);
+    if (!RebuildFragmentedIndex(moofContext)) {
+      continue;
+    }
+    LOG_DEBUG(Moof, "Succeeded on RebuildFragmentedIndex, returning true.");
+    return true;
+  }
+  LOG_DEBUG(Moof, "Couldn't read next moof, returning false.");
+  return false;
+}
+
+// Searches, from mOffset onwards, for the first "moov" box using a
+// BlockingStream over mSource. If one is found its byte range is written to
+// |aMoov|; otherwise |aMoov| is left untouched. In both cases mInitRange is
+// set to |aMoov|.
+void MoofParser::ScanForMetadata(mozilla::MediaByteRange& aMoov) {
+  LOG_DEBUG(Moof, "Starting.");
+  // If the source cannot report its length, treat it as unbounded.
+  int64_t sourceLength = std::numeric_limits<int64_t>::max();
+  mSource->Length(&sourceLength);
+  MediaByteRangeSet wholeSource;
+  wholeSource += MediaByteRange(0, sourceLength);
+  RefPtr<BlockingStream> blocking = new BlockingStream(mSource);
+
+  BoxContext scanContext(blocking, wholeSource);
+  for (Box candidate(&scanContext, mOffset); candidate.IsAvailable();
+       candidate = candidate.Next()) {
+    if (!candidate.IsType("moov")) {
+      continue;
+    }
+    aMoov = candidate.Range();
+    break;
+  }
+  mInitRange = aMoov;
+  LOG_DEBUG(Moof,
+            "Done, mInitRange.mStart=%" PRIi64 ", mInitRange.mEnd=%" PRIi64,
+            mInitRange.mStart, mInitRange.mEnd);
+}
+
+// Locates the "moov" box via ScanForMetadata() and returns its raw bytes in a
+// newly allocated MediaByteBuffer. Returns nullptr when no usable moov length
+// is found, when the buffer cannot be allocated, or when the box cannot be
+// read in full.
+already_AddRefed<mozilla::MediaByteBuffer> MoofParser::Metadata() {
+  LOG_DEBUG(Moof, "Starting.");
+  MediaByteRange moovRange;
+  ScanForMetadata(moovRange);
+  CheckedInt<MediaByteBuffer::size_type> checkedLength = moovRange.Length();
+  const bool usableLength =
+      checkedLength.isValid() && checkedLength.value() != 0;
+  if (!usableLength) {
+    // No moov, or cannot be used as array size.
+    LOG_WARN(Moof,
+             "Did not get usable moov length while trying to parse Metadata.");
+    return nullptr;
+  }
+  const auto byteCount = checkedLength.value();
+
+  RefPtr<MediaByteBuffer> buffer = new MediaByteBuffer();
+  if (!buffer->SetLength(byteCount, fallible)) {
+    LOG_ERROR(Moof, "OOM");
+    return nullptr;
+  }
+
+  RefPtr<BlockingStream> blocking = new BlockingStream(mSource);
+  size_t bytesRead;
+  const bool readOk = blocking->ReadAt(moovRange.mStart, buffer->Elements(),
+                                       byteCount, &bytesRead);
+  if (!readOk || bytesRead != byteCount) {
+    LOG_WARN(Moof, "Failed to read moov while trying to parse Metadata.");
+    return nullptr;
+  }
+  LOG_DEBUG(Moof, "Done, found metadata.");
+  return buffer.forget();
+}
+
+// Returns the combined extents of the time ranges of every indexed moof whose
+// box, located at the moof's start offset, is available within |aByteRanges|.
+MP4Interval<TimeUnit> MoofParser::GetCompositionRange(
+    const MediaByteRangeSet& aByteRanges) {
+  LOG_DEBUG(Moof, "Starting.");
+  MP4Interval<TimeUnit> compositionRange;
+  BoxContext rangeContext(mSource, aByteRanges);
+  for (Moof& moof : mMoofs) {
+    Box moofBox(&rangeContext, moof.mRange.mStart);
+    if (!moofBox.IsAvailable()) {
+      continue;
+    }
+    compositionRange = compositionRange.Extents(moof.mTimeRange);
+  }
+  LOG_DEBUG(Moof,
+            "Done, compositionRange.start=%" PRIi64
+            ", compositionRange.end=%" PRIi64 ".",
+            compositionRange.start.ToMicroseconds(),
+            compositionRange.end.ToMicroseconds());
+  return compositionRange;
+}
+
+// True when the source reports a length and mOffset is exactly at it.
+bool MoofParser::ReachedEnd() {
+  int64_t sourceLength;
+  if (!mSource->Length(&sourceLength)) {
+    return false;
+  }
+  return mOffset == sourceLength;
+}
+
+// Iterates over the children of a "moov" box: "mvhd" is stored in mMvhd,
+// "trak" is handed to ParseTrak and "mvex" to ParseMvex. Other children are
+// ignored.
+void MoofParser::ParseMoov(Box& aBox) {
+  LOG_DEBUG(Moof, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("mvhd")) {
+      mMvhd = Mvhd(child);
+      continue;
+    }
+    if (child.IsType("trak")) {
+      ParseTrak(child);
+      continue;
+    }
+    if (child.IsType("mvex")) {
+      ParseMvex(child);
+    }
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Iterates over the children of a "trak" box. The "tkhd" child is remembered;
+// "mdia" and "edts" children are only processed when all tracks are being
+// parsed or when the track id of the tkhd seen so far matches the track
+// being parsed.
+void MoofParser::ParseTrak(Box& aBox) {
+  LOG_DEBUG(Trak, "Starting.");
+  Tkhd trackHeader;
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("tkhd")) {
+      trackHeader = Tkhd(child);
+      continue;
+    }
+    if (child.IsType("mdia")) {
+      const bool wanted =
+          mTrackParseMode.is<ParseAllTracks>() ||
+          trackHeader.mTrackId == mTrackParseMode.as<uint32_t>();
+      if (wanted) {
+        ParseMdia(child);
+      }
+      continue;
+    }
+    if (child.IsType("edts")) {
+      const bool wanted =
+          mTrackParseMode.is<ParseAllTracks>() ||
+          trackHeader.mTrackId == mTrackParseMode.as<uint32_t>();
+      if (wanted) {
+        mEdts = Edts(child);
+      }
+    }
+  }
+  LOG_DEBUG(Trak, "Done.");
+}
+
+// Iterates over the children of an "mdia" box: "mdhd" is stored in mMdhd and
+// "minf" is handed to ParseMinf.
+void MoofParser::ParseMdia(Box& aBox) {
+  LOG_DEBUG(Mdia, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("mdhd")) {
+      mMdhd = Mdhd(child);
+      continue;
+    }
+    if (child.IsType("minf")) {
+      ParseMinf(child);
+    }
+  }
+  LOG_DEBUG(Mdia, "Done.");
+}
+
+// Iterates over the children of an "mvex" box. Each "trex" child is parsed
+// and stored in mTrex when all tracks are being parsed or when its track id
+// matches the track being parsed.
+void MoofParser::ParseMvex(Box& aBox) {
+  LOG_DEBUG(Mvex, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (!child.IsType("trex")) {
+      continue;
+    }
+    Trex parsed = Trex(child);
+    const bool wanted = mTrackParseMode.is<ParseAllTracks>() ||
+                        parsed.mTrackId == mTrackParseMode.as<uint32_t>();
+    if (wanted) {
+      mTrex = parsed;
+    }
+  }
+  LOG_DEBUG(Mvex, "Done.");
+}
+
+// Iterates over the children of a "minf" box and hands every "stbl" child to
+// ParseStbl.
+void MoofParser::ParseMinf(Box& aBox) {
+  LOG_DEBUG(Minf, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (!child.IsType("stbl")) {
+      continue;
+    }
+    ParseStbl(child);
+  }
+  LOG_DEBUG(Minf, "Done.");
+}
+
+// Iterates over the children of an "stbl" box:
+//  - "stsd" is handed to ParseStsd;
+//  - a valid "sgpd" with grouping type "seig" replaces
+//    mTrackSampleEncryptionInfoEntries with its entries;
+//  - a valid "sbgp" with grouping type "seig" replaces
+//    mTrackSampleToGroupEntries with its entries.
+// Returns early, without the final "Done." log, if copying entries fails.
+void MoofParser::ParseStbl(Box& aBox) {
+  LOG_DEBUG(Stbl, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("stsd")) {
+      ParseStsd(child);
+      continue;
+    }
+    if (child.IsType("sgpd")) {
+      Sgpd groupDescription(child);
+      if (!groupDescription.IsValid() ||
+          !(groupDescription.mGroupingType == "seig")) {
+        continue;
+      }
+      mTrackSampleEncryptionInfoEntries.Clear();
+      if (!mTrackSampleEncryptionInfoEntries.AppendElements(
+              groupDescription.mEntries, mozilla::fallible)) {
+        LOG_ERROR(Stbl, "OOM");
+        return;
+      }
+      continue;
+    }
+    if (child.IsType("sbgp")) {
+      Sbgp sampleToGroup(child);
+      if (!sampleToGroup.IsValid() ||
+          !(sampleToGroup.mGroupingType == "seig")) {
+        continue;
+      }
+      mTrackSampleToGroupEntries.Clear();
+      if (!mTrackSampleToGroupEntries.AppendElements(sampleToGroup.mEntries,
+                                                     mozilla::fallible)) {
+        LOG_ERROR(Stbl, "OOM");
+        return;
+      }
+    }
+  }
+  LOG_DEBUG(Stbl, "Done.");
+}
+
+// Parses an "stsd" box when a single track is being parsed, recording one
+// SampleDescriptionEntry per child box in mSampleDescriptions. Children of
+// type "encv" or "enca" are additionally handed to ParseEncrypted and flagged
+// as encrypted entries. Does nothing (beyond logging) for a multitrack
+// parser.
+void MoofParser::ParseStsd(Box& aBox) {
+  LOG_DEBUG(Stsd, "Starting.");
+  if (mTrackParseMode.is<ParseAllTracks>()) {
+    // The parser stores metadata for one track only, so mapping the sample
+    // description boxes of several tracks onto it would not make sense.
+    LOG_DEBUG(Stsd, "Early return due to multitrack parser.");
+    return;
+  }
+  MOZ_ASSERT(
+      mSampleDescriptions.IsEmpty(),
+      "Shouldn't have any sample descriptions yet when starting to parse stsd");
+  uint32_t encryptedCount = 0;
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    const bool isEncrypted = child.IsType("encv") || child.IsType("enca");
+    if (isEncrypted) {
+      ParseEncrypted(child);
+      ++encryptedCount;
+    }
+    SampleDescriptionEntry entry{isEncrypted};
+    if (!mSampleDescriptions.AppendElement(entry, mozilla::fallible)) {
+      LOG_ERROR(Stsd, "OOM");
+      return;
+    }
+  }
+  if (mSampleDescriptions.IsEmpty()) {
+    LOG_WARN(Stsd,
+             "No sample description entries found while parsing Stsd! This "
+             "shouldn't happen, as the spec requires one for each track!");
+  }
+  if (encryptedCount > 1) {
+    LOG_WARN(Stsd,
+             "More than one encrypted sample description entry found while "
+             "parsing track! We don't expect this, and it will likely break "
+             "during fragment look up!");
+  }
+  LOG_DEBUG(Stsd,
+            "Done, numberEncryptedEntries=%" PRIu32
+            ", mSampleDescriptions.Length=%zu",
+            encryptedCount, mSampleDescriptions.Length());
+}
+
+// Iterates over the children of an encrypted sample entry box and stores each
+// "sinf" child in mSinf, stopping at the first one that parses as valid.
+void MoofParser::ParseEncrypted(Box& aBox) {
+  LOG_DEBUG(Moof, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (!child.IsType("sinf")) {
+      continue;
+    }
+    // Files with several sinf boxes inside one enc* box have been seen in
+    // the wild. That is off-spec anyway, so the first sinf that parses
+    // properly wins.
+    mSinf = Sinf(child);
+    if (mSinf.IsValid()) {
+      break;
+    }
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Comparator over Sample pointers that orders samples by the start of their
+// composition range.
+class CtsComparator {
+ public:
+  bool Equals(Sample* const aA, Sample* const aB) const {
+    return StartOf(aA) == StartOf(aB);
+  }
+  bool LessThan(Sample* const aA, Sample* const aB) const {
+    return StartOf(aA) < StartOf(aB);
+  }
+
+ private:
+  // Composition start time of |aSample|.
+  static const auto& StartOf(const Sample* aSample) {
+    return aSample->mCompositionRange.start;
+  }
+};
+
+// Builds a Moof from a "moof" box.
+//  1. Every "traf" child is handed to ParseTraf; "pssh" children are
+//     collected.
+//  2. The collected pssh boxes are concatenated into mPsshes, one entry per
+//     run of boxes that are contiguous in the file.
+//  3. If the Moof is valid and has samples, the samples' composition ranges
+//     are made contiguous, small gaps to the previous moof of the same track
+//     are closed (tracked through |aTracksEndCts|), decode timestamps are
+//     re-spaced and mTimeRange is set.
+//  4. If the Moof is valid, CENC auxiliary info is attached to the index via
+//     ProcessCencAuxInfo.
+// |aDecodeTime| is read and updated through ParseTraf.
+Moof::Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+           Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+           uint64_t* aDecodeTime, bool aIsAudio,
+           nsTArray<TrackEndCts>& aTracksEndCts)
+    : mRange(aBox.Range()),
+      mTfhd(aTrex),
+      // Do not report discontinuities of less than 35ms.
+      mMaxRoundingError(TimeUnit::FromSeconds(0.035)) {
+  LOG_DEBUG(
+      Moof,
+      "Starting, aTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      aTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : aTrackParseMode.as<uint32_t>());
+  MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() ||
+                 aTrex.mTrackId == aTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, aTrex should have the same track id "
+             "as the track being parsed.");
+  nsTArray<Box> psshBoxes;
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("traf")) {
+      ParseTraf(box, aTrackParseMode, aTrex, aMvhd, aMdhd, aEdts, aSinf,
+                aDecodeTime, aIsAudio);
+    }
+    if (box.IsType("pssh")) {
+      psshBoxes.AppendElement(box);
+    }
+  }
+
+  // The EME spec requires that PSSH boxes which are contiguous in the
+  // file are dispatched to the media element in a single "encrypted" event.
+  // So append contiguous boxes here.
+  for (size_t i = 0; i < psshBoxes.Length(); ++i) {
+    Box box = psshBoxes[i];
+    // Start a new entry for the first box and whenever a box does not begin
+    // where the previous one ended.
+    if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
+      mPsshes.AppendElement();
+    }
+    nsTArray<uint8_t>& pssh = mPsshes.LastElement();
+    pssh.AppendElements(std::move(box.ReadCompleteBox()));
+  }
+
+  if (IsValid()) {
+    if (mIndex.Length()) {
+      // Ensure the samples are contiguous with no gaps.
+      nsTArray<Sample*> ctsOrder;
+      for (auto& sample : mIndex) {
+        ctsOrder.AppendElement(&sample);
+      }
+      ctsOrder.Sort(CtsComparator());
+
+      // In composition order, end each sample where the next one starts.
+      for (size_t i = 1; i < ctsOrder.Length(); i++) {
+        ctsOrder[i - 1]->mCompositionRange.end =
+            ctsOrder[i]->mCompositionRange.start;
+      }
+
+      // Ensure that there are no gaps between the first sample in this
+      // Moof and the preceding Moof.
+      if (!ctsOrder.IsEmpty()) {
+        bool found = false;
+        // Track ID of the track we're parsing.
+        const uint32_t trackId = aTrex.mTrackId;
+        // Find the previous CTS end time of the Moof preceding the Moofs we
+        // just parsed, for the track we're parsing.
+        for (auto& prevCts : aTracksEndCts) {
+          if (prevCts.mTrackId == trackId) {
+            // We ensure there are no gaps in samples' CTS between the last
+            // sample in a Moof, and the first sample in the next Moof, if
+            // they're within these many Microseconds of each other.
+            const TimeUnit CROSS_MOOF_CTS_MERGE_THRESHOLD =
+                TimeUnit::FromMicroseconds(1);
+            // We have previously parsed a Moof for this track. Smooth the gap
+            // between samples for this track across the Moof boundary.
+            if (ctsOrder[0]->mCompositionRange.start > prevCts.mCtsEndTime &&
+                ctsOrder[0]->mCompositionRange.start - prevCts.mCtsEndTime <=
+                    CROSS_MOOF_CTS_MERGE_THRESHOLD) {
+              ctsOrder[0]->mCompositionRange.start = prevCts.mCtsEndTime;
+            }
+            prevCts.mCtsEndTime = ctsOrder.LastElement()->mCompositionRange.end;
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          // We've not parsed a Moof for this track yet. Save its CTS end
+          // time for the next Moof we parse.
+          aTracksEndCts.AppendElement(TrackEndCts(
+              trackId, ctsOrder.LastElement()->mCompositionRange.end));
+        }
+      }
+
+      // In MP4, the duration of a sample is defined as the delta between two
+      // decode timestamps. The operation above has updated the duration of each
+      // sample, as a Sample's duration is mCompositionRange.end -
+      // mCompositionRange.start. MSE's TrackBuffersManager expects dts that
+      // increase by the sample's duration, so we rewrite the dts accordingly.
+      TimeUnit presentationDuration =
+          ctsOrder.LastElement()->mCompositionRange.end -
+          ctsOrder[0]->mCompositionRange.start;
+      auto decodeOffset =
+          aMdhd.ToTimeUnit((int64_t)*aDecodeTime - aEdts.mMediaStart);
+      auto offsetOffset = aMvhd.ToTimeUnit(aEdts.mEmptyOffset);
+      // Falls back to zero if either conversion failed.
+      TimeUnit endDecodeTime =
+          (decodeOffset.isOk() && offsetOffset.isOk())
+              ? decodeOffset.unwrap() + offsetOffset.unwrap()
+              : TimeUnit::Zero(aMvhd.mTimescale);
+      TimeUnit decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
+      // Ratio of decode duration to presentation duration; stays 0 when the
+      // presentation duration is zero.
+      double adjust = 0.;
+      if (!presentationDuration.IsZero()) {
+        double num = decodeDuration.ToSeconds();
+        double denom = presentationDuration.ToSeconds();
+        if (denom != 0.) {
+          adjust = num / denom;
+        }
+      }
+
+      TimeUnit dtsOffset = mIndex[0].mDecodeTime;
+      TimeUnit compositionDuration(0, aMvhd.mTimescale);
+      // Adjust the dts, ensuring that the new adjusted dts will never be
+      // greater than decodeTime (the next moof's decode start time).
+      for (auto& sample : mIndex) {
+        sample.mDecodeTime = dtsOffset + compositionDuration.MultDouble(adjust);
+        compositionDuration += sample.mCompositionRange.Length();
+      }
+      mTimeRange =
+          MP4Interval<TimeUnit>(ctsOrder[0]->mCompositionRange.start,
+                                ctsOrder.LastElement()->mCompositionRange.end);
+    }
+    ProcessCencAuxInfo(aSinf.mDefaultEncryptionType);
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Computes the byte range of the auxiliary information of each sample for
+// aux info type |aType|.
+//
+// The first saiz and the first saio entry whose mAuxInfoType equals |aType|
+// are used. Two layouts are supported:
+//  - the saio holds exactly one offset: the per-sample ranges are laid out
+//    back to back starting at that offset, using the sizes from the saiz;
+//  - the saio holds one offset per saiz size: each range starts at its own
+//    offset.
+// Offsets are taken relative to the start of this moof (mRange.mStart).
+//
+// |aByteRanges| is cleared first and then filled with the ranges. Returns
+// true on success; returns false if no matching saiz/saio exists, if the two
+// boxes do not fit either layout, or on allocation failure.
+bool Moof::GetAuxInfo(AtomType aType,
+                      FallibleTArray<MediaByteRange>* aByteRanges) {
+  LOG_DEBUG(Moof, "Starting.");
+  aByteRanges->Clear();
+
+  Saiz* saiz = nullptr;
+  for (Saiz& candidate : mSaizs) {
+    if (candidate.mAuxInfoType == aType) {
+      saiz = &candidate;
+      break;
+    }
+  }
+  if (!saiz) {
+    LOG_DEBUG(Moof, "Could not find saiz matching aType. Returning false.");
+    return false;
+  }
+
+  Saio* saio = nullptr;
+  for (Saio& candidate : mSaios) {
+    if (candidate.mAuxInfoType == aType) {
+      saio = &candidate;
+      break;
+    }
+  }
+  if (!saio) {
+    LOG_DEBUG(Moof, "Could not find saio matching aType. Returning false.");
+    return false;
+  }
+
+  if (saio->mOffsets.Length() == 1) {
+    if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(),
+                                  mozilla::fallible)) {
+      LOG_ERROR(Moof, "OOM");
+      return false;
+    }
+    // Single offset: the aux info of all samples is stored contiguously.
+    uint64_t offset = mRange.mStart + saio->mOffsets[0];
+    for (size_t i = 0; i < saiz->mSampleInfoSize.Length(); i++) {
+      if (!aByteRanges->AppendElement(
+              MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]),
+              mozilla::fallible)) {
+        LOG_ERROR(Moof, "OOM");
+        return false;
+      }
+      offset += saiz->mSampleInfoSize[i];
+    }
+    LOG_DEBUG(
+        Moof,
+        "Saio has 1 entry. aByteRanges populated accordingly. Returning true.");
+    return true;
+  }
+
+  if (saio->mOffsets.Length() == saiz->mSampleInfoSize.Length()) {
+    if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(),
+                                  mozilla::fallible)) {
+      LOG_ERROR(Moof, "OOM");
+      return false;
+    }
+    // One offset per sample: each range starts at its own offset.
+    for (size_t i = 0; i < saio->mOffsets.Length(); i++) {
+      uint64_t offset = mRange.mStart + saio->mOffsets[i];
+      if (!aByteRanges->AppendElement(
+              MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]),
+              mozilla::fallible)) {
+        LOG_ERROR(Moof, "OOM");
+        return false;
+      }
+    }
+    LOG_DEBUG(
+        Moof,
+        "Saio and saiz have same number of entries. aByteRanges populated "
+        "accordingly. Returning true.");
+    return true;
+  }
+
+  LOG_DEBUG(Moof,
+            "Moof::GetAuxInfo could not find any Aux info, returning false.");
+  return false;
+}
+
+// Looks up the auxiliary info byte ranges for scheme |aScheme| via
+// GetAuxInfo() and stores them on the samples in mIndex (mCencRange).
+// Returns false, leaving the index untouched, when no aux info is found or
+// when the number of ranges differs from the number of samples; returns true
+// once every sample has been given its range.
+bool Moof::ProcessCencAuxInfo(AtomType aScheme) {
+  LOG_DEBUG(Moof, "Starting.");
+  FallibleTArray<MediaByteRange> cencRanges;
+  if (!GetAuxInfo(aScheme, &cencRanges) ||
+      cencRanges.Length() != mIndex.Length()) {
+    LOG_DEBUG(Moof, "Couldn't find cenc aux info.");
+    return false;
+  }
+  // size_t index: Length() is unsigned, so avoid a signed/unsigned comparison.
+  for (size_t i = 0; i < cencRanges.Length(); i++) {
+    mIndex[i].mCencRange = cencRanges[i];
+  }
+  LOG_DEBUG(Moof, "Found cenc aux info and stored on index.");
+  return true;
+}
+
+// Parses a "traf" (track fragment) box in two passes over its children.
+//
+// Pass 1 collects the fragment's metadata: "tfhd" replaces mTfhd; then, for
+// the track being parsed (or for every track in multitrack mode), "tfdt" is
+// remembered, "seig" sgpd/sbgp boxes replace the fragment's sample encryption
+// info and sample-to-group entries, and "saiz"/"saio" boxes are appended to
+// mSaizs/mSaios. The track filter uses the tfhd seen so far.
+//
+// If a single track is being parsed and the tfhd's track id does not match,
+// the function returns after pass 1 without touching |*aDecodeTime|.
+//
+// Pass 2 hands every "trun" box to ParseTrun. The decode time starts at the
+// tfdt's base media decode time if a valid tfdt was seen, otherwise at
+// |*aDecodeTime|. mValid reflects the outcome of the last trun parsed, and
+// parsing stops at the first failing trun. |*aDecodeTime| is then updated to
+// the decode time reached.
+//
+// Allocation failures in pass 1 return immediately.
+void Moof::ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode,
+                     Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
+                     Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio) {
+  LOG_DEBUG(
+      Traf,
+      "Starting, aTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      aTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : aTrackParseMode.as<uint32_t>());
+  MOZ_ASSERT(aDecodeTime);
+  MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() ||
+                 aTrex.mTrackId == aTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, aTrex should have the same track id "
+             "as the track being parsed.");
+  Tfdt tfdt;
+
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("tfhd")) {
+      mTfhd = Tfhd(box, aTrex);
+    } else if (aTrackParseMode.is<ParseAllTracks>() ||
+               mTfhd.mTrackId == aTrackParseMode.as<uint32_t>()) {
+      if (box.IsType("tfdt")) {
+        tfdt = Tfdt(box);
+      } else if (box.IsType("sgpd")) {
+        Sgpd sgpd(box);
+        if (sgpd.IsValid() && sgpd.mGroupingType == "seig") {
+          mFragmentSampleEncryptionInfoEntries.Clear();
+          if (!mFragmentSampleEncryptionInfoEntries.AppendElements(
+                  sgpd.mEntries, mozilla::fallible)) {
+            LOG_ERROR(Moof, "OOM");
+            return;
+          }
+        }
+      } else if (box.IsType("sbgp")) {
+        Sbgp sbgp(box);
+        if (sbgp.IsValid() && sbgp.mGroupingType == "seig") {
+          mFragmentSampleToGroupEntries.Clear();
+          if (!mFragmentSampleToGroupEntries.AppendElements(
+                  sbgp.mEntries, mozilla::fallible)) {
+            LOG_ERROR(Moof, "OOM");
+            return;
+          }
+        }
+      } else if (box.IsType("saiz")) {
+        if (!mSaizs.AppendElement(Saiz(box, aSinf.mDefaultEncryptionType),
+                                  mozilla::fallible)) {
+          LOG_ERROR(Moof, "OOM");
+          return;
+        }
+      } else if (box.IsType("saio")) {
+        if (!mSaios.AppendElement(Saio(box, aSinf.mDefaultEncryptionType),
+                                  mozilla::fallible)) {
+          LOG_ERROR(Moof, "OOM");
+          return;
+        }
+      }
+    }
+  }
+  if (aTrackParseMode.is<uint32_t>() &&
+      mTfhd.mTrackId != aTrackParseMode.as<uint32_t>()) {
+    LOG_DEBUG(Traf,
+              "Early return as not multitrack parser and track id didn't match "
+              "mTfhd.mTrackId=%" PRIu32,
+              mTfhd.mTrackId);
+    return;
+  }
+  // Now search for TRUN boxes.
+  uint64_t decodeTime =
+      tfdt.IsValid() ? tfdt.mBaseMediaDecodeTime : *aDecodeTime;
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("trun")) {
+      if (ParseTrun(box, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio).isOk()) {
+        mValid = true;
+      } else {
+        LOG_WARN(Moof, "ParseTrun failed");
+        mValid = false;
+        break;
+      }
+    }
+  }
+  *aDecodeTime = decodeTime;
+  // Plain "%" PRIu64: a stray '.' here would act as a zero precision and
+  // print nothing for a decode time of 0.
+  LOG_DEBUG(Traf, "Done, setting aDecodeTime=%" PRIu64 ".", decodeTime);
+}
+
+// If |aMoof| starts after this moof's time range ends, and the gap is no
+// larger than mMaxRoundingError, extends this moof's time range to end where
+// |aMoof| starts.
+void Moof::FixRounding(const Moof& aMoof) {
+  TimeUnit delta = aMoof.mTimeRange.start - mTimeRange.end;
+  const bool closable = delta.IsPositive() && delta <= mMaxRoundingError;
+  if (!closable) {
+    return;
+  }
+  mTimeRange.end = aMoof.mTimeRange.start;
+}
+
+// Parses a "trun" (track run) box and appends one Sample to mIndex for every
+// run entry with a non-zero size.
+//
+// Requires mTfhd, |aMvhd|, |aMdhd| and |aEdts| to be valid. The box flags
+// select which optional fields are present:
+//   0x001 data offset, 0x004 first sample flags, 0x100 per-sample duration,
+//   0x200 per-sample size, 0x400 per-sample flags,
+//   0x800 per-sample composition time offset.
+// Fields that are absent fall back to the defaults carried by mTfhd.
+//
+// |*aDecodeTime| is the decode time, in media timescale units, of the first
+// sample of the run; on success it is advanced by the duration of every entry
+// in the run. mMdatRange is widened to cover each sample's bytes and
+// mMaxRoundingError is increased by the time unit equivalent of the sample
+// count.
+//
+// Returns Ok on success (including an empty run), or an error if a dependency
+// is invalid, the box is truncated, a time conversion fails or memory cannot
+// be reserved.
+Result<Ok, nsresult> Moof::ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
+                                     Edts& aEdts, uint64_t* aDecodeTime,
+                                     bool aIsAudio) {
+  LOG_DEBUG(Trun, "Starting.");
+  if (!mTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
+      !aEdts.IsValid()) {
+    // Each value logged is the validity of the named dependency.
+    LOG_WARN(
+        Moof, "Invalid dependencies: mTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
+        mTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), aEdts.IsValid());
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  BoxReader reader(aBox);
+  if (!reader->CanReadType<uint32_t>()) {
+    LOG_WARN(Moof, "Incomplete Box (missing flags)");
+    return Err(NS_ERROR_FAILURE);
+  }
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+
+  if (!reader->CanReadType<uint32_t>()) {
+    LOG_WARN(Moof, "Incomplete Box (missing sampleCount)");
+    return Err(NS_ERROR_FAILURE);
+  }
+  uint32_t sampleCount;
+  MOZ_TRY_VAR(sampleCount, reader->ReadU32());
+  if (sampleCount == 0) {
+    LOG_DEBUG(Trun, "Trun with no samples, returning.");
+    return Ok();
+  }
+
+  // Byte offset of the first sample: the tfhd base offset plus the optional
+  // data offset of this run.
+  uint64_t offset = mTfhd.mBaseDataOffset;
+  if (flags & 0x01) {
+    uint32_t tmp;
+    MOZ_TRY_VAR(tmp, reader->ReadU32());
+    offset += tmp;
+  }
+  uint32_t firstSampleFlags = mTfhd.mDefaultSampleFlags;
+  if (flags & 0x04) {
+    MOZ_TRY_VAR(firstSampleFlags, reader->ReadU32());
+  }
+  uint64_t decodeTime = *aDecodeTime;
+
+  if (!mIndex.SetCapacity(sampleCount, fallible)) {
+    LOG_ERROR(Moof, "Out of Memory");
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  for (size_t i = 0; i < sampleCount; i++) {
+    uint32_t sampleDuration = mTfhd.mDefaultSampleDuration;
+    if (flags & 0x100) {
+      MOZ_TRY_VAR(sampleDuration, reader->ReadU32());
+    }
+    uint32_t sampleSize = mTfhd.mDefaultSampleSize;
+    if (flags & 0x200) {
+      MOZ_TRY_VAR(sampleSize, reader->ReadU32());
+    }
+    // Only the first sample of the run uses firstSampleFlags as its default.
+    uint32_t sampleFlags = i ? mTfhd.mDefaultSampleFlags : firstSampleFlags;
+    if (flags & 0x400) {
+      MOZ_TRY_VAR(sampleFlags, reader->ReadU32());
+    }
+    int32_t ctsOffset = 0;
+    if (flags & 0x800) {
+      MOZ_TRY_VAR(ctsOffset, reader->Read32());
+    }
+
+    // Zero-sized entries add no sample but still advance the decode time.
+    if (sampleSize) {
+      Sample sample;
+      sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
+      offset += sampleSize;
+
+      TimeUnit decodeOffset, emptyOffset, startCts, endCts;
+      MOZ_TRY_VAR(decodeOffset,
+                  aMdhd.ToTimeUnit((int64_t)decodeTime - aEdts.mMediaStart));
+      MOZ_TRY_VAR(emptyOffset, aMvhd.ToTimeUnit(aEdts.mEmptyOffset));
+      sample.mDecodeTime = decodeOffset + emptyOffset;
+      MOZ_TRY_VAR(startCts, aMdhd.ToTimeUnit((int64_t)decodeTime + ctsOffset -
+                                             aEdts.mMediaStart));
+      MOZ_TRY_VAR(endCts, aMdhd.ToTimeUnit((int64_t)decodeTime + ctsOffset +
+                                           sampleDuration - aEdts.mMediaStart));
+      sample.mCompositionRange =
+          MP4Interval<TimeUnit>(startCts + emptyOffset, endCts + emptyOffset);
+      // Sometimes audio streams don't properly mark their samples as keyframes,
+      // because every audio sample is a keyframe.
+      sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;
+
+      // FIXME: Make this infallible after bug 968520 is done.
+      MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));
+
+      mMdatRange = mMdatRange.Span(sample.mByteRange);
+    }
+    decodeTime += sampleDuration;
+  }
+  TimeUnit roundTime;
+  MOZ_TRY_VAR(roundTime, aMdhd.ToTimeUnit(sampleCount));
+  mMaxRoundingError = roundTime + mMaxRoundingError;
+
+  *aDecodeTime = decodeTime;
+
+  LOG_DEBUG(Trun, "Done.");
+  return Ok();
+}
+
+// Parses a "tkhd" box; mValid records whether parsing succeeded.
+Tkhd::Tkhd(Box& aBox) : mTrackId(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Tkhd, "Parse failed");
+}
+
+// Reads the fields of a "tkhd" box. The version is taken from the top byte of
+// the first 32-bit word: version 0 stores the creation time, modification
+// time and duration as 32-bit values, version 1 as 64-bit values. Returns an
+// error if the box is truncated or the version is neither 0 nor 1, matching
+// what Mvhd::Parse does for unknown versions.
+Result<Ok, nsresult> Tkhd::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  uint8_t version = flags >> 24;
+  if (version == 0) {
+    uint32_t creationTime, modificationTime, reserved, duration;
+    MOZ_TRY_VAR(creationTime, reader->ReadU32());
+    MOZ_TRY_VAR(modificationTime, reader->ReadU32());
+    MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+    MOZ_TRY_VAR(reserved, reader->ReadU32());
+    MOZ_TRY_VAR(duration, reader->ReadU32());
+
+    (void)reserved;
+    NS_ASSERTION(!reserved, "reserved should be 0");
+
+    mCreationTime = creationTime;
+    mModificationTime = modificationTime;
+    mDuration = duration;
+  } else if (version == 1) {
+    uint32_t reserved;
+    MOZ_TRY_VAR(mCreationTime, reader->ReadU64());
+    MOZ_TRY_VAR(mModificationTime, reader->ReadU64());
+    MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+    MOZ_TRY_VAR(reserved, reader->ReadU32());
+    (void)reserved;
+    NS_ASSERTION(!reserved, "reserved should be 0");
+    MOZ_TRY_VAR(mDuration, reader->ReadU64());
+  } else {
+    // Unknown version: nothing was parsed, so do not report success.
+    return Err(NS_ERROR_FAILURE);
+  }
+  return Ok();
+}
+
+// Parses an "mvhd" box; all fields start at zero and mValid records whether
+// parsing succeeded.
+Mvhd::Mvhd(Box& aBox)
+    : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Mvhd, "Parse failed");
+}
+
+// Reads creation time, modification time, timescale and duration from |aBox|.
+//
+// The top byte of the first 32-bit word is the version: 0 means 32-bit
+// times/duration, 1 means 64-bit ones; the timescale is 32-bit in both.
+// Any other version, or a failed read, yields an error.
+Result<Ok, nsresult> Mvhd::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+
+  switch (static_cast<uint8_t>(versionAndFlags >> 24)) {
+    case 0: {
+      uint32_t created, modified, length;
+      MOZ_TRY_VAR(created, reader->ReadU32());
+      MOZ_TRY_VAR(modified, reader->ReadU32());
+      MOZ_TRY_VAR(mTimescale, reader->ReadU32());
+      MOZ_TRY_VAR(length, reader->ReadU32());
+      // Widen the 32-bit values into the 64-bit members.
+      mCreationTime = created;
+      mModificationTime = modified;
+      mDuration = length;
+      return Ok();
+    }
+    case 1:
+      MOZ_TRY_VAR(mCreationTime, reader->ReadU64());
+      MOZ_TRY_VAR(mModificationTime, reader->ReadU64());
+      MOZ_TRY_VAR(mTimescale, reader->ReadU32());
+      MOZ_TRY_VAR(mDuration, reader->ReadU64());
+      return Ok();
+    default:
+      return Err(NS_ERROR_FAILURE);
+  }
+}
+
+// An Mdhd is parsed exactly like an Mvhd: all the work is delegated to the
+// Mvhd(Box&) constructor.
+Mdhd::Mdhd(Box& aBox) : Mvhd(aBox) {}
+
+// Builds a Trex from |aBox|. Every field starts at zero; the atom is valid
+// only if Parse() succeeds, otherwise a warning is logged.
+Trex::Trex(Box& aBox)
+    : mFlags(0),
+      mTrackId(0),
+      mDefaultSampleDescriptionIndex(0),
+      mDefaultSampleDuration(0),
+      mDefaultSampleSize(0),
+      mDefaultSampleFlags(0) {
+  if (Parse(aBox).isOk()) {
+    mValid = true;
+    return;
+  }
+  mValid = false;
+  LOG_WARN(Trex, "Parse failed");
+}
+
+// Reads six consecutive 32-bit values from |aBox| into the members, in the
+// order listed below. The first read failure is propagated as an error and
+// leaves the remaining members at their previous values.
+Result<Ok, nsresult> Trex::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t* const fieldsInBoxOrder[] = {
+      &mFlags,
+      &mTrackId,
+      &mDefaultSampleDescriptionIndex,
+      &mDefaultSampleDuration,
+      &mDefaultSampleSize,
+      &mDefaultSampleFlags,
+  };
+  for (uint32_t* field : fieldsInBoxOrder) {
+    MOZ_TRY_VAR(*field, reader->ReadU32());
+  }
+
+  return Ok();
+}
+
+// Builds a Tfhd from |aBox|, starting from a copy of |aTrex| so that any
+// default not overridden by the box keeps the Trex value. The atom is valid
+// only if Parse() succeeds; a failure is logged as a warning.
+Tfhd::Tfhd(Box& aBox, Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
+  const bool parsedOk = Parse(aBox).isOk();
+  mValid = parsedOk;
+  if (!parsedOk) {
+    LOG_WARN(Tfhd, "Parse failed");
+  }
+}
+
+// Reads a 'tfhd' box that sits inside a 'traf' inside a 'moof' (asserted).
+//
+// After the flags word and the track id, each optional field is present only
+// when its flag bit is set. The base data offset defaults to the offset of
+// the enclosing 'moof' box. A read failure is propagated as an error.
+Result<Ok, nsresult> Tfhd::Parse(Box& aBox) {
+  MOZ_ASSERT(aBox.IsType("tfhd"));
+  MOZ_ASSERT(aBox.Parent()->IsType("traf"));
+  MOZ_ASSERT(aBox.Parent()->Parent()->IsType("moof"));
+
+  BoxReader reader(aBox);
+
+  MOZ_TRY_VAR(mFlags, reader->ReadU32());
+  MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+
+  // Default base offset: start of the grandparent (moof) box, unless the box
+  // carries an explicit 64-bit value (flag 0x01).
+  mBaseDataOffset = aBox.Parent()->Parent()->Offset();
+  if (mFlags & 0x01) {
+    MOZ_TRY_VAR(mBaseDataOffset, reader->ReadU64());
+  }
+
+  // The remaining optional 32-bit fields appear in this order, each guarded
+  // by its own flag bit; absent fields keep their current values.
+  const struct {
+    uint32_t mMask;
+    uint32_t* mField;
+  } optionalFields[] = {
+      {0x02, &mDefaultSampleDescriptionIndex},
+      {0x08, &mDefaultSampleDuration},
+      {0x10, &mDefaultSampleSize},
+      {0x20, &mDefaultSampleFlags},
+  };
+  for (const auto& optional : optionalFields) {
+    if (mFlags & optional.mMask) {
+      MOZ_TRY_VAR(*optional.mField, reader->ReadU32());
+    }
+  }
+
+  return Ok();
+}
+
+// Builds a Tfdt from |aBox|. The decode time starts at zero; the atom is
+// valid only if Parse() succeeds, otherwise a warning is logged.
+Tfdt::Tfdt(Box& aBox) : mBaseMediaDecodeTime(0) {
+  if (Parse(aBox).isOk()) {
+    mValid = true;
+    return;
+  }
+  mValid = false;
+  LOG_WARN(Tfdt, "Parse failed");
+}
+
+// Reads the base media decode time from |aBox|: a 32-bit value for version 0,
+// a 64-bit value for version 1. Any other version leaves the member untouched
+// and still returns Ok(). A read failure is propagated as an error.
+Result<Ok, nsresult> Tfdt::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+
+  switch (static_cast<uint8_t>(versionAndFlags >> 24)) {
+    case 0: {
+      uint32_t decodeTime32;
+      MOZ_TRY_VAR(decodeTime32, reader->ReadU32());
+      mBaseMediaDecodeTime = decodeTime32;
+      break;
+    }
+    case 1:
+      MOZ_TRY_VAR(mBaseMediaDecodeTime, reader->ReadU64());
+      break;
+    default:
+      break;
+  }
+  return Ok();
+}
+
+// Builds an Edts from |aBox|. Media start and empty offset start at zero;
+// mValid records whether Parse() succeeded and a failure is logged.
+Edts::Edts(Box& aBox) : mMediaStart(0), mEmptyOffset(0) {
+  const bool parsedOk = Parse(aBox).isOk();
+  mValid = parsedOk;
+  if (!parsedOk) {
+    LOG_WARN(Edts, "Parse failed");
+  }
+}
+
+// Parses the edit list carried by the first child of |aBox|, which must be
+// an 'elst' box (otherwise an error is returned).
+//
+// Only a limited shape is understood: an optional leading empty edit
+// (media_time == -1), whose duration becomes mEmptyOffset, followed by one
+// regular edit, whose media_time becomes mMediaStart. Anything beyond that is
+// logged and ignored. A read failure is propagated as an error.
+Result<Ok, nsresult> Edts::Parse(Box& aBox) {
+  Box elst = aBox.FirstChild();
+  if (!elst.IsType("elst")) {
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  BoxReader reader(elst);
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  const uint8_t version = versionAndFlags >> 24;
+  const bool has64BitFields = (version == 1);
+  bool sawEmptyEdit = false;
+  uint32_t entryCount;
+  MOZ_TRY_VAR(entryCount, reader->ReadU32());
+
+  for (uint32_t i = 0; i < entryCount; i++) {
+    uint64_t segmentDuration;
+    int64_t mediaTime;
+    if (has64BitFields) {
+      MOZ_TRY_VAR(segmentDuration, reader->ReadU64());
+      MOZ_TRY_VAR(mediaTime, reader->Read64());
+    } else {
+      uint32_t duration32;
+      MOZ_TRY_VAR(duration32, reader->ReadU32());
+      segmentDuration = duration32;
+      int32_t mediaTime32;
+      MOZ_TRY_VAR(mediaTime32, reader->Read32());
+      mediaTime = mediaTime32;
+    }
+
+    const bool isEmptyEdit = (mediaTime == -1);
+    if (isEmptyEdit) {
+      if (i != 0) {
+        // An empty edit is only honoured as the very first entry.
+        LOG_WARN(Edts, "Multiple empty edit, not handled");
+      } else {
+        mEmptyOffset = segmentDuration;
+        sawEmptyEdit = true;
+      }
+    } else if (i > 1 || (i > 0 && !sawEmptyEdit)) {
+      LOG_WARN(Edts,
+               "More than one edit entry, not handled. A/V sync will be wrong");
+      break;
+    } else {
+      mMediaStart = mediaTime;
+    }
+    MOZ_TRY(reader->ReadU32());  // media_rate_integer and media_rate_fraction
+  }
+
+  return Ok();
+}
+
+// Builds a Saiz from |aBox|. |aDefaultType| is the aux-info type used when
+// the box does not carry one. The atom is valid only if Parse() succeeds;
+// a failure is logged as a warning.
+Saiz::Saiz(Box& aBox, AtomType aDefaultType)
+    : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) {
+  if (Parse(aBox).isOk()) {
+    mValid = true;
+    return;
+  }
+  mValid = false;
+  LOG_WARN(Saiz, "Parse failed");
+}
+
+// Fills mSampleInfoSize with one size byte per sample.
+//
+// When bit 0 of the flags word is set, the aux-info type and its parameter
+// are read from the box. A non-zero default size is replicated |count|
+// times; a zero default means |count| explicit size bytes follow in the box.
+// Returns an error on a failed read, a short box, or allocation failure.
+Result<Ok, nsresult> Saiz::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  const bool hasAuxInfoType = (versionAndFlags & 1) != 0;
+  if (hasAuxInfoType) {
+    MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32());
+    MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32());
+  }
+
+  uint8_t sharedInfoSize;
+  MOZ_TRY_VAR(sharedInfoSize, reader->ReadU8());
+  uint32_t sampleCount;
+  MOZ_TRY_VAR(sampleCount, reader->ReadU32());
+
+  if (sharedInfoSize == 0) {
+    // Per-sample sizes are stored explicitly in the box.
+    if (!reader->ReadArray(mSampleInfoSize, sampleCount)) {
+      LOG_WARN(Saiz, "Incomplete Box (OOM or missing count:%u)", sampleCount);
+      return Err(NS_ERROR_FAILURE);
+    }
+    return Ok();
+  }
+
+  // Every sample shares the same size: materialise it |sampleCount| times.
+  if (!mSampleInfoSize.SetLength(sampleCount, fallible)) {
+    LOG_ERROR(Saiz, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  memset(mSampleInfoSize.Elements(), sharedInfoSize, mSampleInfoSize.Length());
+  return Ok();
+}
+
+// Builds a Saio from |aBox|. |aDefaultType| is the aux-info type used when
+// the box does not carry one. The atom is valid only if Parse() succeeds;
+// a failure is logged as a warning.
+Saio::Saio(Box& aBox, AtomType aDefaultType)
+    : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) {
+  const bool parsedOk = Parse(aBox).isOk();
+  mValid = parsedOk;
+  if (!parsedOk) {
+    LOG_WARN(Saio, "Parse failed");
+  }
+}
+
+// Fills mOffsets with the offsets listed in |aBox|.
+//
+// When bit 0 of the flags word is set, the aux-info type and its parameter
+// are read from the box. Offsets are 32-bit for version 0 and 64-bit for any
+// other version; both are stored as 64-bit values.
+// Returns an error on a failed read or if the array cannot be allocated.
+Result<Ok, nsresult> Saio::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  uint8_t version = flags >> 24;
+  if (flags & 1) {
+    MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32());
+    MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32());
+  }
+
+  size_t count;
+  MOZ_TRY_VAR(count, reader->ReadU32());
+  // Reserve up front so the fallible appends below cannot fail.
+  if (!mOffsets.SetCapacity(count, fallible)) {
+    // Log under this box's own tag (previously mis-tagged as Saiz).
+    LOG_ERROR(Saio, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  if (version == 0) {
+    uint32_t offset;
+    for (size_t i = 0; i < count; i++) {
+      MOZ_TRY_VAR(offset, reader->ReadU32());
+      MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible));
+    }
+  } else {
+    uint64_t offset;
+    for (size_t i = 0; i < count; i++) {
+      MOZ_TRY_VAR(offset, reader->ReadU64());
+      MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible));
+    }
+  }
+  return Ok();
+}
+
+// Builds an Sbgp (SampleToGroup box) from |aBox|. The atom is valid only if
+// Parse() succeeds; a failure is logged as a warning.
+Sbgp::Sbgp(Box& aBox) : mGroupingTypeParam(0) {
+  if (Parse(aBox).isOk()) {
+    mValid = true;
+    return;
+  }
+  mValid = false;
+  LOG_WARN(Sbgp, "Parse failed");
+}
+
+// Reads the grouping type, the grouping type parameter (version 1 only) and
+// the list of (sample count, group description index) pairs from |aBox|,
+// appending each pair to mEntries.
+// Returns an error on a failed read or if an entry cannot be appended.
+Result<Ok, nsresult> Sbgp::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  const uint8_t version = versionAndFlags >> 24;
+
+  uint32_t groupingType;
+  MOZ_TRY_VAR(groupingType, reader->ReadU32());
+  mGroupingType = groupingType;
+
+  if (version == 1) {
+    MOZ_TRY_VAR(mGroupingTypeParam, reader->ReadU32());
+  }
+
+  uint32_t entryCount;
+  MOZ_TRY_VAR(entryCount, reader->ReadU32());
+
+  for (uint32_t remaining = entryCount; remaining > 0; --remaining) {
+    uint32_t samplesInRun;
+    MOZ_TRY_VAR(samplesInRun, reader->ReadU32());
+    uint32_t descriptionIndex;
+    MOZ_TRY_VAR(descriptionIndex, reader->ReadU32());
+
+    SampleToGroupEntry entry(samplesInRun, descriptionIndex);
+    if (!mEntries.AppendElement(entry, mozilla::fallible)) {
+      LOG_ERROR(Sbgp, "OOM");
+      return Err(NS_ERROR_FAILURE);
+    }
+  }
+  return Ok();
+}
+
+// Builds an Sgpd (SampleGroupDescription box) from |aBox|. The atom is valid
+// only if Parse() succeeds; a failure is logged as a warning.
+Sgpd::Sgpd(Box& aBox) {
+  const bool parsedOk = Parse(aBox).isOk();
+  mValid = parsedOk;
+  if (!parsedOk) {
+    LOG_WARN(Sgpd, "Parse failed");
+  }
+}
+
+// Reads the grouping type and the list of sample-encryption entries from
+// |aBox|, appending each to mEntries.
+//
+// For version 1 boxes a default entry length is read first; when it is zero,
+// every entry is preceded by its own length. Any declared length smaller
+// than the minimum entry size (a 32-bit word plus the key id) is rejected.
+// Returns an error on a failed read, a bad length, an entry that fails to
+// initialise, or allocation failure.
+Result<Ok, nsresult> Sgpd::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  const uint8_t version = versionAndFlags >> 24;
+  const bool isVersion1 = (version == 1);
+
+  uint32_t groupingType;
+  MOZ_TRY_VAR(groupingType, reader->ReadU32());
+  mGroupingType = groupingType;
+
+  const uint32_t minEntrySize = sizeof(uint32_t) + kKeyIdSize;
+  uint32_t defaultLength = 0;
+
+  if (isVersion1) {
+    MOZ_TRY_VAR(defaultLength, reader->ReadU32());
+    if (defaultLength != 0 && defaultLength < minEntrySize) {
+      return Err(NS_ERROR_FAILURE);
+    }
+  }
+
+  uint32_t entryCount;
+  MOZ_TRY_VAR(entryCount, reader->ReadU32());
+
+  // Entries carry an explicit length only in version 1 with no default.
+  const bool hasPerEntryLength = isVersion1 && defaultLength == 0;
+  for (uint32_t i = 0; i < entryCount; ++i) {
+    if (hasPerEntryLength) {
+      uint32_t entryLength;
+      MOZ_TRY_VAR(entryLength, reader->ReadU32());
+      if (entryLength < minEntrySize) {
+        return Err(NS_ERROR_FAILURE);
+      }
+    }
+
+    CencSampleEncryptionInfoEntry entry;
+    if (!entry.Init(reader).isOk()) {
+      return Err(NS_ERROR_FAILURE);
+    }
+    if (!mEntries.AppendElement(entry, mozilla::fallible)) {
+      LOG_ERROR(Sgpd, "OOM");
+      return Err(NS_ERROR_FAILURE);
+    }
+  }
+  return Ok();
+}
+
+// Reads one sample-encryption group entry from |aReader|.
+//
+// Layout consumed, in order: one reserved byte, one byte packing the crypt
+// (high nibble) and skip (low nibble) block counts, the is-encrypted byte,
+// the per-sample IV size, and kKeyIdSize bytes of key id. For an encrypted
+// entry whose per-sample IV size is 0, a constant IV (size byte followed by
+// that many bytes) comes next; this is the form used by the cbcs scheme.
+//
+// Returns an error on a failed read, allocation failure, or when the sizes
+// are inconsistent:
+//  - encrypted entries need a per-sample IV size of 0 (constant IV), 8 or 16;
+//  - a constant IV must be 8 or 16 bytes long;
+//  - unencrypted entries must declare a per-sample IV size of 0.
+Result<Ok, nsresult> CencSampleEncryptionInfoEntry::Init(BoxReader& aReader) {
+  // Skip a reserved byte.
+  MOZ_TRY(aReader->ReadU8());
+
+  uint8_t pattern;
+  MOZ_TRY_VAR(pattern, aReader->ReadU8());
+  mCryptByteBlock = pattern >> 4;
+  mSkipByteBlock = pattern & 0x0f;
+
+  uint8_t isEncrypted;
+  MOZ_TRY_VAR(isEncrypted, aReader->ReadU8());
+  mIsEncrypted = isEncrypted != 0;
+
+  MOZ_TRY_VAR(mIVSize, aReader->ReadU8());
+
+  // Read the key id.
+  if (!mKeyId.SetLength(kKeyIdSize, fallible)) {
+    LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  for (uint32_t i = 0; i < kKeyIdSize; ++i) {
+    MOZ_TRY_VAR(mKeyId.ElementAt(i), aReader->ReadU8());
+  }
+
+  if (!mIsEncrypted) {
+    // Unprotected entries carry no IV of any kind.
+    if (mIVSize != 0) {
+      return Err(NS_ERROR_FAILURE);
+    }
+    return Ok();
+  }
+
+  if (mIVSize == 0) {
+    // Protected content with 0 sized IV indicates a constant IV is present.
+    // This is used for the cbcs scheme.
+    uint8_t constantIVSize;
+    MOZ_TRY_VAR(constantIVSize, aReader->ReadU8());
+    if (constantIVSize != 8 && constantIVSize != 16) {
+      LOG_WARN(CencSampleEncryptionInfoEntry,
+               "Unexpected constantIVSize: %" PRIu8, constantIVSize);
+      return Err(NS_ERROR_FAILURE);
+    }
+    if (!mConsantIV.SetLength(constantIVSize, mozilla::fallible)) {
+      LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM");
+      return Err(NS_ERROR_FAILURE);
+    }
+    for (uint32_t i = 0; i < constantIVSize; ++i) {
+      MOZ_TRY_VAR(mConsantIV.ElementAt(i), aReader->ReadU8());
+    }
+  } else if (mIVSize != 8 && mIVSize != 16) {
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  return Ok();
+}
+}  // namespace mozilla
+
+#undef LOG_DEBUG
+#undef LOG_WARN
+#undef LOG_ERROR
diff --git a/dom/media/mp4/MoofParser.h b/dom/media/mp4/MoofParser.h
new file mode 100644
index 0000000000..f644157308
--- /dev/null
+++ b/dom/media/mp4/MoofParser.h
@@ -0,0 +1,361 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOOF_PARSER_H_
+#define MOOF_PARSER_H_
+
+#include "mozilla/ResultExtensions.h"
+#include "TimeUnits.h"
+#include "mozilla/Variant.h"
+#include "Atom.h"
+#include "AtomType.h"
+#include "SinfParser.h"
+#include "ByteStream.h"
+#include "MP4Interval.h"
+#include "MediaResource.h"
+
+namespace mozilla {
+
+class Box;
+class BoxContext;
+class BoxReader;
+class Moof;
+
+// Used to track the CTS end time of the last sample of a track
+// in the preceding Moof, so that we can smooth tracks' timestamps
+// across Moofs.
+struct TrackEndCts {
+  TrackEndCts(uint32_t aTrackId, const media::TimeUnit& aCtsEndTime)
+      : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
+  // Id of the track the end time belongs to.
+  uint32_t mTrackId;
+  // CTS end time recorded for that track.
+  media::TimeUnit mCtsEndTime;
+};
+
+// Header atom holding creation/modification times, a timescale and a
+// duration. Also serves as the base class of Tkhd and Mdhd.
+class Mvhd : public Atom {
+ public:
+  // Creates an empty header: every field is zero.
+  Mvhd()
+      : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
+  // Parses the header out of |aBox|; check IsValid() for the outcome.
+  explicit Mvhd(Box& aBox);
+
+  // Converts |aTimescaleUnits| (a count of ticks at mTimescale) to a
+  // TimeUnit. Fails with NS_ERROR_FAILURE when mTimescale is zero.
+  Result<media::TimeUnit, nsresult> ToTimeUnit(int64_t aTimescaleUnits) {
+    if (!mTimescale) {
+      NS_WARNING("invalid mTimescale");
+      return Err(NS_ERROR_FAILURE);
+    }
+    return media::TimeUnit(aTimescaleUnits, mTimescale);
+  }
+
+  uint64_t mCreationTime;
+  uint64_t mModificationTime;
+  uint32_t mTimescale;
+  uint64_t mDuration;
+
+ protected:
+  // Fills the fields above from |aBox|; returns an error on failure.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Track header atom: the Mvhd fields plus a track id. Note that Tkhd's own
+// Parse() never assigns mTimescale, so it keeps the zero set by Mvhd().
+class Tkhd : public Mvhd {
+ public:
+  // Creates an empty header with track id 0.
+  Tkhd() : mTrackId(0) {}
+  // Parses the header out of |aBox|; check IsValid() for the outcome.
+  explicit Tkhd(Box& aBox);
+
+  uint32_t mTrackId;
+
+ protected:
+  // Hides Mvhd::Parse with the track-header layout.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Media header atom. It adds nothing to Mvhd: the Box constructor simply
+// forwards to Mvhd(Box&), so the box is read with Mvhd's parser.
+class Mdhd : public Mvhd {
+ public:
+  Mdhd() = default;
+  explicit Mdhd(Box& aBox);
+};
+
+// Per-track default sample values. Also the base class of Tfhd, which
+// starts from a copy of these defaults.
+class Trex : public Atom {
+ public:
+  // Creates defaults for |aTrackId| with every other field zero. mValid is
+  // left at the Atom default (false).
+  explicit Trex(uint32_t aTrackId)
+      : mFlags(0),
+        mTrackId(aTrackId),
+        mDefaultSampleDescriptionIndex(0),
+        mDefaultSampleDuration(0),
+        mDefaultSampleSize(0),
+        mDefaultSampleFlags(0) {}
+
+  // Parses the defaults out of |aBox|; check IsValid() for the outcome.
+  explicit Trex(Box& aBox);
+
+  uint32_t mFlags;
+  uint32_t mTrackId;
+  uint32_t mDefaultSampleDescriptionIndex;
+  uint32_t mDefaultSampleDuration;
+  uint32_t mDefaultSampleSize;
+  uint32_t mDefaultSampleFlags;
+
+ protected:
+  // Reads the six fields above from |aBox|, in declaration order.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Track fragment header: a copy of the track's Trex defaults, optionally
+// overridden by the fields present in the 'tfhd' box, plus a base data offset.
+class Tfhd : public Trex {
+ public:
+  // Copies |aTrex| without parsing anything; validity mirrors |aTrex|.
+  explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
+    mValid = aTrex.IsValid();
+  }
+  // Copies |aTrex|, then applies the overrides found in |aBox|.
+  Tfhd(Box& aBox, Trex& aTrex);
+
+  uint64_t mBaseDataOffset;
+
+ protected:
+  // Hides Trex::Parse with the fragment-header layout.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Atom holding a single value: the base media decode time.
+class Tfdt : public Atom {
+ public:
+  // Creates an empty atom with a decode time of zero.
+  Tfdt() : mBaseMediaDecodeTime(0) {}
+  // Parses the decode time out of |aBox|; check IsValid() for the outcome.
+  explicit Tfdt(Box& aBox);
+
+  uint64_t mBaseMediaDecodeTime;
+
+ protected:
+  // Reads a 32-bit (version 0) or 64-bit (version 1) decode time.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Edit list summary: the media start time of the (single) handled edit and
+// the duration of an optional leading empty edit.
+class Edts : public Atom {
+ public:
+  // Creates an empty summary: both values are zero.
+  Edts() : mMediaStart(0), mEmptyOffset(0) {}
+  // Parses the edit list out of |aBox|.
+  explicit Edts(Box& aBox);
+  // Always reports valid, regardless of whether parsing succeeded.
+  virtual bool IsValid() override {
+    // edts is optional
+    return true;
+  }
+
+  int64_t mMediaStart;
+  int64_t mEmptyOffset;
+
+ protected:
+  // Reads the 'elst' child of |aBox|; returns an error on failure.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// One entry of a Moof's sample index (Moof::mIndex).
+struct Sample {
+  // Byte range of the sample data; Moof::mMdatRange is grown to span it.
+  mozilla::MediaByteRange mByteRange;
+  // NOTE(review): presumably the byte range of the sample's common
+  // encryption auxiliary data -- confirm against ProcessCencAuxInfo().
+  mozilla::MediaByteRange mCencRange;
+  media::TimeUnit mDecodeTime;
+  MP4Interval<media::TimeUnit> mCompositionRange;
+  // True when the sample is a sync sample (keyframe). Initialised so that a
+  // default-constructed Sample never carries an indeterminate value.
+  bool mSync = false;
+};
+
+// Atom listing the size, in bytes, of each sample's auxiliary information.
+class Saiz final : public Atom {
+ public:
+  // Parses |aBox|; |aDefaultType| is used as the aux-info type when the box
+  // does not specify one. Check IsValid() for the outcome.
+  Saiz(Box& aBox, AtomType aDefaultType);
+
+  AtomType mAuxInfoType;
+  uint32_t mAuxInfoTypeParameter;
+  // One size byte per sample.
+  FallibleTArray<uint8_t> mSampleInfoSize;
+
+ protected:
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Atom listing auxiliary-information offsets.
+class Saio final : public Atom {
+ public:
+  // Parses |aBox|; |aDefaultType| is used as the aux-info type when the box
+  // does not specify one. Check IsValid() for the outcome.
+  Saio(Box& aBox, AtomType aDefaultType);
+
+  AtomType mAuxInfoType;
+  uint32_t mAuxInfoTypeParameter;
+  // Offsets as read from the box (32-bit values are widened to 64 bits).
+  FallibleTArray<uint64_t> mOffsets;
+
+ protected:
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// One (sample count, group description index) pair read from an Sbgp box.
+struct SampleToGroupEntry {
+ public:
+  // NOTE(review): these look like the bases that distinguish track-level
+  // from fragment-level group description indices -- confirm at the use
+  // sites, which are outside this header.
+  static const uint32_t kTrackGroupDescriptionIndexBase = 0;
+  static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;
+
+  SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
+      : mSampleCount(aSampleCount),
+        mGroupDescriptionIndex(aGroupDescriptionIndex) {}
+
+  uint32_t mSampleCount;
+  uint32_t mGroupDescriptionIndex;
+};
+
+class Sbgp final : public Atom  // SampleToGroup box.
+{
+ public:
+  // Parses |aBox|; check IsValid() for the outcome.
+  explicit Sbgp(Box& aBox);
+
+  AtomType mGroupingType;
+  // Only read from the box when its version is 1; zero otherwise.
+  uint32_t mGroupingTypeParam;
+  FallibleTArray<SampleToGroupEntry> mEntries;
+
+ protected:
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Stores information from CencSampleEncryptionInformationGroupEntry (seig).
+// Cenc here refers to the common encryption standard, rather than the specific
+// cenc scheme from that standard. This structure is used for all encryption
+// schemes. I.e. it is used for both cenc and cbcs, not just cenc.
+struct CencSampleEncryptionInfoEntry final {
+ public:
+  CencSampleEncryptionInfoEntry() = default;
+
+  // Fills this entry from |aReader|; returns an error if the data is short
+  // or inconsistent.
+  Result<Ok, nsresult> Init(BoxReader& aReader);
+
+  bool mIsEncrypted = false;
+  // Per-sample IV size in bytes, as read from the entry.
+  uint8_t mIVSize = 0;
+  CopyableTArray<uint8_t> mKeyId;
+  // High and low nibble, respectively, of the entry's pattern byte.
+  uint8_t mCryptByteBlock = 0;
+  uint8_t mSkipByteBlock = 0;
+  // Constant IV bytes, when the entry carries one (name is spelled as-is).
+  CopyableTArray<uint8_t> mConsantIV;
+};
+
+class Sgpd final : public Atom  // SampleGroupDescription box.
+{
+ public:
+  // Parses |aBox|; check IsValid() for the outcome.
+  explicit Sgpd(Box& aBox);
+
+  AtomType mGroupingType;
+  // One entry per description listed in the box, in box order.
+  FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
+
+ protected:
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Audio/video entries from the sample description box (stsd). We only need to
+// store if these are encrypted, so do not need a specialized class for
+// different audio and video data. Currently most of the parsing of these
+// entries is done by mp4parse-rust, but the moof parser needs to know which
+// of these are encrypted when parsing the track fragment header (tfhd).
+struct SampleDescriptionEntry {
+  bool mIsEncryptedEntry = false;
+};
+
+// Used to indicate in variants if all tracks should be parsed.
+struct ParseAllTracks {};
+
+// Either ParseAllTracks, or the id (uint32_t) of the single track to parse.
+typedef Variant<ParseAllTracks, uint32_t> TrackParseMode;
+
+// Result of parsing one box passed to the constructor: the sample index
+// built from its track runs, the byte and time ranges they cover, and the
+// encryption-related side tables found alongside them.
+class Moof final : public Atom {
+ public:
+  Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+       Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+       uint64_t* aDecodeTime, bool aIsAudio,
+       nsTArray<TrackEndCts>& aTracksEndCts);
+  bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
+  void FixRounding(const Moof& aMoof);
+
+  mozilla::MediaByteRange mRange;
+  // Grown to span the byte range of every indexed sample.
+  mozilla::MediaByteRange mMdatRange;
+  MP4Interval<media::TimeUnit> mTimeRange;
+  // The samples found while parsing the track runs.
+  FallibleTArray<Sample> mIndex;
+
+  FallibleTArray<CencSampleEncryptionInfoEntry>
+      mFragmentSampleEncryptionInfoEntries;
+  FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
+
+  Tfhd mTfhd;
+  FallibleTArray<Saiz> mSaizs;
+  FallibleTArray<Saio> mSaios;
+  nsTArray<nsTArray<uint8_t>> mPsshes;
+
+ private:
+  // aDecodeTime is updated to the end of the parsed TRAF on return.
+  void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+                 Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+                 uint64_t* aDecodeTime, bool aIsAudio);
+  // aDecodeTime is updated to the end of the parsed TRUN on return.
+  Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
+                                 Edts& aEdts, uint64_t* aDecodeTime,
+                                 bool aIsAudio);
+  // Process the sample auxiliary information used by common encryption.
+  // aScheme is used to select the appropriate auxiliary information and should
+  // be set based on the encryption scheme used by the track being processed.
+  // Note, the term cenc here refers to the standard, not the specific scheme
+  // from that standard. I.e. this function is used to handle auxiliary
+  // information from both the cenc and cbcs schemes.
+  bool ProcessCencAuxInfo(AtomType aScheme);
+  // Accumulated rounding allowance; ParseTrun adds to it for each run.
+  media::TimeUnit mMaxRoundingError;
+};
+
+DDLoggedTypeDeclName(MoofParser);
+
+// Parses boxes from a ByteStream and keeps the resulting metadata atoms and
+// list of Moofs, either for one track or for all tracks (see
+// mTrackParseMode).
+class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
+ public:
+  // |aTrackParseMode| selects a single track id or ParseAllTracks. When a
+  // track id is given, mTrex is seeded with it; otherwise with 0.
+  MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
+             bool aIsAudio)
+      : mSource(aSource),
+        mOffset(0),
+        mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
+                                             : 0),
+        mIsAudio(aIsAudio),
+        mLastDecodeTime(0),
+        mTrackParseMode(aTrackParseMode) {
+    // NOTE(review): this comment used to refer to "mIsMultitrackParser",
+    // which is not a member of this class; multitrack parsing is expressed
+    // through mTrackParseMode (ParseAllTracks). The mode is a nasty work
+    // around for calculating the composition range for MSE that causes the
+    // parser to parse multiple tracks. Ideally we'd store an array of tracks
+    // with different metadata for each.
+    DDLINKCHILD("source", aSource);
+  }
+  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
+  // If *aCanEvict is true on entry, all moofs already parsed are removed
+  // from the index before the index is rebuilt. On return, *aCanEvict is
+  // true if some moofs were removed.
+  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
+                              bool* aCanEvict);
+  bool RebuildFragmentedIndex(BoxContext& aContext);
+  MP4Interval<media::TimeUnit> GetCompositionRange(
+      const mozilla::MediaByteRangeSet& aByteRanges);
+  bool ReachedEnd();
+  void ParseMoov(Box& aBox);
+  void ParseTrak(Box& aBox);
+  void ParseMdia(Box& aBox);
+  void ParseMvex(Box& aBox);
+
+  void ParseMinf(Box& aBox);
+  void ParseStbl(Box& aBox);
+  void ParseStsd(Box& aBox);
+  void ParseEncrypted(Box& aBox);
+
+  bool BlockingReadNextMoof();
+
+  already_AddRefed<mozilla::MediaByteBuffer> Metadata();
+  MediaByteRange FirstCompleteMediaSegment();
+  MediaByteRange FirstCompleteMediaHeader();
+
+  mozilla::MediaByteRange mInitRange;
+  RefPtr<ByteStream> mSource;
+  uint64_t mOffset;
+  Mvhd mMvhd;
+  Mdhd mMdhd;
+  Trex mTrex;
+  Tfdt mTfdt;
+  Edts mEdts;
+  Sinf mSinf;
+
+  FallibleTArray<CencSampleEncryptionInfoEntry>
+      mTrackSampleEncryptionInfoEntries;
+  FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
+  FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
+
+  // Mutable access to the list of parsed moofs.
+  nsTArray<Moof>& Moofs() { return mMoofs; }
+
+ private:
+  void ScanForMetadata(mozilla::MediaByteRange& aMoov);
+  nsTArray<Moof> mMoofs;
+  nsTArray<MediaByteRange> mMediaRanges;
+  nsTArray<TrackEndCts> mTracksEndCts;
+  bool mIsAudio;
+  uint64_t mLastDecodeTime;
+  // Either a ParseAllTracks if in multitrack mode, or an integer representing
+  // the track_id for the track being parsed. If parsing a specific track, mTrex
+  // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
+  // is a valid track id -- this is not allowed in the spec, but such mp4s
+  // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
+  // id based on the tracks being parsed.
+  const TrackParseMode mTrackParseMode;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/ResourceStream.cpp b/dom/media/mp4/ResourceStream.cpp
new file mode 100644
index 0000000000..ce2fb6f2f6
--- /dev/null
+++ b/dom/media/mp4/ResourceStream.cpp
@@ -0,0 +1,56 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ResourceStream.h"
+
+namespace mozilla {
+
+// Wraps |aResource| (asserted to be non-null) in the mResource index and
+// starts with a pin count of zero.
+ResourceStream::ResourceStream(mozilla::MediaResource* aResource)
+    : mResource(aResource), mPinCount(0) {
+  MOZ_ASSERT(aResource);
+  DDLINKCHILD("resource", &mResource);
+}
+
+// Every Pin() must have been balanced by an Unpin() before destruction.
+ResourceStream::~ResourceStream() { MOZ_ASSERT(mPinCount == 0); }
+
+// Reads up to |aCount| bytes starting at |aOffset| into |aBuffer|, issuing
+// as many reads on the underlying resource as needed.
+//
+// Stops early when a read returns zero bytes. On success returns true and
+// stores the total number of bytes read in |*aBytesRead| (which may be less
+// than |aCount|). Returns false, leaving |*aBytesRead| untouched, if any
+// underlying read fails.
+bool ResourceStream::ReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+                            size_t* aBytesRead) {
+  // Accumulate in size_t so a request larger than 4 GiB is not truncated.
+  size_t sum = 0;
+  uint32_t bytesRead = 0;
+  do {
+    const uint64_t offset = aOffset + sum;
+    char* buffer = reinterpret_cast<char*>(aBuffer) + sum;
+    // The underlying read takes a 32-bit count: clamp each chunk and let the
+    // loop pick up whatever remains.
+    const size_t remaining = aCount - sum;
+    const uint32_t toRead = remaining > UINT32_MAX
+                                ? UINT32_MAX
+                                : static_cast<uint32_t>(remaining);
+    nsresult rv = mResource.ReadAt(offset, buffer, toRead, &bytesRead);
+    if (NS_FAILED(rv)) {
+      return false;
+    }
+    sum += bytesRead;
+  } while (sum < aCount && bytesRead > 0);
+
+  *aBytesRead = sum;
+  return true;
+}
+
+// Reads |aCount| bytes at |aOffset| from the resource's cache into |aBuffer|
+// in one call. On success |*aBytesRead| is set to |aCount| and true is
+// returned; on failure |*aBytesRead| is set to 0 and false is returned.
+bool ResourceStream::CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+                                  size_t* aBytesRead) {
+  char* const destination = reinterpret_cast<char*>(aBuffer);
+  const nsresult rv =
+      mResource.GetResource()->ReadFromCache(destination, aOffset, aCount);
+  const bool failed = NS_FAILED(rv);
+  *aBytesRead = failed ? 0 : aCount;
+  return !failed;
+}
+
+// Stores the resource length in |*aSize| and returns true, or returns false
+// (leaving |*aSize| untouched) when the resource reports a negative length.
+bool ResourceStream::Length(int64_t* aSize) {
+  // Query once so the value that was checked is the value that is returned.
+  const int64_t length = mResource.GetLength();
+  if (length < 0) {
+    return false;
+  }
+  *aSize = length;
+  return true;
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/ResourceStream.h b/dom/media/mp4/ResourceStream.h
new file mode 100644
index 0000000000..1aa59fdaed
--- /dev/null
+++ b/dom/media/mp4/ResourceStream.h
@@ -0,0 +1,48 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RESOURCESTREAM_H_
+#define RESOURCESTREAM_H_
+
+#include "MediaResource.h"
+#include "ByteStream.h"
+#include "mozilla/RefPtr.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclNameAndBase(ResourceStream, ByteStream);
+
+// ByteStream implementation backed by a MediaResource, accessed through a
+// MediaResourceIndex.
+class ResourceStream : public ByteStream,
+                       public DecoderDoctorLifeLogger<ResourceStream> {
+ public:
+  // |aResource| must not be null.
+  explicit ResourceStream(mozilla::MediaResource* aResource);
+
+  // Reads up to |aCount| bytes at |offset|, looping over the underlying
+  // resource until the request is satisfied or a read returns no data.
+  virtual bool ReadAt(int64_t offset, void* aBuffer, size_t aCount,
+                      size_t* aBytesRead) override;
+  // Reads |aCount| bytes at |aOffset| from the resource's cache only.
+  virtual bool CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+                            size_t* aBytesRead) override;
+  // Reports the resource length; returns false when it is negative.
+  virtual bool Length(int64_t* size) override;
+
+  // Pins the underlying resource and counts the pin.
+  void Pin() {
+    mResource.GetResource()->Pin();
+    ++mPinCount;
+  }
+
+  // Unpins the underlying resource; must balance an earlier Pin().
+  void Unpin() {
+    mResource.GetResource()->Unpin();
+    MOZ_ASSERT(mPinCount);
+    --mPinCount;
+  }
+
+ protected:
+  // Asserts that no pin is outstanding.
+  virtual ~ResourceStream();
+
+ private:
+  mozilla::MediaResourceIndex mResource;
+  // Number of Pin() calls not yet matched by Unpin().
+  uint32_t mPinCount;
+};
+
+}  // namespace mozilla
+
+#endif  // RESOURCESTREAM_H_
diff --git a/dom/media/mp4/SampleIterator.cpp b/dom/media/mp4/SampleIterator.cpp
new file mode 100644
index 0000000000..95fc8af457
--- /dev/null
+++ b/dom/media/mp4/SampleIterator.cpp
@@ -0,0 +1,712 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SampleIterator.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "BufferReader.h"
+#include "mozilla/RefPtr.h"
+#include "MP4Interval.h"
+#include "MP4Metadata.h"
+#include "SinfParser.h"
+
+using namespace mozilla::media;
+
+namespace mozilla {
+
+// Stack-only helper answering "is this byte range covered?" for a series of
+// queries against a single MediaByteRangeSet.
+class MOZ_STACK_CLASS RangeFinder {
+ public:
+  // Queries are processed in order, so instead of a binary search for the
+  // appropriate range we scan linearly from the last used position.
+  //
+  // |ranges| must be normalised for this to work.
+  explicit RangeFinder(const MediaByteRangeSet& ranges) : mRanges(ranges) {}
+
+  bool Contains(const MediaByteRange& aByteRange);
+
+ private:
+  const MediaByteRangeSet& mRanges;
+  // Index into mRanges where the previous query stopped.
+  size_t mIndex = 0;
+};
+
+// Returns true when one of the ranges in mRanges strictly contains
+// |aByteRange|. The scan starts at the range used by the previous query and
+// walks backwards or forwards from there; mIndex is left wherever the scan
+// stopped.
+bool RangeFinder::Contains(const MediaByteRange& aByteRange) {
+  if (mRanges.IsEmpty()) {
+    return false;
+  }
+
+  // Fast path: same range as last time.
+  if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+    return true;
+  }
+
+  const bool needsBackwardScan = aByteRange.mStart < mRanges[mIndex].mStart;
+  if (needsBackwardScan) {
+    // The loop condition is known to hold on entry, so this plain while loop
+    // always runs its body at least once.
+    while (aByteRange.mStart < mRanges[mIndex].mStart) {
+      if (mIndex == 0) {
+        return false;
+      }
+      --mIndex;
+      if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Otherwise walk forwards while the query ends past the current range.
+  while (aByteRange.mEnd > mRanges[mIndex].mEnd) {
+    const bool atLastRange = mIndex == mRanges.Length() - 1;
+    if (atLastRange) {
+      return false;
+    }
+    ++mIndex;
+    if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Creates an iterator positioned at the first sample of the first moof and
+// registers it with |aIndex| (see MP4SampleIndex::RegisterIterator), which
+// lets UpdateMoofIndex adjust mCurrentMoof when moofs are evicted.
+SampleIterator::SampleIterator(MP4SampleIndex* aIndex)
+    : mIndex(aIndex), mCurrentMoof(0), mCurrentSample(0) {
+  mIndex->RegisterIterator(this);
+}
+
+// Removes this iterator from the owning index's list of registered iterators.
+SampleIterator::~SampleIterator() {
+  mIndex->UnregisterIterator(this);
+}
+
+// True when Get() can produce a current sample. Note that Get() may read
+// further moofs from the source while answering.
+bool SampleIterator::HasNext() {
+  Sample* current = Get();
+  return current != nullptr;
+}
+
+// Builds a MediaRawData for the sample the iterator currently points at and,
+// on success, advances the iterator with Next().
+//
+// Returns nullptr (without calling Next()) when there is no current sample,
+// the sample ends past the source's reported length, the output buffer cannot
+// be sized, a read fails or comes back short, the encryption scheme cannot be
+// determined, or the crypto metadata is inconsistent.
+already_AddRefed<MediaRawData> SampleIterator::GetNext() {
+  Sample* s(Get());
+  if (!s) {
+    return nullptr;
+  }
+
+  // The return value of Length() is not checked: if it fails without writing
+  // to |length|, the max() initial value remains and the check below cannot
+  // reject the sample.
+  int64_t length = std::numeric_limits<int64_t>::max();
+  mIndex->mSource->Length(&length);
+  if (s->mByteRange.mEnd > length) {
+    // We don't have this complete sample.
+    return nullptr;
+  }
+
+  // Copy timing and position metadata from the index entry.
+  RefPtr<MediaRawData> sample = new MediaRawData();
+  sample->mTimecode = s->mDecodeTime;
+  sample->mTime = s->mCompositionRange.start;
+  sample->mDuration = s->mCompositionRange.Length();
+  sample->mOffset = s->mByteRange.mStart;
+  sample->mKeyframe = s->mSync;
+
+  UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
+  // Do the blocking read
+  if (!writer->SetSize(s->mByteRange.Length())) {
+    return nullptr;
+  }
+
+  // A short read is treated the same as a failed read.
+  size_t bytesRead;
+  if (!mIndex->mSource->ReadAt(sample->mOffset, writer->Data(), sample->Size(),
+                               &bytesRead) ||
+      bytesRead != sample->Size()) {
+    return nullptr;
+  }
+
+  MoofParser* moofParser = mIndex->mMoofParser.get();
+  if (!moofParser) {
+    // File is not fragmented, we can't have crypto, just early return.
+    Next();
+    return sample.forget();
+  }
+
+  // We need to check if this moof has init data the CDM expects us to surface.
+  // This should happen when handling the first sample, even if that sample
+  // isn't encrypted (samples later in the moof may be).
+  if (mCurrentSample == 0) {
+    const nsTArray<Moof>& moofs = moofParser->Moofs();
+    const Moof* currentMoof = &moofs[mCurrentMoof];
+    if (!currentMoof->mPsshes.IsEmpty()) {
+      // This Moof contained crypto init data. Report that. We only report
+      // the init data on the Moof's first sample, to avoid reporting it more
+      // than once per Moof.
+      writer->mCrypto.mInitDatas.AppendElements(currentMoof->mPsshes);
+      writer->mCrypto.mInitDataType = u"cenc"_ns;
+    }
+  }
+
+  auto cryptoSchemeResult = GetEncryptionScheme();
+  if (cryptoSchemeResult.isErr()) {
+    // Log the error here in future.
+    return nullptr;
+  }
+  CryptoScheme cryptoScheme = cryptoSchemeResult.unwrap();
+  if (cryptoScheme == CryptoScheme::None) {
+    // No crypto to handle, early return.
+    Next();
+    return sample.forget();
+  }
+
+  writer->mCrypto.mCryptoScheme = cryptoScheme;
+  MOZ_ASSERT(writer->mCrypto.mCryptoScheme != CryptoScheme::None,
+             "Should have early returned if we don't have a crypto scheme!");
+  MOZ_ASSERT(writer->mCrypto.mKeyId.IsEmpty(),
+             "Sample should not already have a key ID");
+  MOZ_ASSERT(writer->mCrypto.mConstantIV.IsEmpty(),
+             "Sample should not already have a constant IV");
+  CencSampleEncryptionInfoEntry* sampleInfo = GetSampleEncryptionEntry();
+  if (sampleInfo) {
+    // Use sample group information if present, this supersedes track level
+    // information.
+    writer->mCrypto.mKeyId.AppendElements(sampleInfo->mKeyId);
+    writer->mCrypto.mIVSize = sampleInfo->mIVSize;
+    writer->mCrypto.mCryptByteBlock = sampleInfo->mCryptByteBlock;
+    writer->mCrypto.mSkipByteBlock = sampleInfo->mSkipByteBlock;
+    // NOTE(review): "mConsantIV" is presumably the spelling used by the
+    // member's declaration elsewhere -- do not "fix" it here alone.
+    writer->mCrypto.mConstantIV.AppendElements(sampleInfo->mConsantIV);
+  } else {
+    // Use the crypto info from track metadata
+    writer->mCrypto.mKeyId.AppendElements(moofParser->mSinf.mDefaultKeyID, 16);
+    writer->mCrypto.mIVSize = moofParser->mSinf.mDefaultIVSize;
+    writer->mCrypto.mCryptByteBlock = moofParser->mSinf.mDefaultCryptByteBlock;
+    writer->mCrypto.mSkipByteBlock = moofParser->mSinf.mDefaultSkipByteBlock;
+    writer->mCrypto.mConstantIV.AppendElements(
+        moofParser->mSinf.mDefaultConstantIV);
+  }
+
+  if ((writer->mCrypto.mIVSize == 0 && writer->mCrypto.mConstantIV.IsEmpty()) ||
+      (writer->mCrypto.mIVSize != 0 && s->mCencRange.IsEmpty())) {
+    // If mIVSize == 0, this indicates that a constant IV is in use, thus we
+    // should have a non empty constant IV. Alternatively if IV size is non
+    // zero, we should have an IV for this sample, which we need to look up
+    // in mCencRange (which must then be non empty). If neither of these are
+    // true we have bad crypto data, so bail.
+    return nullptr;
+  }
+  // Parse auxiliary information if present
+  if (!s->mCencRange.IsEmpty()) {
+    // The size comes from an 8 bit field
+    AutoTArray<uint8_t, 256> cencAuxInfo;
+    cencAuxInfo.SetLength(s->mCencRange.Length());
+    if (!mIndex->mSource->ReadAt(s->mCencRange.mStart, cencAuxInfo.Elements(),
+                                 cencAuxInfo.Length(), &bytesRead) ||
+        bytesRead != cencAuxInfo.Length()) {
+      return nullptr;
+    }
+    // The auxiliary info starts with the per-sample IV (mIVSize bytes).
+    BufferReader reader(cencAuxInfo);
+    if (!reader.ReadArray(writer->mCrypto.mIV, writer->mCrypto.mIVSize)) {
+      return nullptr;
+    }
+
+    // Parse the auxiliary information for subsample information
+    auto res = reader.ReadU16();
+    if (res.isOk() && res.unwrap() > 0) {
+      uint16_t count = res.unwrap();
+
+      // Each subsample entry is a 16-bit clear size followed by a 32-bit
+      // encrypted size, i.e. 6 bytes.
+      if (reader.Remaining() < count * 6) {
+        return nullptr;
+      }
+
+      for (size_t i = 0; i < count; i++) {
+        auto res_16 = reader.ReadU16();
+        auto res_32 = reader.ReadU32();
+        if (res_16.isErr() || res_32.isErr()) {
+          return nullptr;
+        }
+        writer->mCrypto.mPlainSizes.AppendElement(res_16.unwrap());
+        writer->mCrypto.mEncryptedSizes.AppendElement(res_32.unwrap());
+      }
+    } else {
+      // No subsample information means the entire sample is encrypted.
+      writer->mCrypto.mPlainSizes.AppendElement(0);
+      writer->mCrypto.mEncryptedSizes.AppendElement(sample->Size());
+    }
+  }
+
+  Next();
+
+  return sample.forget();
+}
+
+// Looks up the sample description entry referenced by the current moof's
+// tfhd. Returns nullptr when the referenced index does not exist, which means
+// the mp4 is malformed. Must only be called when a MoofParser is present.
+SampleDescriptionEntry* SampleIterator::GetSampleDescriptionEntry() {
+  MoofParser* parser = mIndex->mMoofParser.get();
+  Moof& moof = parser->Moofs()[mCurrentMoof];
+
+  // Mp4 indices start at 1; shift down by one to index our array. An index of
+  // 0 wraps around to a huge value and is rejected by the bounds check below.
+  uint32_t zeroBasedIndex = moof.mTfhd.mDefaultSampleDescriptionIndex;
+  --zeroBasedIndex;
+
+  FallibleTArray<SampleDescriptionEntry>& descriptions =
+      parser->mSampleDescriptions;
+  if (zeroBasedIndex < descriptions.Length()) {
+    return &descriptions[zeroBasedIndex];
+  }
+  // The sample description index is invalid, bail out.
+  return nullptr;
+}
+
+// Returns the sample-group encryption entry that applies to the current
+// sample, or nullptr when the sample belongs to no group (callers then fall
+// back to track-level defaults) or the group index is out of range.
+CencSampleEncryptionInfoEntry* SampleIterator::GetSampleEncryptionEntry() {
+  MoofParser* parser = mIndex->mMoofParser.get();
+  Moof* moof = &parser->Moofs()[mCurrentMoof];
+
+  // Prefer the fragment's sample to group entries; only when the fragment has
+  // none do we use the track's.
+  FallibleTArray<SampleToGroupEntry>* groupEntries =
+      &parser->mTrackSampleToGroupEntries;
+  if (moof->mFragmentSampleToGroupEntries.Length() != 0) {
+    groupEntries = &moof->mFragmentSampleToGroupEntries;
+  }
+
+  // Each entry covers a run of mSampleCount consecutive samples; find the run
+  // containing the current sample.
+  SampleToGroupEntry* match = nullptr;
+  uint32_t samplesSeen = 0;
+  for (SampleToGroupEntry& candidate : *groupEntries) {
+    if (samplesSeen + candidate.mSampleCount > mCurrentSample) {
+      match = &candidate;
+      break;
+    }
+    samplesSeen += candidate.mSampleCount;
+  }
+
+  // ISO-14496-12 Section 8.9.2.3 and 8.9.4 : the group description index
+  // (1) ranges from 1 to the number of sample group entries in the track
+  // level SampleGroupDescription Box, or (2) is 0 to indicate the sample is a
+  // member of no group, in which case the defaults from the TrackEncryption
+  // Box apply, or (3) starts at 0x10001 (index value 1 with a 1 in the top 16
+  // bits) to reference the fragment-local SampleGroupDescription Box.
+  //
+  // Per the same spec, if the sample counts sum to less than the number of
+  // samples, the reader behaves as if an extra entry with group description
+  // index 0 existed -- hence a missing match is treated like index 0.
+  if (match == nullptr) {
+    return nullptr;
+  }
+  uint32_t groupIndex = match->mGroupDescriptionIndex;
+  if (groupIndex == 0) {
+    return nullptr;
+  }
+
+  FallibleTArray<CencSampleEncryptionInfoEntry>* entries =
+      &parser->mTrackSampleEncryptionInfoEntries;
+
+  // Indices above the fragment base refer to the fragment's own sample group
+  // descriptions.
+  if (groupIndex > SampleToGroupEntry::kFragmentGroupDescriptionIndexBase) {
+    entries = &moof->mFragmentSampleEncryptionInfoEntries;
+    groupIndex -= SampleToGroupEntry::kFragmentGroupDescriptionIndexBase;
+  }
+
+  // The group index is one based.
+  if (groupIndex > entries->Length()) {
+    return nullptr;
+  }
+  return &entries->ElementAt(groupIndex - 1);
+}
+
+// Works out which encryption scheme applies to the current sample.
+// Yields CryptoScheme::None for unfragmented files and unencrypted sample
+// descriptions, Cenc/Cbcs when the sinf names that scheme, and an error string
+// when the metadata is missing, inconsistent or names an unsupported scheme.
+Result<CryptoScheme, nsCString> SampleIterator::GetEncryptionScheme() {
+  // See ISO/IEC 23001-7 for information on the metadata being checked.
+  MoofParser* parser = mIndex->mMoofParser.get();
+  if (parser == nullptr) {
+    // This mp4 isn't fragmented so it can't be encrypted.
+    return CryptoScheme::None;
+  }
+
+  SampleDescriptionEntry* description = GetSampleDescriptionEntry();
+  if (description == nullptr) {
+    // A valid file's tfhd must reference a sample description entry. If this
+    // error turns out to be common, falling back to the first sample
+    // description entry for out of bounds indices could be considered.
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme due to bad index for sample "
+        "description entry."));
+  }
+
+  const bool entryIsEncrypted = description->mIsEncryptedEntry;
+  if (!entryIsEncrypted) {
+    return CryptoScheme::None;
+  }
+
+  if (!parser->mSinf.IsValid()) {
+    // The entry claims encryption but there is no valid sinf box. The sinf is
+    // part of the sample description entry, so this suggests a malformed
+    // file.
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme. Sample description entry "
+        "indicates encryption, but could not find associated sinf box."));
+  }
+
+  // Sample group encryption info is optional, but when present it has to
+  // agree with the rest of the metadata.
+  CencSampleEncryptionInfoEntry* groupInfo = GetSampleEncryptionEntry();
+  if (groupInfo != nullptr && !groupInfo->mIsEncrypted) {
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme. Sample description entry "
+        "indicates encryption, but sample encryption entry indicates sample is "
+        "not encrypted. These should be consistent."));
+  }
+
+  if (parser->mSinf.mDefaultEncryptionType == AtomType("cenc")) {
+    return CryptoScheme::Cenc;
+  }
+  if (parser->mSinf.mDefaultEncryptionType == AtomType("cbcs")) {
+    return CryptoScheme::Cbcs;
+  }
+  return mozilla::Err(nsLiteralCString(
+      "Could not determine encryption scheme. Sample description entry "
+      "reports sample is encrypted, but no scheme, or an unsupported scheme "
+      "is in use."));
+}
+
+// Returns the sample at the iterator's current position, or nullptr when no
+// further sample is available. For fragmented files this skips over exhausted
+// moofs and asks the parser to read the next moof when the known ones run
+// out, so mCurrentMoof/mCurrentSample may change as a side effect.
+Sample* SampleIterator::Get() {
+  MoofParser* parser = mIndex->mMoofParser.get();
+  if (parser == nullptr) {
+    // Unfragmented file: a single flat index, so the moof counter stays 0.
+    MOZ_ASSERT(!mCurrentMoof);
+    if (mCurrentSample < mIndex->mIndex.Length()) {
+      return &mIndex->mIndex[mCurrentSample];
+    }
+    return nullptr;
+  }
+
+  nsTArray<Moof>& moofs = parser->Moofs();
+  for (;;) {
+    if (mCurrentMoof == moofs.Length()) {
+      // Ran past the last parsed moof; try to parse another one.
+      if (!parser->BlockingReadNextMoof()) {
+        return nullptr;
+      }
+      MOZ_ASSERT(mCurrentMoof < moofs.Length());
+    }
+    Moof& moof = moofs[mCurrentMoof];
+    if (mCurrentSample < moof.mIndex.Length()) {
+      return &moof.mIndex[mCurrentSample];
+    }
+    // This moof is exhausted; move to the start of the following one.
+    mCurrentSample = 0;
+    ++mCurrentMoof;
+  }
+}
+
+// Steps to the following sample. Rolling over into the next moof is handled
+// lazily by Get().
+void SampleIterator::Next() {
+  ++mCurrentSample;
+}
+
+// Positions the iterator on the last sync sample found while scanning from
+// the start up to (and including) the first sample whose composition start is
+// at or after |aTime|. If no sync sample is seen, the iterator ends up at the
+// very first sample.
+void SampleIterator::Seek(const TimeUnit& aTime) {
+  // Restart from the beginning.
+  mCurrentMoof = 0;
+  mCurrentSample = 0;
+
+  size_t lastSyncMoof = 0;
+  size_t lastSyncSample = 0;
+  for (Sample* current = Get(); current; Next(), current = Get()) {
+    if (current->mCompositionRange.start > aTime) {
+      break;
+    }
+    if (current->mSync) {
+      lastSyncMoof = mCurrentMoof;
+      lastSyncSample = mCurrentSample;
+    }
+    if (current->mCompositionRange.start == aTime) {
+      break;
+    }
+  }
+
+  mCurrentMoof = lastSyncMoof;
+  mCurrentSample = lastSyncSample;
+}
+
+// Returns the composition start time of the first sync sample at or after the
+// current position, or TimeUnit::Invalid() when there is none. The scan runs
+// on a copy, so this iterator's own position is not modified.
+TimeUnit SampleIterator::GetNextKeyframeTime() {
+  SampleIterator cursor(*this);
+  for (Sample* current = cursor.Get(); current;
+       cursor.Next(), current = cursor.Get()) {
+    if (current->mSync) {
+      return current->mCompositionRange.start;
+    }
+  }
+  return TimeUnit::Invalid();
+}
+
+// Builds the sample index for one track.
+//
+// When |aIndices| is empty a MoofParser is created and samples are discovered
+// from movie fragments later on. Otherwise every indice from the first sync
+// sample onwards (all samples count as sync for audio) is copied into mIndex,
+// using |aTimeScale| as the tick base, and mDataOffset is filled with coarse
+// blocks used by ConvertByteRangesToTimeRanges. mDataOffset is dropped again
+// if the samples turn out not to be stored in increasing byte order.
+//
+// On allocation failure or a failed GetIndice() the constructor returns early
+// and leaves whatever was built so far.
+MP4SampleIndex::MP4SampleIndex(const IndiceWrapper& aIndices,
+                               ByteStream* aSource, uint32_t aTrackId,
+                               bool aIsAudio, uint32_t aTimeScale)
+    : mSource(aSource), mIsAudio(aIsAudio) {
+  if (!aIndices.Length()) {
+    mMoofParser =
+        MakeUnique<MoofParser>(aSource, AsVariant(aTrackId), aIsAudio);
+  } else {
+    if (!mIndex.SetCapacity(aIndices.Length(), fallible)) {
+      // OOM.
+      return;
+    }
+    // mMoofParser is only ever created in the branch above, so on this path
+    // the tick base is always the caller supplied time scale. It does not
+    // change per sample, hence it is computed once outside the loop.
+    const int64_t timescale = aTimeScale;
+    media::IntervalSet<TimeUnit> intervalTime;
+    MediaByteRange intervalRange;
+    bool haveSync = false;
+    bool progressive = true;
+    int64_t lastOffset = 0;
+    for (size_t i = 0; i < aIndices.Length(); i++) {
+      Indice indice{};
+      if (!aIndices.GetIndice(i, indice)) {
+        // Out of index?
+        return;
+      }
+      if (indice.sync || mIsAudio) {
+        haveSync = true;
+      }
+      // Samples preceding the first sync sample are dropped.
+      if (!haveSync) {
+        continue;
+      }
+      Sample sample;
+      sample.mByteRange =
+          MediaByteRange(indice.start_offset, indice.end_offset);
+      sample.mCompositionRange = MP4Interval<media::TimeUnit>(
+          TimeUnit(indice.start_composition, timescale),
+          TimeUnit(indice.end_composition, timescale));
+      sample.mDecodeTime = TimeUnit(indice.start_decode, timescale);
+      sample.mSync = indice.sync || mIsAudio;
+      // FIXME: Make this infallible after bug 968520 is done.
+      MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));
+      if (indice.start_offset < lastOffset) {
+        NS_WARNING("Chunks in MP4 out of order, expect slow down");
+        progressive = false;
+      }
+      lastOffset = indice.end_offset;
+
+      // Start a new data-offset block at each sync sample.
+      // Pack audio samples in group of 128.
+      if (sample.mSync && progressive && (!mIsAudio || !(i % 128))) {
+        if (mDataOffset.Length()) {
+          // Close the previous block with what was accumulated since it
+          // started.
+          auto& last = mDataOffset.LastElement();
+          last.mEndOffset = intervalRange.mEnd;
+          NS_ASSERTION(intervalTime.Length() == 1,
+                       "Discontinuous samples between keyframes");
+          last.mTime.start = intervalTime.GetStart();
+          last.mTime.end = intervalTime.GetEnd();
+        }
+        if (!mDataOffset.AppendElement(
+                MP4DataOffset(mIndex.Length() - 1, indice.start_offset),
+                fallible)) {
+          // OOM.
+          return;
+        }
+        intervalTime = media::IntervalSet<TimeUnit>();
+        intervalRange = MediaByteRange();
+      }
+      intervalTime += media::Interval<TimeUnit>(sample.mCompositionRange.start,
+                                                sample.mCompositionRange.end);
+      intervalRange = intervalRange.Span(sample.mByteRange);
+    }
+
+    if (mDataOffset.Length() && progressive) {
+      // Close the final block using the very last indice's end offset.
+      Indice indice{};
+      if (!aIndices.GetIndice(aIndices.Length() - 1, indice)) {
+        return;
+      }
+      auto& last = mDataOffset.LastElement();
+      last.mEndOffset = indice.end_offset;
+      last.mTime =
+          MP4Interval<TimeUnit>(intervalTime.GetStart(), intervalTime.GetEnd());
+    } else {
+      // Out-of-order samples make the block table unusable.
+      mDataOffset.Clear();
+    }
+  }
+}
+
+// Defined out of line; member destructors do all the cleanup.
+MP4SampleIndex::~MP4SampleIndex() = default;
+
+// Convenience overload: refreshes the fragmented index without allowing
+// already parsed moofs to be evicted.
+void MP4SampleIndex::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges) {
+  UpdateMoofIndex(aByteRanges, false);
+}
+
+// Asks the MoofParser to rebuild its fragmented index for |aByteRanges|.
+// No-op for unfragmented files. When |aCanEvict| is set, there is more than
+// one moof, and every registered iterator is already on (or just past) the
+// last moof, the parser is allowed to trim old moofs; if it does, the
+// iterators' moof positions are shifted to match.
+void MP4SampleIndex::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges,
+                                     bool aCanEvict) {
+  if (!mMoofParser) {
+    return;
+  }
+
+  const size_t moofCount = mMoofParser->Moofs().Length();
+  bool canEvict = aCanEvict && moofCount > 1;
+  if (canEvict) {
+    // Trimming is only safe once all iterators have demuxed all possible
+    // samples of the earlier moofs.
+    for (const SampleIterator* iterator : mIterators) {
+      const bool pastLastMoof =
+          iterator->mCurrentSample == 0 && iterator->mCurrentMoof == moofCount;
+      const bool onLastMoof = iterator->mCurrentMoof == moofCount - 1;
+      if (!pastLastMoof && !onLastMoof) {
+        canEvict = false;
+        break;
+      }
+    }
+  }
+
+  // |canEvict| is in/out: the parser reports back whether it trimmed.
+  mMoofParser->RebuildFragmentedIndex(aByteRanges, &canEvict);
+  if (!canEvict) {
+    return;
+  }
+
+  // The moofparser got trimmed. Adjust all registered iterators.
+  for (SampleIterator* iterator : mIterators) {
+    iterator->mCurrentMoof -= moofCount - 1;
+  }
+}
+
+// Walks the last index (the last moof's for fragmented files, mIndex
+// otherwise) backwards and returns the greatest composition end time seen
+// once a sync sample is reached, provided every sample visited on the way is
+// contained in |aByteRanges|. Returns zero when the fragmented parser has not
+// reached the end or has no moofs, when a visited sample is not buffered, or
+// when no sync sample is found.
+TimeUnit MP4SampleIndex::GetEndCompositionIfBuffered(
+    const MediaByteRangeSet& aByteRanges) {
+  // Unfragmented files have no MoofParser, so it must not be dereferenced
+  // unconditionally. In that case fall back to microseconds as the base of
+  // the zero value; a zero TimeUnit denotes the same instant in any base.
+  const int64_t base =
+      mMoofParser ? static_cast<int64_t>(mMoofParser->mMdhd.mTimescale)
+                  : USECS_PER_S;
+
+  FallibleTArray<Sample>* index;
+  if (mMoofParser) {
+    if (!mMoofParser->ReachedEnd() || mMoofParser->Moofs().IsEmpty()) {
+      return TimeUnit::Zero(base);
+    }
+    index = &mMoofParser->Moofs().LastElement().mIndex;
+  } else {
+    index = &mIndex;
+  }
+
+  media::TimeUnit lastComposition = TimeUnit::Zero(base);
+  RangeFinder rangeFinder(aByteRanges);
+  // Iterate from the last sample towards the first.
+  for (size_t i = index->Length(); i--;) {
+    const Sample& sample = (*index)[i];
+    if (!rangeFinder.Contains(sample.mByteRange)) {
+      return TimeUnit::Zero(base);
+    }
+    lastComposition = std::max(lastComposition, sample.mCompositionRange.end);
+    if (sample.mSync) {
+      return lastComposition;
+    }
+  }
+  return TimeUnit::Zero(base);
+}
+
+// Translates a set of buffered byte ranges into the time intervals that can
+// be played from them.
+//
+// The result for the most recent input is cached in mLastCachedRanges /
+// mLastBufferedRanges. When mDataOffset is populated, whole blocks are mapped
+// at once and only partial blocks are examined sample by sample. Otherwise
+// every sample (or whole moof, when fully buffered) is checked individually
+// and a sample only counts once a buffered sync sample precedes it.
+TimeIntervals MP4SampleIndex::ConvertByteRangesToTimeRanges(
+    const MediaByteRangeSet& aByteRanges) {
+  if (aByteRanges == mLastCachedRanges) {
+    return mLastBufferedRanges;
+  }
+  mLastCachedRanges = aByteRanges;
+
+  if (mDataOffset.Length()) {
+    TimeIntervals timeRanges;
+    for (const auto& range : aByteRanges) {
+      // |start| is the first block starting at or after the range start,
+      // |end| the first block ending after the range end; blocks in
+      // [start, end) are therefore entirely inside the range.
+      uint32_t start = mDataOffset.IndexOfFirstElementGt(range.mStart - 1);
+      if (!mIsAudio && start == mDataOffset.Length()) {
+        continue;
+      }
+      uint32_t end = mDataOffset.IndexOfFirstElementGt(
+          range.mEnd, MP4DataOffset::EndOffsetComparator());
+      if (!mIsAudio && end < start) {
+        continue;
+      }
+      if (mIsAudio && start &&
+          range.Intersects(MediaByteRange(mDataOffset[start - 1].mStartOffset,
+                                          mDataOffset[start - 1].mEndOffset))) {
+        // Check if previous audio data block contains some available samples.
+        for (size_t i = mDataOffset[start - 1].mIndex; i < mIndex.Length();
+             i++) {
+          if (range.ContainsStrict(mIndex[i].mByteRange)) {
+            timeRanges += TimeInterval(mIndex[i].mCompositionRange.start,
+                                       mIndex[i].mCompositionRange.end);
+          }
+        }
+      }
+      if (end > start) {
+        for (uint32_t i = start; i < end; i++) {
+          timeRanges += TimeInterval(mDataOffset[i].mTime.start,
+                                     mDataOffset[i].mTime.end);
+        }
+      }
+      if (end < mDataOffset.Length()) {
+        // Find samples in partial block contained in the byte range.
+        for (size_t i = mDataOffset[end].mIndex;
+             i < mIndex.Length() && range.ContainsStrict(mIndex[i].mByteRange);
+             i++) {
+          timeRanges += TimeInterval(mIndex[i].mCompositionRange.start,
+                                     mIndex[i].mCompositionRange.end);
+        }
+      }
+    }
+    mLastBufferedRanges = timeRanges;
+    return timeRanges;
+  }
+
+  RangeFinder rangeFinder(aByteRanges);
+  nsTArray<MP4Interval<media::TimeUnit>> timeRanges;
+  nsTArray<FallibleTArray<Sample>*> indexes;
+  if (mMoofParser) {
+    // Collect pointers to the per-moof sample indexes that need a sample by
+    // sample check. Nothing is copied or moved: the pointers refer to the
+    // parser's own storage and are only used within this function.
+    for (size_t i = 0; i < mMoofParser->Moofs().Length(); i++) {
+      Moof& moof = mMoofParser->Moofs()[i];
+
+      // We need the entire moof in order to play anything
+      if (rangeFinder.Contains(moof.mRange)) {
+        if (rangeFinder.Contains(moof.mMdatRange)) {
+          // Header and data are both fully buffered: take the moof's whole
+          // time range without looking at individual samples.
+          MP4Interval<media::TimeUnit>::SemiNormalAppend(timeRanges,
+                                                         moof.mTimeRange);
+        } else {
+          indexes.AppendElement(&moof.mIndex);
+        }
+      }
+    }
+  } else {
+    indexes.AppendElement(&mIndex);
+  }
+
+  bool hasSync = false;
+  for (size_t i = 0; i < indexes.Length(); i++) {
+    FallibleTArray<Sample>* index = indexes[i];
+    for (size_t j = 0; j < index->Length(); j++) {
+      const Sample& sample = (*index)[j];
+      if (!rangeFinder.Contains(sample.mByteRange)) {
+        // We process the index in decode order so we clear hasSync when we hit
+        // a range that isn't buffered.
+        hasSync = false;
+        continue;
+      }
+
+      hasSync |= sample.mSync;
+      if (!hasSync) {
+        continue;
+      }
+
+      MP4Interval<media::TimeUnit>::SemiNormalAppend(timeRanges,
+                                                     sample.mCompositionRange);
+    }
+  }
+
+  // This fixes up when the composition order differs from the byte range order
+  nsTArray<MP4Interval<TimeUnit>> timeRangesNormalized;
+  MP4Interval<media::TimeUnit>::Normalize(timeRanges, &timeRangesNormalized);
+  // convert timeRanges.
+  media::TimeIntervals ranges;
+  for (size_t i = 0; i < timeRangesNormalized.Length(); i++) {
+    ranges += media::TimeInterval(timeRangesNormalized[i].start,
+                                  timeRangesNormalized[i].end);
+  }
+  mLastBufferedRanges = ranges;
+  return ranges;
+}
+
+// Computes a byte offset associated with |aTime| for eviction purposes.
+// Returns uint64_t max when no moof / sample matches the conditions below.
+//
+// Fragmented: the smallest start offset (of either the moof or its mdat)
+// among moofs with a non-empty time range ending after |aTime|.
+// Unfragmented: the smallest byte-range end among samples whose composition
+// end is at or before |aTime|.
+uint64_t MP4SampleIndex::GetEvictionOffset(const TimeUnit& aTime) {
+  uint64_t offset = std::numeric_limits<uint64_t>::max();
+  if (mMoofParser) {
+    // We need to keep the whole moof if we're keeping any of it because the
+    // parser doesn't keep parsed moofs.
+    for (size_t i = 0; i < mMoofParser->Moofs().Length(); i++) {
+      Moof& moof = mMoofParser->Moofs()[i];
+
+      if (!moof.mTimeRange.Length().IsZero() && moof.mTimeRange.end > aTime) {
+        offset = std::min(offset, uint64_t(std::min(moof.mRange.mStart,
+                                                    moof.mMdatRange.mStart)));
+      }
+    }
+  } else {
+    // We've already parsed and stored the moov so we don't need to keep it.
+    // All we need to keep is the sample data itself.
+    for (size_t i = 0; i < mIndex.Length(); i++) {
+      const Sample& sample = mIndex[i];
+      if (aTime >= sample.mCompositionRange.end) {
+        offset = std::min(offset, uint64_t(sample.mByteRange.mEnd));
+      }
+    }
+  }
+  return offset;
+}
+
+// Adds |aIterator| to the list consulted and adjusted by UpdateMoofIndex.
+// Only a raw pointer is stored; the iterator removes itself again in its
+// destructor via UnregisterIterator.
+void MP4SampleIndex::RegisterIterator(SampleIterator* aIterator) {
+  mIterators.AppendElement(aIterator);
+}
+
+// Removes |aIterator| from the list of registered iterators.
+void MP4SampleIndex::UnregisterIterator(SampleIterator* aIterator) {
+  mIterators.RemoveElement(aIterator);
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/SampleIterator.h b/dom/media/mp4/SampleIterator.h
new file mode 100644
index 0000000000..61b60df6af
--- /dev/null
+++ b/dom/media/mp4/SampleIterator.h
@@ -0,0 +1,134 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DOM_MEDIA_MP4_SAMPLE_ITERATOR_H_
+#define DOM_MEDIA_MP4_SAMPLE_ITERATOR_H_
+
+#include "ByteStream.h"
+#include "MediaData.h"
+#include "MediaResource.h"
+#include "MoofParser.h"
+#include "mozilla/ResultVariant.h"
+#include "MP4Interval.h"
+#include "nsISupportsImpl.h"
+#include "TimeUnits.h"
+
+namespace mozilla {
+
+struct CencSampleEncryptionInfoEntry;
+class IndiceWrapper;
+class MP4SampleIndex;
+struct Sample;
+
+// Cursor over the samples of an MP4SampleIndex, addressed as a
+// (moof, sample-within-moof) pair. For unfragmented files the moof part
+// stays 0 and the sample part indexes the flat sample list.
+//
+// NOTE(review): no copy constructor is declared, so copies (as made by
+// GetNextKeyframeTime) are not registered with the index, while the
+// destructor still calls UnregisterIterator for them.
+class SampleIterator {
+ public:
+  // Registers the new iterator with |aIndex|.
+  explicit SampleIterator(MP4SampleIndex* aIndex);
+  // Unregisters the iterator from its index.
+  ~SampleIterator();
+  // True when a current sample is available.
+  bool HasNext();
+  // Reads the current sample's data and advances; nullptr on failure or when
+  // no sample is available.
+  already_AddRefed<mozilla::MediaRawData> GetNext();
+  // Repositions onto the last sync sample found at or before |aTime|.
+  void Seek(const media::TimeUnit& aTime);
+  // Composition start of the next sync sample, or an invalid TimeUnit.
+  media::TimeUnit GetNextKeyframeTime();
+
+ private:
+  // Current sample, or nullptr when none is available.
+  Sample* Get();
+
+  // Gets the sample description entry referenced by the current moof, or
+  // nullptr if the referenced index is out of range.
+  SampleDescriptionEntry* GetSampleDescriptionEntry();
+  // Sample-group encryption entry for the current sample, or nullptr when the
+  // sample is in no group or the group index is out of range.
+  CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry();
+
+  // Determines the encryption scheme in use for the current sample. If the
+  // scheme cannot be unambiguously determined, will return an error with
+  // the reason.
+  //
+  // Returns: Ok(CryptoScheme) if a crypto scheme, including None, can be
+  // determined, or Err(nsCString) if there is an issue determining the scheme.
+  Result<CryptoScheme, nsCString> GetEncryptionScheme();
+
+  // Advances by one sample.
+  void Next();
+  // Owning reference to the index being iterated.
+  RefPtr<MP4SampleIndex> mIndex;
+  // MP4SampleIndex adjusts mCurrentMoof when moofs are evicted.
+  friend class MP4SampleIndex;
+  size_t mCurrentMoof;
+  size_t mCurrentSample;
+};
+
+// Per-track sample index. Either holds a flat list of samples (mIndex) built
+// from an IndiceWrapper, or owns a MoofParser that discovers samples from
+// movie fragments. Reference counted (thread-safe refcount).
+class MP4SampleIndex {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MP4SampleIndex)
+
+  // One raw index entry as handed over by the IndiceWrapper. The composition
+  // and decode values are tick counts that the constructor combines with a
+  // time scale.
+  struct Indice {
+    uint64_t start_offset;
+    uint64_t end_offset;
+    int64_t start_composition;
+    int64_t end_composition;
+    int64_t start_decode;
+    bool sync;
+  };
+
+  // A coarse block of consecutive samples: the index in mIndex of its first
+  // sample, the byte span it covers and the time interval it covers.
+  struct MP4DataOffset {
+    MP4DataOffset(uint32_t aIndex, int64_t aStartOffset)
+        : mIndex(aIndex), mStartOffset(aStartOffset), mEndOffset(0) {}
+
+    // Comparisons against a plain offset use the block's start offset; they
+    // are what IndexOfFirstElementGt uses when no comparator is passed.
+    bool operator==(int64_t aStartOffset) const {
+      return mStartOffset == aStartOffset;
+    }
+
+    bool operator!=(int64_t aStartOffset) const {
+      return mStartOffset != aStartOffset;
+    }
+
+    bool operator<(int64_t aStartOffset) const {
+      return mStartOffset < aStartOffset;
+    }
+
+    // Comparator for searching by the block's end offset instead.
+    struct EndOffsetComparator {
+      bool Equals(const MP4DataOffset& a, const int64_t& b) const {
+        return a.mEndOffset == b;
+      }
+
+      bool LessThan(const MP4DataOffset& a, const int64_t& b) const {
+        return a.mEndOffset < b;
+      }
+    };
+
+    uint32_t mIndex;
+    int64_t mStartOffset;
+    int64_t mEndOffset;
+    MP4Interval<media::TimeUnit> mTime;
+  };
+
+  // An empty |aIndices| selects the fragmented (MoofParser) mode.
+  MP4SampleIndex(const mozilla::IndiceWrapper& aIndices, ByteStream* aSource,
+                 uint32_t aTrackId, bool aIsAudio, uint32_t aTimeScale);
+
+  // Rebuilds the fragmented index for the given buffered ranges; the
+  // one-argument overload never evicts. No-ops for unfragmented files.
+  void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges,
+                       bool aCanEvict);
+  void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges);
+  media::TimeUnit GetEndCompositionIfBuffered(
+      const mozilla::MediaByteRangeSet& aByteRanges);
+  mozilla::media::TimeIntervals ConvertByteRangesToTimeRanges(
+      const mozilla::MediaByteRangeSet& aByteRanges);
+  uint64_t GetEvictionOffset(const media::TimeUnit& aTime);
+  bool IsFragmented() { return !!mMoofParser; }
+
+  friend class SampleIterator;
+
+ private:
+  // Private destructor: destruction goes through the refcounting macro above.
+  ~MP4SampleIndex();
+  void RegisterIterator(SampleIterator* aIterator);
+  void UnregisterIterator(SampleIterator* aIterator);
+
+  // Non-owning raw pointer.
+  // NOTE(review): presumably the caller keeps the stream alive for the
+  // lifetime of this index -- confirm.
+  ByteStream* mSource;
+  // Flat sample list (unfragmented mode only).
+  FallibleTArray<Sample> mIndex;
+  // Coarse block table over mIndex; empty when unusable.
+  FallibleTArray<MP4DataOffset> mDataOffset;
+  // Non-null in fragmented mode only.
+  UniquePtr<MoofParser> mMoofParser;
+  // Registered iterators (raw, non-owning).
+  nsTArray<SampleIterator*> mIterators;
+
+  // ConvertByteRangesToTimeRanges cache
+  mozilla::MediaByteRangeSet mLastCachedRanges;
+  mozilla::media::TimeIntervals mLastBufferedRanges;
+  bool mIsAudio;
+};
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/SinfParser.cpp b/dom/media/mp4/SinfParser.cpp
new file mode 100644
index 0000000000..4ea14adaaa
--- /dev/null
+++ b/dom/media/mp4/SinfParser.cpp
@@ -0,0 +1,95 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Unused.h"
+#include "SinfParser.h"
+#include "AtomType.h"
+#include "Box.h"
+#include "ByteStream.h"
+
+namespace mozilla {
+
+Sinf::Sinf(Box& aBox) : mDefaultIVSize(0), mDefaultEncryptionType() {
+  SinfParser parser(aBox);
+  if (parser.GetSinf().IsValid()) {
+    *this = parser.GetSinf();
+  }
+}
+
+// Walks the direct children of |aBox| and parses the "schm" and "schi" boxes
+// into mSinf. Parse failures are deliberately ignored here; callers judge the
+// outcome through Sinf::IsValid().
+SinfParser::SinfParser(Box& aBox) {
+  Box child = aBox.FirstChild();
+  while (child.IsAvailable()) {
+    if (child.IsType("schm")) {
+      mozilla::Unused << ParseSchm(child);
+    } else if (child.IsType("schi")) {
+      mozilla::Unused << ParseSchi(child);
+    }
+    child = child.Next();
+  }
+}
+
+// Reads the scheme type out of a "schm" box into
+// mSinf.mDefaultEncryptionType. Fails when fewer than 8 bytes are available
+// or a read fails.
+Result<Ok, nsresult> SinfParser::ParseSchm(Box& aBox) {
+  BoxReader reader(aBox);
+
+  // Need the 32-bit flags word plus the 32-bit scheme type.
+  const bool tooShort = reader->Remaining() < 8;
+  if (tooShort) {
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  // The flags are read only to step over them.
+  MOZ_TRY(reader->ReadU32());
+  MOZ_TRY_VAR(mSinf.mDefaultEncryptionType, reader->ReadU32());
+  return Ok();
+}
+
+// Parses every "tenc" child of a "schi" box; other children are skipped.
+// Stops with NS_ERROR_FAILURE at the first tenc that fails to parse.
+Result<Ok, nsresult> SinfParser::ParseSchi(Box& aBox) {
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (!child.IsType("tenc")) {
+      continue;
+    }
+    if (ParseTenc(child).isErr()) {
+      return Err(NS_ERROR_FAILURE);
+    }
+  }
+  return Ok();
+}
+
+// Parses a "tenc" (track encryption) box into mSinf: the crypt/skip byte
+// block pattern (version >= 1 only, otherwise 0/0), the default IV size, the
+// 16 byte default key ID and, when the track is encrypted with an IV size of
+// 0, the default constant IV. Fields are written to mSinf as they are read,
+// so a failure part-way leaves the earlier fields updated.
+Result<Ok, nsresult> SinfParser::ParseTenc(Box& aBox) {
+  BoxReader reader(aBox);
+
+  // 24 = 4 (version + flags) + 1 (reserved) + 1 (pattern / reserved) +
+  //      1 (isEncrypted) + 1 (IV size) + 16 (key ID). This guarantees the
+  // unchecked Read(16) below has enough data.
+  if (reader->Remaining() < 24) {
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  // The version is the top byte of the 32-bit word.
+  uint8_t version = flags >> 24;
+
+  // Skip reserved byte
+  MOZ_TRY(reader->ReadU8());
+  if (version >= 1) {
+    // High nibble: crypt byte block, low nibble: skip byte block.
+    uint8_t pattern;
+    MOZ_TRY_VAR(pattern, reader->ReadU8());
+    mSinf.mDefaultCryptByteBlock = pattern >> 4;
+    mSinf.mDefaultSkipByteBlock = pattern & 0x0f;
+  } else {
+    // Reserved if version is less than 1
+    MOZ_TRY(reader->ReadU8());
+    mSinf.mDefaultCryptByteBlock = 0;
+    mSinf.mDefaultSkipByteBlock = 0;
+  }
+
+  uint8_t isEncrypted;
+  MOZ_TRY_VAR(isEncrypted, reader->ReadU8());
+  MOZ_TRY_VAR(mSinf.mDefaultIVSize, reader->ReadU8());
+  memcpy(mSinf.mDefaultKeyID, reader->Read(16), 16);
+
+  // An IV size of 0 on an encrypted track means a constant IV follows,
+  // prefixed by its one byte length.
+  if (isEncrypted && mSinf.mDefaultIVSize == 0) {
+    uint8_t defaultConstantIVSize;
+    MOZ_TRY_VAR(defaultConstantIVSize, reader->ReadU8());
+    if (!mSinf.mDefaultConstantIV.SetLength(defaultConstantIVSize,
+                                            mozilla::fallible)) {
+      return Err(NS_ERROR_FAILURE);
+    }
+    for (uint8_t i = 0; i < defaultConstantIVSize; i++) {
+      MOZ_TRY_VAR(mSinf.mDefaultConstantIV.ElementAt(i), reader->ReadU8());
+    }
+  }
+  return Ok();
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/SinfParser.h b/dom/media/mp4/SinfParser.h
new file mode 100644
index 0000000000..084892854c
--- /dev/null
+++ b/dom/media/mp4/SinfParser.h
@@ -0,0 +1,56 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SINF_PARSER_H_
+#define SINF_PARSER_H_
+
+#include "mozilla/ResultExtensions.h"
+#include "Atom.h"
+#include "AtomType.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+class Box;
+
+// Value holder for the encryption defaults that SinfParser extracts from a
+// protection scheme information box: the scheme type plus the default IV
+// size, key ID, crypt/skip byte-block pattern and optional constant IV.
+//
+// Every scalar and array member carries a default member initializer, so an
+// instance is fully initialized (and therefore safe to copy) no matter which
+// constructor ran and whether or not parsing filled the fields in.
+class Sinf : public Atom {
+ public:
+  // Produces an empty, not-yet-valid Sinf; all fields take the defaults
+  // declared next to the members below.
+  Sinf() = default;
+  // Defined out of line.
+  explicit Sinf(Box& aBox);
+
+  // Overrides Atom::IsValid(), which is non-const, hence no const here.
+  bool IsValid() override {
+    return !!mDefaultEncryptionType &&  // Should have an encryption scheme
+           (mDefaultIVSize > 0 ||       // and either a default IV size
+            mDefaultConstantIV.Length() > 0);  // or a constant IV.
+  }
+
+  // Default IV size in bytes; 0 until a value is parsed.
+  uint8_t mDefaultIVSize = 0;
+  // Encryption scheme type; default-constructed until a value is parsed.
+  AtomType mDefaultEncryptionType;
+  // Default key ID. Zero-filled by default so it never holds indeterminate
+  // bytes, e.g. when a default-constructed Sinf is copied.
+  uint8_t mDefaultKeyID[16] = {};
+  // Crypt/skip byte-block pattern; both 0 when no pattern applies.
+  uint8_t mDefaultCryptByteBlock = 0;
+  uint8_t mDefaultSkipByteBlock = 0;
+  // Constant IV bytes; empty unless one was parsed.
+  CopyableTArray<uint8_t> mDefaultConstantIV;
+};
+
+// Parses a box into a Sinf. The result is stored in mSinf and exposed
+// through GetSinf(); callers can check it with Sinf::IsValid().
+class SinfParser {
+ public:
+  // Defined out of line.
+  explicit SinfParser(Box& aBox);
+
+  // Returns a mutable reference to the Sinf owned by this parser. The
+  // reference is only valid while the parser is alive.
+  Sinf& GetSinf() { return mSinf; }
+
+ private:
+  // Per-box-type helpers. Each reports failure through the Result's
+  // nsresult error.
+  Result<Ok, nsresult> ParseSchm(Box& aBox);
+  Result<Ok, nsresult> ParseSchi(Box& aBox);
+  Result<Ok, nsresult> ParseTenc(Box& aBox);
+
+  // Accumulates the parsed values.
+  Sinf mSinf;
+};
+
+}  // namespace mozilla
+
+#endif  // SINF_PARSER_H_
diff --git a/dom/media/mp4/moz.build b/dom/media/mp4/moz.build
new file mode 100644
index 0000000000..48fce2a040
--- /dev/null
+++ b/dom/media/mp4/moz.build
@@ -0,0 +1,45 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Header files from this directory that are exported for use by other code.
+EXPORTS += [
+    "Atom.h",
+    "AtomType.h",
+    "Box.h",
+    "BufferStream.h",
+    "ByteStream.h",
+    "DecoderData.h",
+    "MoofParser.h",
+    "MP4Decoder.h",
+    "MP4Demuxer.h",
+    "MP4Interval.h",
+    "MP4Metadata.h",
+    "ResourceStream.h",
+    "SampleIterator.h",
+    "SinfParser.h",
+]
+
+# C++ sources compiled for this directory. As UNIFIED_SOURCES, the build
+# system may combine several of them into a single translation unit.
+UNIFIED_SOURCES += [
+    "Box.cpp",
+    "BufferStream.cpp",
+    "DecoderData.cpp",
+    "MoofParser.cpp",
+    "MP4Decoder.cpp",
+    "MP4Demuxer.cpp",
+    "MP4Metadata.cpp",
+    "ResourceStream.cpp",
+    "SampleIterator.cpp",
+    "SinfParser.cpp",
+]
+
+# The objects built here are linked into the "xul" library.
+FINAL_LIBRARY = "xul"
+
+# Suppress signed/unsigned comparison warnings for now; the code in this
+# directory has not been cleaned up for them.
+CXXFLAGS += [
+    "-Wno-sign-compare",
+]
+
+# Add libFuzzer configuration directives
+include("/tools/fuzzing/libfuzzer-config.mozbuild")
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:47:29 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:47:29 +0000
commit	0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree	a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /dom/media/mp4
parent	Initial commit. (diff)
download	firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip