summary | refs | log | tree | commit | diff | stats
path: root/dom/media/mp4
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/mp4')
-rw-r--r-- dom/media/mp4/Atom.h 21
-rw-r--r-- dom/media/mp4/AtomType.h 29
-rw-r--r-- dom/media/mp4/Box.cpp 230
-rw-r--r-- dom/media/mp4/Box.h 100
-rw-r--r-- dom/media/mp4/BufferStream.cpp 59
-rw-r--r-- dom/media/mp4/BufferStream.h 45
-rw-r--r-- dom/media/mp4/ByteStream.h 41
-rw-r--r-- dom/media/mp4/DecoderData.cpp 291
-rw-r--r-- dom/media/mp4/DecoderData.h 74
-rw-r--r-- dom/media/mp4/Index.cpp 707
-rw-r--r-- dom/media/mp4/Index.h 135
-rw-r--r-- dom/media/mp4/MP4Decoder.cpp 222
-rw-r--r-- dom/media/mp4/MP4Decoder.h 52
-rw-r--r-- dom/media/mp4/MP4Demuxer.cpp 557
-rw-r--r-- dom/media/mp4/MP4Demuxer.h 52
-rw-r--r-- dom/media/mp4/MP4Interval.h 131
-rw-r--r-- dom/media/mp4/MP4Metadata.cpp 478
-rw-r--r-- dom/media/mp4/MP4Metadata.h 116
-rw-r--r-- dom/media/mp4/MoofParser.cpp 1276
-rw-r--r-- dom/media/mp4/MoofParser.h 364
-rw-r--r-- dom/media/mp4/ResourceStream.cpp 56
-rw-r--r-- dom/media/mp4/ResourceStream.h 48
-rw-r--r-- dom/media/mp4/SinfParser.cpp 95
-rw-r--r-- dom/media/mp4/SinfParser.h 56
-rw-r--r-- dom/media/mp4/moz.build 45
25 files changed, 5280 insertions, 0 deletions
diff --git a/dom/media/mp4/Atom.h b/dom/media/mp4/Atom.h
new file mode 100644
index 0000000000..f008dfe148
--- /dev/null
+++ b/dom/media/mp4/Atom.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ATOM_H_
+#define ATOM_H_
+
+namespace mozilla {
+
+// Minimal base type that carries a validity flag. The flag starts out false
+// and is protected, so only derived types can change it.
+class Atom {
+ public:
+  Atom() = default;
+
+  // Reports the current value of the validity flag. Declared virtual, so a
+  // derived type may replace this check.
+  virtual bool IsValid() { return mValid; }
+
+ protected:
+  bool mValid = false;
+};
+
+} // namespace mozilla
+
+#endif // ATOM_H_
diff --git a/dom/media/mp4/AtomType.h b/dom/media/mp4/AtomType.h
new file mode 100644
index 0000000000..dcecde845d
--- /dev/null
+++ b/dom/media/mp4/AtomType.h
@@ -0,0 +1,29 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ATOM_TYPE_H_
+#define ATOM_TYPE_H_
+
+#include <stdint.h>
+#include "mozilla/EndianUtils.h"
+
+namespace mozilla {
+
+// Value type wrapping a 32-bit atom/box type code.
+class AtomType {
+ public:
+  // A default-constructed AtomType holds 0, which operator! reports as unset.
+  AtomType() = default;
+  MOZ_IMPLICIT AtomType(uint32_t aType) : mType{aType} {}
+  // Builds the code by reading a big-endian 32-bit value from |aType|.
+  MOZ_IMPLICIT AtomType(const char* aType)
+      : mType{BigEndian::readUint32(aType)} {}
+
+  bool operator==(const AtomType& aType) const { return aType.mType == mType; }
+  // True when the stored code is 0.
+  bool operator!() const { return mType == 0; }
+
+ private:
+  uint32_t mType = 0;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/Box.cpp b/dom/media/mp4/Box.cpp
new file mode 100644
index 0000000000..334ba3e3f8
--- /dev/null
+++ b/dom/media/mp4/Box.cpp
@@ -0,0 +1,230 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Box.h"
+#include "ByteStream.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Unused.h"
+#include <algorithm>
+
+namespace mozilla {
+
+// Limit reads to 32MiB max.
+// Box::Read() and Box::ReadAsSlice() apply this cap only when the source
+// cannot report its length.
+// static
+const uint64_t Box::kMAX_BOX_READ = 32 * 1024 * 1024;
+
+// Gives the distance, in bytes, from the beginning of the body of a box of
+// type |aType| to the beginning of its first child box. Types that are not
+// listed here yield 0, i.e. children start right at the body.
+static uint32_t BoxOffset(AtomType aType) {
+  const uint32_t FULLBOX_OFFSET = 4;
+
+  // AudioSampleEntry; ISO 14496-12, section 8.16
+  if (aType == AtomType("mp4a") || aType == AtomType("enca")) {
+    return 28;
+  }
+
+  // VideoSampleEntry; ISO 14496-12, section 8.16
+  if (aType == AtomType("mp4v") || aType == AtomType("encv")) {
+    return 78;
+  }
+
+  // SampleDescriptionBox; ISO 14496-12, section 8.16
+  // This is a FullBox, and contains a |count| member before its child
+  // boxes.
+  if (aType == AtomType("stsd")) {
+    return FULLBOX_OFFSET + 4;
+  }
+
+  return 0;
+}
+
+// Parses the box header located at |aOffset| in |aContext|'s source.
+//
+// |aParent|, when non-null, is the enclosing box; the header and the whole
+// box must lie inside the parent's range. They must also lie inside a single
+// entry of the context's byte ranges.
+//
+// On any failure (offset overflow, header outside the parent or outside the
+// byte ranges, short read, box extending past its container) the constructor
+// returns early and mRange stays empty, so IsAvailable() reports false.
+// mBodyOffset and mChildOffset are zero-initialized up front so that they
+// never hold indeterminate values on those early-return paths.
+Box::Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent)
+    : mContext(aContext), mBodyOffset(0), mChildOffset(0), mParent(aParent) {
+  uint8_t header[8];
+
+  if (aOffset > INT64_MAX - sizeof(header)) {
+    return;
+  }
+
+  MediaByteRange headerRange(aOffset, aOffset + sizeof(header));
+  if (mParent && !mParent->mRange.Contains(headerRange)) {
+    return;
+  }
+
+  // Find the byte range that holds the header. The index is size_t so that
+  // the comparison with Length() does not mix signed and unsigned values.
+  const MediaByteRange* byteRange = nullptr;
+  for (size_t i = 0;; i++) {
+    if (i == mContext->mByteRanges.Length()) {
+      return;
+    }
+
+    byteRange = static_cast<const MediaByteRange*>(&mContext->mByteRanges[i]);
+    if (byteRange->Contains(headerRange)) {
+      break;
+    }
+  }
+
+  size_t bytes;
+  if (!mContext->mSource->CachedReadAt(aOffset, header, sizeof(header),
+                                       &bytes) ||
+      bytes != sizeof(header)) {
+    return;
+  }
+
+  uint64_t size = BigEndian::readUint32(header);
+  if (size == 1) {
+    // A 32-bit size of 1 means a 64-bit size follows the 8-byte header.
+    uint8_t bigLength[8];
+    if (aOffset > INT64_MAX - sizeof(header) - sizeof(bigLength)) {
+      return;
+    }
+    MediaByteRange bigLengthRange(headerRange.mEnd,
+                                  headerRange.mEnd + sizeof(bigLength));
+    if ((mParent && !mParent->mRange.Contains(bigLengthRange)) ||
+        !byteRange->Contains(bigLengthRange) ||
+        !mContext->mSource->CachedReadAt(aOffset + sizeof(header), bigLength,
+                                         sizeof(bigLength), &bytes) ||
+        bytes != sizeof(bigLength)) {
+      return;
+    }
+    size = BigEndian::readUint64(bigLength);
+    mBodyOffset = bigLengthRange.mEnd;
+  } else if (size == 0) {
+    // box extends to end of file.
+    size = mContext->mByteRanges.LastInterval().mEnd - aOffset;
+    mBodyOffset = headerRange.mEnd;
+  } else {
+    mBodyOffset = headerRange.mEnd;
+  }
+
+  if (size > INT64_MAX) {
+    return;
+  }
+  int64_t end = static_cast<int64_t>(aOffset) + static_cast<int64_t>(size);
+  if (end < static_cast<int64_t>(aOffset)) {
+    // Overflowed.
+    return;
+  }
+
+  mType = BigEndian::readUint32(&header[4]);
+  mChildOffset = mBodyOffset + BoxOffset(mType);
+
+  MediaByteRange boxRange(aOffset, end);
+  if (mChildOffset > boxRange.mEnd ||
+      (mParent && !mParent->mRange.Contains(boxRange)) ||
+      !byteRange->Contains(boxRange)) {
+    return;
+  }
+
+  // Only a fully validated box gets a non-empty range.
+  mRange = boxRange;
+}
+
+// Builds a box with no context, no parent and zero offsets. mRange is left
+// default-constructed.
+Box::Box()
+ : mContext(nullptr), mBodyOffset(0), mChildOffset(0), mParent(nullptr) {}
+
+// Parses the box that begins where this one ends, using the same context and
+// parent. Asserts that this box is available.
+Box Box::Next() const {
+  MOZ_ASSERT(IsAvailable());
+  const auto siblingOffset = mRange.mEnd;
+  return Box(mContext, siblingOffset, mParent);
+}
+
+// Parses the first child box, which starts at mChildOffset and has this box
+// as its parent. When the child offset coincides with the end of this box
+// there is no room for a child and a default-constructed Box is returned.
+// Asserts that this box is available.
+Box Box::FirstChild() const {
+  MOZ_ASSERT(IsAvailable());
+  const bool hasRoomForChild = !(mChildOffset == mRange.mEnd);
+  return hasRoomForChild ? Box(mContext, mChildOffset, this) : Box();
+}
+
+// Reads every byte of mRange (header and body) from the source's cache.
+// Returns the bytes on success; if the read fails or comes up short, logs a
+// warning and returns an empty array.
+nsTArray<uint8_t> Box::ReadCompleteBox() const {
+  const size_t boxSize = mRange.mEnd - mRange.mStart;
+  nsTArray<uint8_t> buffer(boxSize);
+  buffer.SetLength(boxSize);
+
+  size_t got = 0;
+  const bool readOk = mContext->mSource->CachedReadAt(
+      mRange.mStart, buffer.Elements(), boxSize, &got);
+  if (readOk && got == boxSize) {
+    return buffer;
+  }
+
+  // Byte ranges are being reported incorrectly
+  NS_WARNING("Read failed in mozilla::Box::ReadCompleteBox()");
+  return nsTArray<uint8_t>(0);
+}
+
+// Convenience overload: reads up to the end of this box's own range and
+// returns the bytes. The success flag of the two-argument overload is
+// deliberately ignored; that overload clears the array on failure, so a
+// failed read yields an empty result.
+nsTArray<uint8_t> Box::Read() const {
+  nsTArray<uint8_t> body;
+  const bool succeeded = Read(&body, mRange);
+  Unused << succeeded;
+  return body;
+}
+
+// Reads box content into |aDest|, starting at this box's mChildOffset and
+// ending at |aRange|.mEnd (|aRange|.mStart is not consulted).
+// Returns true when exactly the requested number of bytes was read from the
+// source's cache; otherwise logs a warning, clears |aDest| and returns false.
+bool Box::Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const {
+ int64_t length;
+ if (!mContext->mSource->Length(&length)) {
+ // The HTTP server didn't give us a length to work with.
+ // Limit the read to kMAX_BOX_READ max.
+ length = std::min(aRange.mEnd - mChildOffset, kMAX_BOX_READ);
+ } else {
+ // The value Length() stored in |length| is overwritten here; only the
+ // success of the call is used.
+ length = aRange.mEnd - mChildOffset;
+ }
+ aDest->SetLength(length);
+ size_t bytes;
+ if (!mContext->mSource->CachedReadAt(mChildOffset, aDest->Elements(),
+ aDest->Length(), &bytes) ||
+ bytes != aDest->Length()) {
+ // Byte ranges are being reported incorrectly
+ NS_WARNING("Read failed in mozilla::Box::Read()");
+ aDest->Clear();
+ return false;
+ }
+ return true;
+}
+
+// Returns a slice covering this box's content from mChildOffset to the end of
+// mRange. The slice points straight into the source's storage when the
+// source offers contiguous access; otherwise the bytes are copied into
+// memory obtained from the context's allocator.
+// Returns {nullptr, 0} when the box has no context, its range is empty, or
+// the read fails.
+ByteSlice Box::ReadAsSlice() {
+ if (!mContext || mRange.IsEmpty()) {
+ return ByteSlice{nullptr, 0};
+ }
+
+ int64_t length;
+ if (!mContext->mSource->Length(&length)) {
+ // The HTTP server didn't give us a length to work with.
+ // Limit the read to kMAX_BOX_READ max.
+ length = std::min(mRange.mEnd - mChildOffset, kMAX_BOX_READ);
+ } else {
+ // The value Length() stored in |length| is overwritten here; only the
+ // success of the call is used.
+ length = mRange.mEnd - mChildOffset;
+ }
+
+ const uint8_t* data =
+ mContext->mSource->GetContiguousAccess(mChildOffset, length);
+ if (data) {
+ // We can direct access the underlying storage of the ByteStream.
+ return ByteSlice{data, size_t(length)};
+ }
+
+ // No direct access: copy into allocator-owned memory instead.
+ uint8_t* p = mContext->mAllocator.Allocate(size_t(length));
+ size_t bytes;
+ // NOTE(review): |bytes| is size_t while |length| is int64_t, so the
+ // comparison below mixes signedness.
+ if (!mContext->mSource->CachedReadAt(mChildOffset, p, length, &bytes) ||
+ bytes != length) {
+ // Byte ranges are being reported incorrectly
+ NS_WARNING("Read failed in mozilla::Box::ReadAsSlice()");
+ return ByteSlice{nullptr, 0};
+ }
+ return ByteSlice{p, size_t(length)};
+}
+
+const size_t BLOCK_CAPACITY = 16 * 1024;
+
+// Hands out |aNumBytes| bytes of storage owned by this allocator.
+//
+// Requests larger than BLOCK_CAPACITY get a dedicated buffer of exactly that
+// size. Smaller requests are carved from the first existing buffer whose
+// length plus |aNumBytes| stays below BLOCK_CAPACITY; if none qualifies, a
+// new buffer with BLOCK_CAPACITY capacity is appended and used.
+uint8_t* BumpAllocator::Allocate(size_t aNumBytes) {
+  const bool oversized = aNumBytes > BLOCK_CAPACITY;
+
+  if (!oversized) {
+    for (nsTArray<uint8_t>& block : mBuffers) {
+      const size_t used = block.Length();
+      if (used + aNumBytes < BLOCK_CAPACITY) {
+        block.SetLength(used + aNumBytes);
+        return block.Elements() + used;
+      }
+    }
+  }
+
+  // Nothing reusable: start a fresh buffer.
+  const size_t capacity = oversized ? aNumBytes : BLOCK_CAPACITY;
+  mBuffers.AppendElement(nsTArray<uint8_t>(capacity));
+  nsTArray<uint8_t>& fresh = mBuffers.LastElement();
+  fresh.SetLength(aNumBytes);
+  return fresh.Elements();
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/Box.h b/dom/media/mp4/Box.h
new file mode 100644
index 0000000000..e63bfbcc90
--- /dev/null
+++ b/dom/media/mp4/Box.h
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BOX_H_
+#define BOX_H_
+
+#include <stdint.h>
+#include "nsTArray.h"
+#include "MediaResource.h"
+#include "mozilla/EndianUtils.h"
+#include "AtomType.h"
+#include "BufferReader.h"
+
+namespace mozilla {
+class ByteStream;
+
+// Allocator that only hands out memory and never frees individual
+// allocations; all storage lives in mBuffers.
+class BumpAllocator {
+ public:
+ // Returns a pointer to |aNumBytes| bytes stored in one of mBuffers.
+ uint8_t* Allocate(size_t aNumBytes);
+
+ private:
+ // Backing buffers from which allocations are carved.
+ nsTArray<nsTArray<uint8_t>> mBuffers;
+};
+
+// Bundles what Box needs in order to parse: the byte source, the set of byte
+// ranges boxes must lie within, and an allocator for copied box data.
+class BoxContext {
+ public:
+ BoxContext(ByteStream* aSource, const MediaByteRangeSet& aByteRanges)
+ : mSource(aSource), mByteRanges(aByteRanges) {}
+
+ RefPtr<ByteStream> mSource;
+ // Held by reference: |aByteRanges| must outlive this context.
+ const MediaByteRangeSet& mByteRanges;
+ BumpAllocator mAllocator;
+};
+
+// Pointer-plus-length view of bytes. Box::ReadAsSlice() returns
+// {nullptr, 0} to signal that no data is available.
+struct ByteSlice {
+ const uint8_t* mBytes;
+ size_t mSize;
+};
+
+// One box found at a given offset of a BoxContext's source. A box whose
+// header could not be parsed or validated has an empty range; check
+// IsAvailable() before using it.
+class Box {
+ public:
+ // Parses the box at |aOffset|; |aParent|, if given, must contain it.
+ Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent = nullptr);
+ // Creates a box with no context and no parent.
+ Box();
+
+ bool IsAvailable() const { return !mRange.IsEmpty(); }
+ uint64_t Offset() const { return mRange.mStart; }
+ uint64_t Length() const { return mRange.mEnd - mRange.mStart; }
+ uint64_t NextOffset() const { return mRange.mEnd; }
+ const MediaByteRange& Range() const { return mRange; }
+ const Box* Parent() const { return mParent; }
+ bool IsType(const char* aType) const { return mType == AtomType(aType); }
+
+ // Parses the box that starts where this one ends.
+ Box Next() const;
+ // Parses the first child of this box, if there is room for one.
+ Box FirstChild() const;
+ // Reads the box contents, excluding the header.
+ nsTArray<uint8_t> Read() const;
+
+ // Reads the complete box; its header and body.
+ nsTArray<uint8_t> ReadCompleteBox() const;
+
+ // Reads from the content of the box, excluding header.
+ bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const;
+
+ static const uint64_t kMAX_BOX_READ;
+
+ // Returns a slice, pointing to the data of this box. The lifetime of
+ // the memory this slice points to matches the box's context's lifetime.
+ ByteSlice ReadAsSlice();
+
+ private:
+ // NOTE(review): no definition of Contains() appears in Box.cpp — confirm
+ // whether this declaration is still needed.
+ bool Contains(MediaByteRange aRange) const;
+ BoxContext* mContext;
+ // Full extent of the box (header included); empty when unavailable.
+ mozilla::MediaByteRange mRange;
+ // Offset of the first byte after the box header.
+ uint64_t mBodyOffset;
+ // Offset at which child boxes, and the data returned by Read(), start.
+ uint64_t mChildOffset;
+ AtomType mType;
+ const Box* mParent;
+};
+
+// BoxReader serves box data through a BufferReader. The box data is
+// stored either in the box's context's bump allocator, or in the ByteStream
+// itself if the ByteStream implements the GetContiguousAccess() method.
+// NOTE: The data the BoxReader reads may be stored in the Box's BoxContext.
+// Ensure that the BoxReader doesn't outlive the BoxContext!
+class MOZ_RAII BoxReader {
+ public:
+ // Fetches the box's data once, at construction, via Box::ReadAsSlice().
+ explicit BoxReader(Box& aBox)
+ : mData(aBox.ReadAsSlice()), mReader(mData.mBytes, mData.mSize) {}
+ // Gives access to the underlying BufferReader.
+ BufferReader* operator->() { return &mReader; }
+
+ private:
+ // Declared before mReader because mReader is initialized from it.
+ ByteSlice mData;
+ BufferReader mReader;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/BufferStream.cpp b/dom/media/mp4/BufferStream.cpp
new file mode 100644
index 0000000000..c2fa40cb8a
--- /dev/null
+++ b/dom/media/mp4/BufferStream.cpp
@@ -0,0 +1,59 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BufferStream.h"
+#include "MediaData.h"
+#include "MediaResource.h"
+#include <algorithm>
+
+namespace mozilla {
+
+// Creates a stream that starts at offset 0 and is backed by a newly
+// allocated MediaByteBuffer.
+BufferStream::BufferStream()
+ : mStartOffset(0), mData(new mozilla::MediaByteBuffer) {}
+
+// Creates a stream that starts at offset 0 and is backed by |aBuffer|. The
+// buffer is not copied; mData keeps a reference to it.
+BufferStream::BufferStream(mozilla::MediaByteBuffer* aBuffer)
+ : mStartOffset(0), mData(aBuffer) {}
+
+// Nothing to release by hand; members clean up after themselves.
+BufferStream::~BufferStream() = default;
+
+/*virtual*/
+// Copies up to |aLength| bytes starting at stream offset |aOffset| into
+// |aData| and stores the number of bytes copied in |aBytesRead|. Returns
+// false, leaving the outputs untouched, when |aOffset| lies before the first
+// retained byte or past the end of the buffered data. Reading exactly at the
+// end succeeds with zero bytes.
+bool BufferStream::ReadAt(int64_t aOffset, void* aData, size_t aLength,
+                          size_t* aBytesRead) {
+  const auto endOffset = mStartOffset + mData->Length();
+  const bool beforeStart = aOffset < mStartOffset;
+  if (beforeStart || aOffset > endOffset) {
+    return false;
+  }
+
+  const size_t available = size_t(endOffset - aOffset);
+  *aBytesRead = std::min(aLength, available);
+  memcpy(aData, mData->Elements() + aOffset - mStartOffset, *aBytesRead);
+  return true;
+}
+
+/*virtual*/
+// Forwards to ReadAt() with the same arguments.
+bool BufferStream::CachedReadAt(int64_t aOffset, void* aData, size_t aLength,
+ size_t* aBytesRead) {
+ return ReadAt(aOffset, aData, aLength, aBytesRead);
+}
+
+/*virtual*/
+// Stores the stream offset just past the last buffered byte (start offset
+// plus the number of bytes held) in |aLength|. Always returns true.
+bool BufferStream::Length(int64_t* aLength) {
+ *aLength = mStartOffset + mData->Length();
+ return true;
+}
+
+/* virtual */
+// Drops the buffered bytes that precede stream offset |aOffset| and moves the
+// stream's start offset there. Does nothing when |aOffset| is not past the
+// current start offset.
+void BufferStream::DiscardBefore(int64_t aOffset) {
+  if (aOffset <= mStartOffset) {
+    return;
+  }
+  const int64_t dropCount = aOffset - mStartOffset;
+  mData->RemoveElementsAt(0, dropCount);
+  mStartOffset = aOffset;
+}
+
+// Appends |aLength| bytes from |aData| to the buffer using a fallible
+// append, and returns whether the append succeeded.
+bool BufferStream::AppendBytes(const uint8_t* aData, size_t aLength) {
+ return mData->AppendElements(aData, aLength, fallible);
+}
+
+// Returns the range of stream offsets currently buffered: from the start
+// offset to the start offset plus the number of bytes held.
+MediaByteRange BufferStream::GetByteRange() {
+ return MediaByteRange(mStartOffset, mStartOffset + mData->Length());
+}
+} // namespace mozilla
diff --git a/dom/media/mp4/BufferStream.h b/dom/media/mp4/BufferStream.h
new file mode 100644
index 0000000000..fb817b5916
--- /dev/null
+++ b/dom/media/mp4/BufferStream.h
@@ -0,0 +1,45 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BUFFER_STREAM_H_
+#define BUFFER_STREAM_H_
+
+#include "ByteStream.h"
+#include "nsTArray.h"
+#include "MediaResource.h"
+
+namespace mozilla {
+class MediaByteBuffer;
+
+DDLoggedTypeDeclNameAndBase(BufferStream, ByteStream);
+
+// ByteStream backed by an in-memory MediaByteBuffer. mStartOffset is the
+// stream offset of the first byte still held in the buffer.
+class BufferStream : public ByteStream,
+ public mozilla::DecoderDoctorLifeLogger<BufferStream> {
+ public:
+ /* The buffer passed to the second constructor is not copied. It is stored
+ * in mData, which is a RefPtr, so the stream holds a reference to it.
+ * NOTE(review): the previous comment here said that BufferStream "does not
+ * take ownership of aData"; that does not match the RefPtr member — confirm.
+ */
+ BufferStream();
+ explicit BufferStream(mozilla::MediaByteBuffer* aBuffer);
+
+ virtual bool ReadAt(int64_t aOffset, void* aData, size_t aLength,
+ size_t* aBytesRead) override;
+ virtual bool CachedReadAt(int64_t aOffset, void* aData, size_t aLength,
+ size_t* aBytesRead) override;
+ virtual bool Length(int64_t* aLength) override;
+
+ virtual void DiscardBefore(int64_t aOffset) override;
+
+ // Appends bytes to the end of the buffer; returns false if that fails.
+ bool AppendBytes(const uint8_t* aData, size_t aLength);
+
+ // Range of stream offsets currently held in the buffer.
+ mozilla::MediaByteRange GetByteRange();
+
+ private:
+ ~BufferStream();
+ int64_t mStartOffset;
+ RefPtr<mozilla::MediaByteBuffer> mData;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/ByteStream.h b/dom/media/mp4/ByteStream.h
new file mode 100644
index 0000000000..0f733dfb97
--- /dev/null
+++ b/dom/media/mp4/ByteStream.h
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef STREAM_H_
+#define STREAM_H_
+
+#include "DecoderDoctorLogger.h"
+#include "nsISupportsImpl.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclName(ByteStream);
+
+// Abstract, reference-counted source of bytes addressed by offset.
+class ByteStream : public DecoderDoctorLifeLogger<ByteStream> {
+ public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ByteStream);
+
+ // Read |size| bytes at |offset| into |data|; the count actually read is
+ // reported through |bytes_read|. The bool result signals success.
+ virtual bool ReadAt(int64_t offset, void* data, size_t size,
+ size_t* bytes_read) = 0;
+ // Same signature as ReadAt().
+ // NOTE(review): presumably restricted to already-cached data — confirm
+ // against the implementations.
+ virtual bool CachedReadAt(int64_t offset, void* data, size_t size,
+ size_t* bytes_read) = 0;
+ // Stores the stream length in |size|; returns false if it is unknown.
+ virtual bool Length(int64_t* size) = 0;
+
+ // Default implementation does nothing.
+ virtual void DiscardBefore(int64_t offset) {}
+
+ // If this ByteStream's underlying storage of media is in-memory, this
+ // function returns a pointer to the in-memory storage of data at offset.
+ // Note that even if a ByteStream stores data in memory, it may not be
+ // stored contiguously, in which case this returns nullptr.
+ // The default implementation always returns nullptr.
+ virtual const uint8_t* GetContiguousAccess(int64_t aOffset, size_t aSize) {
+ return nullptr;
+ }
+
+ protected:
+ // Protected: instances are destroyed through reference counting.
+ virtual ~ByteStream() = default;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/DecoderData.cpp b/dom/media/mp4/DecoderData.cpp
new file mode 100644
index 0000000000..b1b503cb90
--- /dev/null
+++ b/dom/media/mp4/DecoderData.cpp
@@ -0,0 +1,291 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Adts.h"
+#include "AnnexB.h"
+#include "BufferReader.h"
+#include "DecoderData.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Telemetry.h"
+#include "VideoUtils.h"
+
+// OpusDecoder header is really needed only by MP4 in rust
+#include "OpusDecoder.h"
+#include "mp4parse.h"
+
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+
+// Parses |aData| as a sequence of records, each made of a 16-byte uuid, a
+// uint32_t length and that many bytes of data, appending one PsshInfo to
+// |pssh| per record. Returns NS_ERROR_FAILURE as soon as a record cannot be
+// read in full; records appended before that point are kept.
+mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate(
+    const uint8_t* aData, size_t aLength) {
+  for (BufferReader input(aData, aLength); input.Remaining();) {
+    PsshInfo entry;
+    if (!input.ReadArray(entry.uuid, 16) || !input.CanReadType<uint32_t>()) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+
+    const auto payloadSize = input.ReadType<uint32_t>();
+    if (!input.ReadArray(entry.data, payloadSize)) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+
+    pssh.AppendElement(std::move(entry));
+  }
+  return mozilla::Ok();
+}
+
+// Copies the encryption parameters of |aSinf| into |aConfig|.mCrypto.
+// Unencrypted entries leave |aConfig| untouched and succeed. Encrypted
+// entries must use the cenc or cbcs scheme; any other scheme yields
+// NS_ERROR_DOM_MEDIA_METADATA_ERR without modifying |aConfig|.
+static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig,
+                                            const Mp4parseSinfInfo& aSinf) {
+  if (aSinf.is_encrypted == 0) {
+    return NS_OK;
+  }
+
+  auto& crypto = aConfig.mCrypto;
+  if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC) {
+    crypto.mCryptoScheme = CryptoScheme::Cenc;
+  } else if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS) {
+    crypto.mCryptoScheme = CryptoScheme::Cbcs;
+  } else {
+    // Unsupported encryption type;
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL(
+            "Unsupported encryption scheme encountered aSinf.scheme_type=%d",
+            static_cast<int>(aSinf.scheme_type)));
+  }
+
+  crypto.mIVSize = aSinf.iv_size;
+  crypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length);
+  crypto.mCryptByteBlock = aSinf.crypt_byte_block;
+  crypto.mSkipByteBlock = aSinf.skip_byte_block;
+  crypto.mConstantIV.AppendElements(aSinf.constant_iv.data,
+                                    aSinf.constant_iv.length);
+  return NS_OK;
+}
+
+// Verify various information shared by Mp4ParseTrackAudioInfo and
+// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an
+// appropriate MediaResult indicating if the info is valid or not.
+// This verifies:
+// - That we have a sample_info_count > 0 (valid tracks should have at least one
+//   sample description entry)
+// - That only a single codec is used across all sample infos, as we don't
+//   handle multiple.
+// - If more than one sample information structures contain crypto info. This
+//   case is not fatal (we don't return an error), but does record telemetry
+//   to help judge if we need more handling in gecko for multiple crypto.
+//
+// Telemetry is also recorded on the above. As of writing, the
+// telemetry is recorded to give us early warning if MP4s exist that we're not
+// handling. Note, if adding new checks and telemetry to this function,
+// telemetry should be recorded before returning to ensure it is gathered.
+//
+// sample_info[0] is only dereferenced inside the loop, so nothing is read
+// from the array when sample_info_count is 0.
+template <typename Mp4ParseTrackAudioOrVideoInfo>
+static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry(
+    Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) {
+  Telemetry::Accumulate(
+      Telemetry::MEDIA_MP4_PARSE_NUM_SAMPLE_DESCRIPTION_ENTRIES,
+      audioOrVideoInfo->sample_info_count);
+
+  bool hasMultipleCodecs = false;
+  uint32_t cryptoCount = 0;
+  for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) {
+    // Compare every entry against the first one. Entry 0 exists here because
+    // the loop body only runs when there is at least one entry.
+    if (audioOrVideoInfo->sample_info[i].codec_type !=
+        audioOrVideoInfo->sample_info[0].codec_type) {
+      hasMultipleCodecs = true;
+    }
+
+    // Count the entries that carry encryption info.
+    if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) {
+      cryptoCount += 1;
+    }
+  }
+
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CODECS,
+      hasMultipleCodecs);
+
+  // Accumulate if we have multiple (2 or more) crypto entries.
+  // TODO(1715283): rework this to count number of crypto entries + gather
+  // richer data.
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CRYPTO,
+      cryptoCount >= 2);
+
+  if (audioOrVideoInfo->sample_info_count == 0) {
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Got 0 sample info while verifying track."));
+  }
+
+  if (hasMultipleCodecs) {
+    // Different codecs in a single track. We don't handle this.
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Multiple codecs encountered while verifying track."));
+  }
+
+  return NS_OK;
+}
+
+// Fills in this AudioInfo from the parser's |track| and |audio| structures.
+// Returns the error from VerifyAudioOrVideoInfoAndRecordTelemetry() or from
+// UpdateTrackProtectedInfo() if either fails, NS_OK otherwise.
+// Crypto info is taken from the first encrypted sample info; everything else
+// (codec, rate, channels, profile, ...) is taken from sample_info[0].
+MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* track,
+ const Mp4parseTrackAudioInfo* audio) {
+ auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(audio);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ Mp4parseCodec codecType = audio->sample_info[0].codec_type;
+ // Only the first encrypted entry is used; the loop stops after it.
+ for (uint32_t i = 0; i < audio->sample_info_count; i++) {
+ if (audio->sample_info[i].protected_data.is_encrypted) {
+ auto rv =
+ UpdateTrackProtectedInfo(*this, audio->sample_info[i].protected_data);
+ NS_ENSURE_SUCCESS(rv, rv);
+ break;
+ }
+ }
+
+ // We assume that the members of the first sample info are representative of
+ // the entire track. This code will need to be updated should this assumption
+ // ever not hold. E.g. if we need to handle different codecs in a single
+ // track, or if we have different numbers or channels in a single track.
+ Mp4parseByteData mp4ParseSampleCodecSpecific =
+ audio->sample_info[0].codec_specific_config;
+ Mp4parseByteData extraData = audio->sample_info[0].extra_data;
+ MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(),
+ "Should have no codec specific data yet");
+ if (codecType == MP4PARSE_CODEC_OPUS) {
+ mMimeType = "audio/opus"_ns;
+ OpusCodecSpecificData opusCodecSpecificData{};
+ // The Opus decoder expects the container's codec delay or
+ // pre-skip value, in microseconds, as a 64-bit int at the
+ // start of the codec-specific config blob.
+ // The pre-skip is read as a little-endian 16-bit value at byte offset 10,
+ // hence the requirement for at least 12 bytes.
+ if (mp4ParseSampleCodecSpecific.data &&
+ mp4ParseSampleCodecSpecific.length >= 12) {
+ uint16_t preskip = mozilla::LittleEndian::readUint16(
+ mp4ParseSampleCodecSpecific.data + 10);
+ opusCodecSpecificData.mContainerCodecDelayMicroSeconds =
+ mozilla::FramesToUsecs(preskip, 48000).value();
+ } else {
+ // This file will error later as it will be rejected by the opus decoder.
+ opusCodecSpecificData.mContainerCodecDelayMicroSeconds = 0;
+ }
+ opusCodecSpecificData.mHeadersBinaryBlob->AppendElements(
+ mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+ mCodecSpecificConfig =
+ AudioCodecSpecificVariant{std::move(opusCodecSpecificData)};
+ } else if (codecType == MP4PARSE_CODEC_AAC) {
+ mMimeType = "audio/mp4a-latm"_ns;
+ AacCodecSpecificData aacCodecSpecificData{};
+ // codec specific data is used to store the DecoderConfigDescriptor.
+ aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements(
+ mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+ // extra data stores the ES_Descriptor.
+ aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements(
+ extraData.data, extraData.length);
+ mCodecSpecificConfig =
+ AudioCodecSpecificVariant{std::move(aacCodecSpecificData)};
+ } else if (codecType == MP4PARSE_CODEC_FLAC) {
+ MOZ_ASSERT(extraData.length == 0,
+ "FLAC doesn't expect extra data so doesn't handle it!");
+ mMimeType = "audio/flac"_ns;
+ FlacCodecSpecificData flacCodecSpecificData{};
+ flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements(
+ mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+ mCodecSpecificConfig =
+ AudioCodecSpecificVariant{std::move(flacCodecSpecificData)};
+ } else if (codecType == MP4PARSE_CODEC_MP3) {
+ // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4
+ // specific box, which the rust parser recognizes). However, we don't
+ // handle any such data here.
+ mMimeType = "audio/mpeg"_ns;
+ // TODO(bug 1705812): parse the encoder delay values from the mp4.
+ mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}};
+ }
+
+ mRate = audio->sample_info[0].sample_rate;
+ mChannels = audio->sample_info[0].channels;
+ mBitDepth = audio->sample_info[0].bit_depth;
+ mExtendedProfile = audio->sample_info[0].extended_profile;
+ mDuration = TimeUnit::FromMicroseconds(track->duration);
+ mMediaTime = TimeUnit::FromMicroseconds(track->media_time);
+ mTrackId = track->track_id;
+
+ // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT.
+ // Profile values above 4 are ignored and mProfile keeps its prior value.
+ if (audio->sample_info[0].profile <= 4) {
+ mProfile = audio->sample_info[0].profile;
+ }
+
+ if (mCodecSpecificConfig.is<NoCodecSpecificData>()) {
+ // Handle codecs that are not explicitly handled above.
+ MOZ_ASSERT(
+ extraData.length == 0,
+ "Codecs that use extra data should be explicitly handled already");
+ AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob;
+ // No codec specific metadata set, use the generic form.
+ codecSpecificBinaryBlob.mBinaryBlob->AppendElements(
+ mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+ mCodecSpecificConfig =
+ AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)};
+ }
+
+ return NS_OK;
+}
+
+// An audio track is valid when it has at least one channel and a non-zero
+// sample rate. AAC tracks ("audio/mp4a-latm") must additionally carry a
+// positive profile or extended profile.
+bool MP4AudioInfo::IsValid() const {
+  if (!(mChannels > 0) || !(mRate > 0)) {
+    return false;
+  }
+  // Accept any mime type here, but if it's aac, validate the profile.
+  if (!mMimeType.EqualsLiteral("audio/mp4a-latm")) {
+    return true;
+  }
+  return mProfile > 0 || mExtendedProfile > 0;
+}
+
+// Fills in this VideoInfo from the parser's |track| and |video| structures.
+// Returns the error from VerifyAudioOrVideoInfoAndRecordTelemetry() or from
+// UpdateTrackProtectedInfo() if either fails, NS_OK otherwise.
+// Crypto info is taken from the first encrypted sample info; codec, image
+// size and extra data are taken from sample_info[0].
+MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track,
+ const Mp4parseTrackVideoInfo* video) {
+ auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ Mp4parseCodec codecType = video->sample_info[0].codec_type;
+ // Only the first encrypted entry is used; the loop stops after it.
+ for (uint32_t i = 0; i < video->sample_info_count; i++) {
+ if (video->sample_info[i].protected_data.is_encrypted) {
+ auto rv =
+ UpdateTrackProtectedInfo(*this, video->sample_info[i].protected_data);
+ NS_ENSURE_SUCCESS(rv, rv);
+ break;
+ }
+ }
+
+ // We assume that the members of the first sample info are representative of
+ // the entire track. This code will need to be updated should this assumption
+ // ever not hold. E.g. if we need to handle different codecs in a single
+ // track, or if we have different numbers or channels in a single track.
+ // Codecs not listed below leave mMimeType unchanged.
+ if (codecType == MP4PARSE_CODEC_AVC) {
+ mMimeType = "video/avc"_ns;
+ } else if (codecType == MP4PARSE_CODEC_VP9) {
+ mMimeType = "video/vp9"_ns;
+ } else if (codecType == MP4PARSE_CODEC_AV1) {
+ mMimeType = "video/av1"_ns;
+ } else if (codecType == MP4PARSE_CODEC_MP4V) {
+ mMimeType = "video/mp4v-es"_ns;
+ }
+ mTrackId = track->track_id;
+ mDuration = TimeUnit::FromMicroseconds(track->duration);
+ mMediaTime = TimeUnit::FromMicroseconds(track->media_time);
+ mDisplay.width = video->display_width;
+ mDisplay.height = video->display_height;
+ mImage.width = video->sample_info[0].image_width;
+ mImage.height = video->sample_info[0].image_height;
+ mRotation = ToSupportedRotation(video->rotation);
+ Mp4parseByteData extraData = video->sample_info[0].extra_data;
+ // If length is 0 we append nothing
+ mExtraData->AppendElements(extraData.data, extraData.length);
+ return NS_OK;
+}
+
+// A video track is valid when either its display size or its image size has
+// both dimensions greater than zero.
+bool MP4VideoInfo::IsValid() const {
+  const bool hasDisplaySize = mDisplay.width > 0 && mDisplay.height > 0;
+  if (hasDisplaySize) {
+    return true;
+  }
+  return mImage.width > 0 && mImage.height > 0;
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/DecoderData.h b/dom/media/mp4/DecoderData.h
new file mode 100644
index 0000000000..5509ba9a29
--- /dev/null
+++ b/dom/media/mp4/DecoderData.h
@@ -0,0 +1,74 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DECODER_DATA_H_
+#define DECODER_DATA_H_
+
+#include "MediaInfo.h"
+#include "MediaResult.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Result.h"
+#include "mozilla/Types.h"
+#include "mozilla/Vector.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "mp4parse.h"
+
+namespace mozilla {
+
+class MP4Demuxer;
+
+// One uuid/data pair as parsed by CryptoFile::DoUpdate(). Movable but not
+// copyable.
+struct PsshInfo {
+ PsshInfo() = default;
+ PsshInfo(const PsshInfo& aOther) = delete;
+ PsshInfo(PsshInfo&& aOther) = default;
+
+ nsTArray<uint8_t> uuid;
+ nsTArray<uint8_t> data;
+
+ // Two entries are equal when both uuid and data match.
+ bool operator==(const PsshInfo& aOther) const {
+ return uuid == aOther.uuid && data == aOther.data;
+ }
+};
+
+// Collection of PsshInfo entries parsed from a byte blob. Not copyable.
+class CryptoFile {
+ public:
+ CryptoFile() : valid(false) {}
+ CryptoFile(const CryptoFile& aCryptoFile) = delete;
+
+ // Parses |aData| and records in |valid| whether parsing succeeded.
+ // Entries are appended to |pssh|; entries parsed before a failure remain.
+ void Update(const uint8_t* aData, size_t aLength) {
+ valid = DoUpdate(aData, aLength).isOk();
+ }
+
+ // Result of the most recent Update(); false until Update() succeeds.
+ bool valid;
+ nsTArray<PsshInfo> pssh;
+
+ private:
+ mozilla::Result<mozilla::Ok, nsresult> DoUpdate(const uint8_t* aData,
+ size_t aLength);
+};
+
+// AudioInfo that can be populated from the mp4parse track structures.
+class MP4AudioInfo : public mozilla::AudioInfo {
+ public:
+ MP4AudioInfo() = default;
+
+ // Populates this object from |track| and |audio|; returns an error
+ // MediaResult if the sample info fails verification.
+ MediaResult Update(const Mp4parseTrackInfo* track,
+ const Mp4parseTrackAudioInfo* audio);
+
+ virtual bool IsValid() const override;
+};
+
+// VideoInfo that can be populated from the mp4parse track structures.
+class MP4VideoInfo : public mozilla::VideoInfo {
+ public:
+ MP4VideoInfo() = default;
+
+ // Populates this object from |track| and |video|; returns an error
+ // MediaResult if the sample info fails verification.
+ MediaResult Update(const Mp4parseTrackInfo* track,
+ const Mp4parseTrackVideoInfo* video);
+
+ virtual bool IsValid() const override;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/Index.cpp b/dom/media/mp4/Index.cpp
new file mode 100644
index 0000000000..978f960a2d
--- /dev/null
+++ b/dom/media/mp4/Index.cpp
@@ -0,0 +1,707 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Index.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "BufferReader.h"
+#include "mozilla/RefPtr.h"
+#include "MP4Interval.h"
+#include "MP4Metadata.h"
+#include "SinfParser.h"
+
+using namespace mozilla::media;
+
+namespace mozilla {
+
+// Answers "is this byte range contained in one of the given ranges?" for a
+// sequence of queries. It keeps a cursor (mIndex) into the range set between
+// calls, and holds the set by reference, so the set must outlive the finder.
+class MOZ_STACK_CLASS RangeFinder {
+ public:
+ // Given that we're processing this in order we don't use a binary search
+ // to find the appropriate time range. Instead we search linearly from the
+ // last used point.
+ explicit RangeFinder(const MediaByteRangeSet& ranges)
+ : mRanges(ranges), mIndex(0) {
+ // Ranges must be normalised for this to work
+ }
+
+ // Returns true if some range in the set ContainsStrict()s aByteRange.
+ bool Contains(MediaByteRange aByteRange);
+
+ private:
+ const MediaByteRangeSet& mRanges;
+ // Position in mRanges where the previous Contains() call stopped.
+ size_t mIndex;
+};
+
+// Returns true if one of the ranges in mRanges strictly contains aByteRange.
+// The search starts at the cursor left by the previous call and walks
+// backwards or forwards from there; the cursor is left wherever the walk
+// stopped so that the next query can resume from that point.
+bool RangeFinder::Contains(MediaByteRange aByteRange) {
+ if (mRanges.IsEmpty()) {
+ return false;
+ }
+
+ // Fast path: the range under the cursor already covers the query.
+ if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+ return true;
+ }
+
+ if (aByteRange.mStart < mRanges[mIndex].mStart) {
+ // Search backwards
+ do {
+ if (!mIndex) {
+ // Already at the first range; nothing earlier to try.
+ return false;
+ }
+ --mIndex;
+ if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+ return true;
+ }
+ } while (aByteRange.mStart < mRanges[mIndex].mStart);
+
+ return false;
+ }
+
+ // Search forwards while the query extends past the end of the current range.
+ while (aByteRange.mEnd > mRanges[mIndex].mEnd) {
+ if (mIndex == mRanges.Length() - 1) {
+ // Already at the last range; nothing later to try.
+ return false;
+ }
+ ++mIndex;
+ if (mRanges[mIndex].ContainsStrict(aByteRange)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Creates an iterator positioned at sample 0 of moof 0 and registers it with
+// aIndex. aIndex is dereferenced unconditionally, so it must not be null.
+SampleIterator::SampleIterator(Index* aIndex)
+ : mIndex(aIndex), mCurrentMoof(0), mCurrentSample(0) {
+ mIndex->RegisterIterator(this);
+}
+
+// Removes this iterator from the owning Index's list of registered iterators.
+SampleIterator::~SampleIterator() { mIndex->UnregisterIterator(this); }
+
+// Reads the sample at the current position from the byte stream, fills in its
+// timing and (for fragmented, encrypted content) its crypto metadata, and
+// advances the iterator.
+//
+// Returns nullptr, without advancing, when there is no current sample, the
+// sample extends past the known stream length, allocation or reading fails,
+// or the encryption metadata is missing or inconsistent.
+already_AddRefed<MediaRawData> SampleIterator::GetNext() {
+ Sample* s(Get());
+ if (!s) {
+ return nullptr;
+ }
+
+ // `length` keeps its max() default if Length() does not overwrite it; the
+ // return value of Length() is not checked here.
+ int64_t length = std::numeric_limits<int64_t>::max();
+ mIndex->mSource->Length(&length);
+ if (s->mByteRange.mEnd > length) {
+ // We don't have this complete sample.
+ return nullptr;
+ }
+
+ RefPtr<MediaRawData> sample = new MediaRawData();
+ sample->mTimecode = TimeUnit::FromMicroseconds(s->mDecodeTime);
+ sample->mTime = TimeUnit::FromMicroseconds(s->mCompositionRange.start);
+ sample->mDuration = TimeUnit::FromMicroseconds(s->mCompositionRange.Length());
+ sample->mOffset = s->mByteRange.mStart;
+ sample->mKeyframe = s->mSync;
+
+ UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
+ // Do the blocking read
+ if (!writer->SetSize(s->mByteRange.Length())) {
+ return nullptr;
+ }
+
+ // A short read is treated the same as a failed read.
+ size_t bytesRead;
+ if (!mIndex->mSource->ReadAt(sample->mOffset, writer->Data(), sample->Size(),
+ &bytesRead) ||
+ bytesRead != sample->Size()) {
+ return nullptr;
+ }
+
+ MoofParser* moofParser = mIndex->mMoofParser.get();
+ if (!moofParser) {
+ // File is not fragmented, we can't have crypto, just early return.
+ Next();
+ return sample.forget();
+ }
+
+ // We need to check if this moof has init data the CDM expects us to surface.
+ // This should happen when handling the first sample, even if that sample
+ // isn't encrypted (samples later in the moof may be).
+ if (mCurrentSample == 0) {
+ const nsTArray<Moof>& moofs = moofParser->Moofs();
+ const Moof* currentMoof = &moofs[mCurrentMoof];
+ if (!currentMoof->mPsshes.IsEmpty()) {
+ // This Moof contained crypto init data. Report that. We only report
+ // the init data on the Moof's first sample, to avoid reporting it more
+ // than once per Moof.
+ writer->mCrypto.mInitDatas.AppendElements(currentMoof->mPsshes);
+ writer->mCrypto.mInitDataType = u"cenc"_ns;
+ }
+ }
+
+ auto cryptoSchemeResult = GetEncryptionScheme();
+ if (cryptoSchemeResult.isErr()) {
+ // Log the error here in future.
+ return nullptr;
+ }
+ CryptoScheme cryptoScheme = cryptoSchemeResult.unwrap();
+ if (cryptoScheme == CryptoScheme::None) {
+ // No crypto to handle, early return.
+ Next();
+ return sample.forget();
+ }
+
+ writer->mCrypto.mCryptoScheme = cryptoScheme;
+ MOZ_ASSERT(writer->mCrypto.mCryptoScheme != CryptoScheme::None,
+ "Should have early returned if we don't have a crypto scheme!");
+ MOZ_ASSERT(writer->mCrypto.mKeyId.IsEmpty(),
+ "Sample should not already have a key ID");
+ MOZ_ASSERT(writer->mCrypto.mConstantIV.IsEmpty(),
+ "Sample should not already have a constant IV");
+ CencSampleEncryptionInfoEntry* sampleInfo = GetSampleEncryptionEntry();
+ if (sampleInfo) {
+ // Use sample group information if present, this supersedes track level
+ // information.
+ writer->mCrypto.mKeyId.AppendElements(sampleInfo->mKeyId);
+ writer->mCrypto.mIVSize = sampleInfo->mIVSize;
+ writer->mCrypto.mCryptByteBlock = sampleInfo->mCryptByteBlock;
+ writer->mCrypto.mSkipByteBlock = sampleInfo->mSkipByteBlock;
+ writer->mCrypto.mConstantIV.AppendElements(sampleInfo->mConsantIV);
+ } else {
+ // Use the crypto info from track metadata
+ writer->mCrypto.mKeyId.AppendElements(moofParser->mSinf.mDefaultKeyID, 16);
+ writer->mCrypto.mIVSize = moofParser->mSinf.mDefaultIVSize;
+ writer->mCrypto.mCryptByteBlock = moofParser->mSinf.mDefaultCryptByteBlock;
+ writer->mCrypto.mSkipByteBlock = moofParser->mSinf.mDefaultSkipByteBlock;
+ writer->mCrypto.mConstantIV.AppendElements(
+ moofParser->mSinf.mDefaultConstantIV);
+ }
+
+ if ((writer->mCrypto.mIVSize == 0 && writer->mCrypto.mConstantIV.IsEmpty()) ||
+ (writer->mCrypto.mIVSize != 0 && s->mCencRange.IsEmpty())) {
+ // If mIVSize == 0, this indicates that a constant IV is in use, thus we
+ // should have a non empty constant IV. Alternatively if IV size is non
+ // zero, we should have an IV for this sample, which we need to look up
+ // in mCencRange (which must then be non empty). If neither of these are
+ // true we have bad crypto data, so bail.
+ return nullptr;
+ }
+ // Parse auxiliary information if present
+ if (!s->mCencRange.IsEmpty()) {
+ // The size comes from an 8 bit field
+ AutoTArray<uint8_t, 256> cencAuxInfo;
+ cencAuxInfo.SetLength(s->mCencRange.Length());
+ if (!mIndex->mSource->ReadAt(s->mCencRange.mStart, cencAuxInfo.Elements(),
+ cencAuxInfo.Length(), &bytesRead) ||
+ bytesRead != cencAuxInfo.Length()) {
+ return nullptr;
+ }
+ // The auxiliary data starts with the per-sample IV (mIVSize bytes).
+ BufferReader reader(cencAuxInfo);
+ if (!reader.ReadArray(writer->mCrypto.mIV, writer->mCrypto.mIVSize)) {
+ return nullptr;
+ }
+
+ // Parse the auxiliary information for subsample information
+ auto res = reader.ReadU16();
+ if (res.isOk() && res.unwrap() > 0) {
+ uint16_t count = res.unwrap();
+
+ // Each subsample entry is a 16-bit clear size followed by a 32-bit
+ // encrypted size, i.e. 6 bytes.
+ if (reader.Remaining() < count * 6) {
+ return nullptr;
+ }
+
+ for (size_t i = 0; i < count; i++) {
+ auto res_16 = reader.ReadU16();
+ auto res_32 = reader.ReadU32();
+ if (res_16.isErr() || res_32.isErr()) {
+ return nullptr;
+ }
+ writer->mCrypto.mPlainSizes.AppendElement(res_16.unwrap());
+ writer->mCrypto.mEncryptedSizes.AppendElement(res_32.unwrap());
+ }
+ } else {
+ // No subsample information means the entire sample is encrypted.
+ writer->mCrypto.mPlainSizes.AppendElement(0);
+ writer->mCrypto.mEncryptedSizes.AppendElement(sample->Size());
+ }
+ }
+
+ Next();
+
+ return sample.forget();
+}
+
+// Looks up the sample description entry referenced by the current moof's tfhd
+// box. Returns nullptr when the referenced index does not name an entry in
+// the parser's sample description table. The caller must ensure a moof parser
+// exists; it is dereferenced unconditionally.
+SampleDescriptionEntry* SampleIterator::GetSampleDescriptionEntry() {
+  FallibleTArray<SampleDescriptionEntry>& descriptions =
+      mIndex->mMoofParser->mSampleDescriptions;
+  Moof& moof = mIndex->mMoofParser->Moofs()[mCurrentMoof];
+
+  // The file stores a 1-based index; convert it to a 0-based array position.
+  // An index of 0 wraps around to the largest uint32_t value and is therefore
+  // rejected by the bounds check below.
+  uint32_t position = moof.mTfhd.mDefaultSampleDescriptionIndex;
+  --position;
+
+  if (position < descriptions.Length()) {
+    return &descriptions[position];
+  }
+  // Out of range: the mp4 is malformed.
+  return nullptr;
+}
+
+// Finds the sample-group encryption entry that applies to the current sample,
+// or returns nullptr when the sample belongs to no group, or when the group
+// description index it references is out of range. The caller must ensure a
+// moof parser exists; it is dereferenced unconditionally.
+CencSampleEncryptionInfoEntry* SampleIterator::GetSampleEncryptionEntry() {
+ nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs();
+ Moof* currentMoof = &moofs[mCurrentMoof];
+ SampleToGroupEntry* sampleToGroupEntry = nullptr;
+
+ // Default to using the sample to group entries for the fragment, otherwise
+ // fall back to the sample to group entries for the track.
+ FallibleTArray<SampleToGroupEntry>* sampleToGroupEntries =
+ currentMoof->mFragmentSampleToGroupEntries.Length() != 0
+ ? &currentMoof->mFragmentSampleToGroupEntries
+ : &mIndex->mMoofParser->mTrackSampleToGroupEntries;
+
+ // Running total of the samples covered by the entries visited so far.
+ uint32_t seen = 0;
+
+ // Each entry covers a run of mSampleCount consecutive samples; pick the run
+ // that the current sample falls in.
+ for (SampleToGroupEntry& entry : *sampleToGroupEntries) {
+ if (seen + entry.mSampleCount > mCurrentSample) {
+ sampleToGroupEntry = &entry;
+ break;
+ }
+ seen += entry.mSampleCount;
+ }
+
+ // ISO-14496-12 Section 8.9.2.3 and 8.9.4 : group description index
+ // (1) ranges from 1 to the number of sample group entries in the track
+ // level SampleGroupDescription Box, or (2) takes the value 0 to
+ // indicate that this sample is a member of no group, in this case, the
+ // sample is associated with the default values specified in
+ // TrackEncryption Box, or (3) starts at 0x10001, i.e. the index value
+ // 1, with the value 1 in the top 16 bits, to reference fragment-local
+ // SampleGroupDescription Box.
+
+ // According to the spec, ISO-14496-12, the sum of the sample counts in this
+ // box should be equal to the total number of samples, and, if less, the
+ // reader should behave as if an extra SampleToGroupEntry existed, with
+ // groupDescriptionIndex 0.
+
+ if (!sampleToGroupEntry || sampleToGroupEntry->mGroupDescriptionIndex == 0) {
+ return nullptr;
+ }
+
+ FallibleTArray<CencSampleEncryptionInfoEntry>* entries =
+ &mIndex->mMoofParser->mTrackSampleEncryptionInfoEntries;
+
+ uint32_t groupIndex = sampleToGroupEntry->mGroupDescriptionIndex;
+
+ // If the index is above the fragment base value, it refers to the sample
+ // group descriptions from the fragment rather than those of the track.
+ if (groupIndex > SampleToGroupEntry::kFragmentGroupDescriptionIndexBase) {
+ groupIndex -= SampleToGroupEntry::kFragmentGroupDescriptionIndexBase;
+ entries = &currentMoof->mFragmentSampleEncryptionInfoEntries;
+ }
+
+ // The group_index is one based.
+ return groupIndex > entries->Length() ? nullptr
+ : &entries->ElementAt(groupIndex - 1);
+}
+
+// Decides which encryption scheme applies to the current sample by
+// cross-checking the sample description entry, the sinf box and any sample
+// group encryption entry. Yields CryptoScheme::None for unfragmented or
+// unencrypted content, and an error string when the metadata is missing,
+// contradictory, or names a scheme other than cenc/cbcs.
+Result<CryptoScheme, nsCString> SampleIterator::GetEncryptionScheme() {
+  // See ISO/IEC 23001-7 for information on the metadata being checked.
+  MoofParser* parser = mIndex->mMoofParser.get();
+  if (!parser) {
+    // Without a moof parser the mp4 isn't fragmented, so it can't be
+    // encrypted.
+    return CryptoScheme::None;
+  }
+
+  SampleDescriptionEntry* description = GetSampleDescriptionEntry();
+  if (!description) {
+    // A valid file's tfhd must reference an existing sample description
+    // entry. Should this turn out to be common, falling back to the first
+    // entry for out-of-bounds indices could be considered.
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme due to bad index for sample "
+        "description entry."));
+  }
+
+  if (!description->mIsEncryptedEntry) {
+    return CryptoScheme::None;
+  }
+
+  if (!parser->mSinf.IsValid()) {
+    // The entry claims encryption, yet there is no valid sinf box. Since the
+    // sinf box is part of the sample description entry, this points at a
+    // malformed file.
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme. Sample description entry "
+        "indicates encryption, but could not find associated sinf box."));
+  }
+
+  // Sample group info is optional, but when present it has to agree with the
+  // rest of the metadata.
+  CencSampleEncryptionInfoEntry* groupEntry = GetSampleEncryptionEntry();
+  const bool groupSaysUnencrypted = groupEntry && !groupEntry->mIsEncrypted;
+  if (groupSaysUnencrypted) {
+    return mozilla::Err(nsLiteralCString(
+        "Could not determine encryption scheme. Sample description entry "
+        "indicates encryption, but sample encryption entry indicates sample is "
+        "not encrypted. These should be consistent."));
+  }
+
+  if (parser->mSinf.mDefaultEncryptionType == AtomType("cenc")) {
+    return CryptoScheme::Cenc;
+  }
+  if (parser->mSinf.mDefaultEncryptionType == AtomType("cbcs")) {
+    return CryptoScheme::Cbcs;
+  }
+  return mozilla::Err(nsLiteralCString(
+      "Could not determine encryption scheme. Sample description entry "
+      "reports sample is encrypted, but no scheme, or an unsupported scheme "
+      "is in use."));
+}
+
+// Returns a pointer to the sample at the current position, or nullptr when
+// there is none. For fragmented files this may advance mCurrentMoof (and
+// reset mCurrentSample) past exhausted moofs, and may ask the parser to read
+// further moofs from the stream.
+Sample* SampleIterator::Get() {
+ if (!mIndex->mMoofParser) {
+ // Unfragmented file: a single flat sample table, indexed by mCurrentSample.
+ MOZ_ASSERT(!mCurrentMoof);
+ return mCurrentSample < mIndex->mIndex.Length()
+ ? &mIndex->mIndex[mCurrentSample]
+ : nullptr;
+ }
+
+ nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs();
+ while (true) {
+ if (mCurrentMoof == moofs.Length()) {
+ // Ran off the end of the parsed moofs; try to parse another one.
+ if (!mIndex->mMoofParser->BlockingReadNextMoof()) {
+ return nullptr;
+ }
+ MOZ_ASSERT(mCurrentMoof < moofs.Length());
+ }
+ if (mCurrentSample < moofs[mCurrentMoof].mIndex.Length()) {
+ break;
+ }
+ // Current moof is exhausted; continue with the first sample of the next.
+ mCurrentSample = 0;
+ ++mCurrentMoof;
+ }
+ return &moofs[mCurrentMoof].mIndex[mCurrentSample];
+}
+
+// Steps to the following sample. Only the sample counter is incremented here;
+// rolling over into the next moof is handled by Get().
+void SampleIterator::Next() { ++mCurrentSample; }
+
+// Repositions the iterator on the last sync sample whose composition start is
+// not after aTime. The scan always restarts from the first sample; if no sync
+// sample is met before the scan stops, the iterator ends up at the very
+// first sample.
+void SampleIterator::Seek(Microseconds aTime) {
+  mCurrentMoof = 0;
+  mCurrentSample = 0;
+
+  // Position of the most recent sync sample seen during the scan.
+  size_t keyMoof = 0;
+  size_t keySample = 0;
+
+  for (Sample* current = Get(); current; current = Get()) {
+    const auto start = current->mCompositionRange.start;
+    if (start > aTime) {
+      break;
+    }
+    if (current->mSync) {
+      keyMoof = mCurrentMoof;
+      keySample = mCurrentSample;
+    }
+    if (start == aTime) {
+      break;
+    }
+    Next();
+  }
+
+  mCurrentMoof = keyMoof;
+  mCurrentSample = keySample;
+}
+
+// Returns the composition start time of the first sync sample at or after the
+// current position, or -1 if there is none. The scan runs on a copy of this
+// iterator, so the position of `this` is left untouched. The copy is made
+// with the implicit copy constructor and so is never registered with the
+// Index.
+Microseconds SampleIterator::GetNextKeyframeTime() {
+  SampleIterator cursor(*this);
+  for (Sample* candidate = cursor.Get(); candidate;
+       cursor.Next(), candidate = cursor.Get()) {
+    if (candidate->mSync) {
+      return candidate->mCompositionRange.start;
+    }
+  }
+  return -1;
+}
+
+// Builds the index for one track.
+//
+// When aIndices is empty a MoofParser is created for aTrackId and samples are
+// discovered from the stream. Otherwise the sample table is copied from
+// aIndices into mIndex, and mDataOffset is filled with coarse blocks (each
+// starting at a sync sample) as long as the samples are stored in
+// non-decreasing byte order. On allocation failure or a failed GetIndice()
+// the constructor returns early, leaving the index partially built.
+Index::Index(const IndiceWrapper& aIndices, ByteStream* aSource,
+ uint32_t aTrackId, bool aIsAudio)
+ : mSource(aSource), mIsAudio(aIsAudio) {
+ if (!aIndices.Length()) {
+ mMoofParser =
+ MakeUnique<MoofParser>(aSource, AsVariant(aTrackId), aIsAudio);
+ } else {
+ if (!mIndex.SetCapacity(aIndices.Length(), fallible)) {
+ // OOM.
+ return;
+ }
+ // Composition time and byte span accumulated for the block being built.
+ media::IntervalSet<int64_t> intervalTime;
+ MediaByteRange intervalRange;
+ bool haveSync = false;
+ bool progressive = true;
+ int64_t lastOffset = 0;
+ for (size_t i = 0; i < aIndices.Length(); i++) {
+ Indice indice;
+ if (!aIndices.GetIndice(i, indice)) {
+ // Out of index?
+ return;
+ }
+ // Every audio sample is treated as a sync point.
+ if (indice.sync || mIsAudio) {
+ haveSync = true;
+ }
+ // Samples that precede the first sync sample are dropped.
+ if (!haveSync) {
+ continue;
+ }
+ Sample sample;
+ sample.mByteRange =
+ MediaByteRange(indice.start_offset, indice.end_offset);
+ sample.mCompositionRange = MP4Interval<Microseconds>(
+ indice.start_composition, indice.end_composition);
+ sample.mDecodeTime = indice.start_decode;
+ sample.mSync = indice.sync || mIsAudio;
+ // FIXME: Make this infallible after bug 968520 is done.
+ MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));
+ if (indice.start_offset < lastOffset) {
+ NS_WARNING("Chunks in MP4 out of order, expect slow down");
+ progressive = false;
+ }
+ lastOffset = indice.end_offset;
+
+ // Pack audio samples in group of 128.
+ if (sample.mSync && progressive && (!mIsAudio || !(i % 128))) {
+ // Close the previous block with the span accumulated so far, then
+ // open a new block starting at this sample.
+ if (mDataOffset.Length()) {
+ auto& last = mDataOffset.LastElement();
+ last.mEndOffset = intervalRange.mEnd;
+ NS_ASSERTION(intervalTime.Length() == 1,
+ "Discontinuous samples between keyframes");
+ last.mTime.start = intervalTime.GetStart();
+ last.mTime.end = intervalTime.GetEnd();
+ }
+ if (!mDataOffset.AppendElement(
+ MP4DataOffset(mIndex.Length() - 1, indice.start_offset),
+ fallible)) {
+ // OOM.
+ return;
+ }
+ intervalTime = media::IntervalSet<int64_t>();
+ intervalRange = MediaByteRange();
+ }
+ intervalTime += media::Interval<int64_t>(sample.mCompositionRange.start,
+ sample.mCompositionRange.end);
+ intervalRange = intervalRange.Span(sample.mByteRange);
+ }
+
+ if (mDataOffset.Length() && progressive) {
+ // Close the final block using the end offset of the last indice.
+ Indice indice;
+ if (!aIndices.GetIndice(aIndices.Length() - 1, indice)) {
+ return;
+ }
+ auto& last = mDataOffset.LastElement();
+ last.mEndOffset = indice.end_offset;
+ last.mTime =
+ MP4Interval<int64_t>(intervalTime.GetStart(), intervalTime.GetEnd());
+ } else {
+ // Out-of-order data makes the block table unusable; drop it.
+ mDataOffset.Clear();
+ }
+ }
+}
+
+// Defaulted here, in the .cpp file, rather than in the class declaration.
+Index::~Index() = default;
+
+// Convenience overload: updates the moof index without allowing eviction.
+void Index::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges) {
+ UpdateMoofIndex(aByteRanges, false);
+}
+
+// Asks the moof parser to rebuild its fragmented index for aByteRanges. Does
+// nothing for unfragmented files (no moof parser).
+//
+// When aCanEvict is true and more than one moof is known, the parser is
+// allowed to trim already-consumed moofs, but only if every registered
+// iterator is positioned in the last moof or just past it. After a trim,
+// each registered iterator's moof position is shifted down to match.
+void Index::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges,
+ bool aCanEvict) {
+ if (!mMoofParser) {
+ return;
+ }
+ // Number of moofs before the rebuild; used below to re-base the iterators.
+ size_t moofs = mMoofParser->Moofs().Length();
+ bool canEvict = aCanEvict && moofs > 1;
+ if (canEvict) {
+ // Check that we can trim the mMoofParser. We can only do so if all
+ // iterators have demuxed all possible samples.
+ for (const SampleIterator* iterator : mIterators) {
+ if ((iterator->mCurrentSample == 0 && iterator->mCurrentMoof == moofs) ||
+ iterator->mCurrentMoof == moofs - 1) {
+ continue;
+ }
+ canEvict = false;
+ break;
+ }
+ }
+ // canEvict is passed by pointer and read again afterwards, so the parser
+ // can report back whether it actually trimmed.
+ mMoofParser->RebuildFragmentedIndex(aByteRanges, &canEvict);
+ if (canEvict) {
+ // The moofparser got trimmed. Adjust all registered iterators.
+ for (SampleIterator* iterator : mIterators) {
+ iterator->mCurrentMoof -= moofs - 1;
+ }
+ }
+}
+
+// Walks the sample table backwards from its last sample and returns the
+// largest composition end time seen up to and including the first sync
+// sample reached, provided every visited sample lies inside aByteRanges.
+// Returns 0 when a visited sample is not buffered, when no sync sample is
+// found, or (fragmented files) when the parser has not reached the end or
+// knows no moofs. For fragmented files only the last moof is inspected.
+Microseconds Index::GetEndCompositionIfBuffered(
+    const MediaByteRangeSet& aByteRanges) {
+  FallibleTArray<Sample>* samples = &mIndex;
+  if (mMoofParser) {
+    if (!mMoofParser->ReachedEnd() || mMoofParser->Moofs().IsEmpty()) {
+      return 0;
+    }
+    samples = &mMoofParser->Moofs().LastElement().mIndex;
+  }
+
+  RangeFinder finder(aByteRanges);
+  Microseconds latestEnd = 0;
+  size_t remaining = samples->Length();
+  while (remaining > 0) {
+    --remaining;
+    const Sample& sample = (*samples)[remaining];
+    if (!finder.Contains(sample.mByteRange)) {
+      return 0;
+    }
+    latestEnd = std::max(latestEnd, sample.mCompositionRange.end);
+    if (sample.mSync) {
+      return latestEnd;
+    }
+  }
+  return 0;
+}
+
+// Converts a set of buffered byte ranges into the set of time intervals that
+// can be played from them.
+//
+// The result of the previous call is cached: if aByteRanges equals the set
+// passed last time, the previously computed intervals are returned as is.
+//
+// Two strategies are used:
+//  - If the coarse block table (mDataOffset) is populated, whole blocks that
+//    fit in a byte range contribute their precomputed time span, and samples
+//    of partially covered blocks are checked individually.
+//  - Otherwise every sample (of every fully buffered moof, for fragmented
+//    files) is tested against the byte ranges, and only samples following a
+//    buffered sync sample are counted.
+TimeIntervals Index::ConvertByteRangesToTimeRanges(
+    const MediaByteRangeSet& aByteRanges) {
+  if (aByteRanges == mLastCachedRanges) {
+    return mLastBufferedRanges;
+  }
+  mLastCachedRanges = aByteRanges;
+
+  if (mDataOffset.Length()) {
+    TimeIntervals timeRanges;
+    for (const auto& range : aByteRanges) {
+      // First block starting at or after the beginning of this byte range.
+      uint32_t start = mDataOffset.IndexOfFirstElementGt(range.mStart - 1);
+      if (!mIsAudio && start == mDataOffset.Length()) {
+        continue;
+      }
+      // First block ending after the end of this byte range.
+      uint32_t end = mDataOffset.IndexOfFirstElementGt(
+          range.mEnd, MP4DataOffset::EndOffsetComparator());
+      if (!mIsAudio && end < start) {
+        continue;
+      }
+      if (mIsAudio && start &&
+          range.Intersects(MediaByteRange(mDataOffset[start - 1].mStartOffset,
+                                          mDataOffset[start - 1].mEndOffset))) {
+        // Check if previous audio data block contains some available samples.
+        for (size_t i = mDataOffset[start - 1].mIndex; i < mIndex.Length();
+             i++) {
+          if (range.ContainsStrict(mIndex[i].mByteRange)) {
+            timeRanges += TimeInterval(
+                TimeUnit::FromMicroseconds(mIndex[i].mCompositionRange.start),
+                TimeUnit::FromMicroseconds(mIndex[i].mCompositionRange.end));
+          }
+        }
+      }
+      if (end > start) {
+        // Blocks [start, end) are entirely inside the byte range.
+        for (uint32_t i = start; i < end; i++) {
+          timeRanges += TimeInterval(
+              TimeUnit::FromMicroseconds(mDataOffset[i].mTime.start),
+              TimeUnit::FromMicroseconds(mDataOffset[i].mTime.end));
+        }
+      }
+      if (end < mDataOffset.Length()) {
+        // Find samples in partial block contained in the byte range.
+        for (size_t i = mDataOffset[end].mIndex;
+             i < mIndex.Length() && range.ContainsStrict(mIndex[i].mByteRange);
+             i++) {
+          timeRanges += TimeInterval(
+              TimeUnit::FromMicroseconds(mIndex[i].mCompositionRange.start),
+              TimeUnit::FromMicroseconds(mIndex[i].mCompositionRange.end));
+        }
+      }
+    }
+    mLastBufferedRanges = timeRanges;
+    return timeRanges;
+  }
+
+  RangeFinder rangeFinder(aByteRanges);
+  nsTArray<MP4Interval<Microseconds>> timeRanges;
+  nsTArray<FallibleTArray<Sample>*> indexes;
+  if (mMoofParser) {
+    // Collect pointers to the sample tables that need a per-sample check.
+    // Only the pointers are stored; the tables stay owned by the moof parser.
+    for (Moof& moof : mMoofParser->Moofs()) {
+      // We need the entire moof in order to play anything
+      if (rangeFinder.Contains(moof.mRange)) {
+        if (rangeFinder.Contains(moof.mMdatRange)) {
+          // The whole fragment is buffered: use its precomputed time range.
+          MP4Interval<Microseconds>::SemiNormalAppend(timeRanges,
+                                                      moof.mTimeRange);
+        } else {
+          indexes.AppendElement(&moof.mIndex);
+        }
+      }
+    }
+  } else {
+    indexes.AppendElement(&mIndex);
+  }
+
+  bool hasSync = false;
+  for (FallibleTArray<Sample>* index : indexes) {
+    for (const Sample& sample : *index) {
+      if (!rangeFinder.Contains(sample.mByteRange)) {
+        // We process the index in decode order so we clear hasSync when we hit
+        // a range that isn't buffered.
+        hasSync = false;
+        continue;
+      }
+
+      hasSync |= sample.mSync;
+      if (!hasSync) {
+        continue;
+      }
+
+      MP4Interval<Microseconds>::SemiNormalAppend(timeRanges,
+                                                  sample.mCompositionRange);
+    }
+  }
+
+  // This fixes up when the composition order differs from the byte range order
+  nsTArray<MP4Interval<Microseconds>> timeRangesNormalized;
+  MP4Interval<Microseconds>::Normalize(timeRanges, &timeRangesNormalized);
+  // convert timeRanges.
+  media::TimeIntervals ranges;
+  for (const auto& interval : timeRangesNormalized) {
+    ranges += media::TimeInterval(
+        media::TimeUnit::FromMicroseconds(interval.start),
+        media::TimeUnit::FromMicroseconds(interval.end));
+  }
+  mLastBufferedRanges = ranges;
+  return ranges;
+}
+
+// Computes a byte offset for eviction relative to aTime. Returns the maximum
+// uint64_t value when no moof/sample matches the criteria below.
+//
+// Fragmented files: the smallest start offset (moof or mdat, whichever is
+// lower) among fragments with a non-empty time range that ends after aTime.
+// Unfragmented files: the smallest byte-range end among samples whose
+// composition range ends at or before aTime.
+uint64_t Index::GetEvictionOffset(Microseconds aTime) {
+  uint64_t offset = std::numeric_limits<uint64_t>::max();
+  if (mMoofParser) {
+    // We need to keep the whole moof if we're keeping any of it because the
+    // parser doesn't keep parsed moofs.
+    // Range-for avoids comparing a signed counter with the unsigned Length().
+    for (Moof& moof : mMoofParser->Moofs()) {
+      if (moof.mTimeRange.Length() && moof.mTimeRange.end > aTime) {
+        offset = std::min(offset, uint64_t(std::min(moof.mRange.mStart,
+                                                    moof.mMdatRange.mStart)));
+      }
+    }
+  } else {
+    // We've already parsed and stored the moov so we don't need to keep it.
+    // All we need to keep is the sample data itself.
+    for (const Sample& sample : mIndex) {
+      if (aTime >= sample.mCompositionRange.end) {
+        offset = std::min(offset, uint64_t(sample.mByteRange.mEnd));
+      }
+    }
+  }
+  return offset;
+}
+
+// Adds aIterator to the list of iterators that UpdateMoofIndex() inspects
+// and re-bases. Only the raw pointer is stored.
+void Index::RegisterIterator(SampleIterator* aIterator) {
+ mIterators.AppendElement(aIterator);
+}
+
+// Removes aIterator from the list of registered iterators.
+void Index::UnregisterIterator(SampleIterator* aIterator) {
+ mIterators.RemoveElement(aIterator);
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/Index.h b/dom/media/mp4/Index.h
new file mode 100644
index 0000000000..afd0c051af
--- /dev/null
+++ b/dom/media/mp4/Index.h
@@ -0,0 +1,135 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef INDEX_H_
+#define INDEX_H_
+
+#include "ByteStream.h"
+#include "MediaData.h"
+#include "MediaResource.h"
+#include "MoofParser.h"
+#include "mozilla/ResultVariant.h"
+#include "MP4Interval.h"
+#include "nsISupportsImpl.h"
+#include "TimeUnits.h"
+
+namespace mozilla {
+class IndiceWrapper;
+struct Sample;
+struct CencSampleEncryptionInfoEntry;
+
+class Index;
+
+typedef int64_t Microseconds;
+
+// Cursor over the samples of one Index. Its position is a (moof, sample)
+// pair. The iterator registers itself with the Index on construction and
+// unregisters on destruction.
+class SampleIterator {
+ public:
+ explicit SampleIterator(Index* aIndex);
+ ~SampleIterator();
+ // Reads the current sample from the stream and advances; returns null when
+ // no complete sample can be produced.
+ already_AddRefed<mozilla::MediaRawData> GetNext();
+ // Moves to the last sync sample whose composition start is not after aTime.
+ void Seek(Microseconds aTime);
+ // Composition start of the next sync sample from the current position, or
+ // -1 if there is none. Does not move this iterator.
+ Microseconds GetNextKeyframeTime();
+
+ private:
+ // Returns the sample at the current position, or nullptr if there is none.
+ Sample* Get();
+
+ // Gets the sample description entry for the current moof, or nullptr if
+ // called without a valid current moof.
+ SampleDescriptionEntry* GetSampleDescriptionEntry();
+ // Sample-group encryption entry for the current sample, or nullptr.
+ CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry();
+
+ // Determines the encryption scheme in use for the current sample. If the
+ // scheme cannot be unambiguously determined, will return an error with
+ // the reason.
+ //
+ // Returns: Ok(CryptoScheme) if a crypto scheme, including None, can be
+ // determined, or Err(nsCString) if there is an issue determining the scheme.
+ Result<CryptoScheme, nsCString> GetEncryptionScheme();
+
+ // Steps to the following sample.
+ void Next();
+ // Strong reference to the Index being iterated.
+ RefPtr<Index> mIndex;
+ friend class Index;
+ size_t mCurrentMoof;
+ size_t mCurrentSample;
+};
+
+// Per-track sample index. It is backed either by a sample table supplied at
+// construction (mIndex) or, when that table is empty, by a MoofParser that
+// discovers samples from the byte stream. Thread-safe reference counted;
+// the destructor is private.
+class Index {
+ public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(Index)
+
+ // One entry of the externally supplied sample table.
+ struct Indice {
+ uint64_t start_offset;
+ uint64_t end_offset;
+ uint64_t start_composition;
+ uint64_t end_composition;
+ uint64_t start_decode;
+ bool sync;
+ };
+
+ // Coarse block of consecutive samples: index of its first sample in the
+ // sample table, the byte span it covers and its composition time span.
+ // The comparison operators compare against the start offset.
+ struct MP4DataOffset {
+ MP4DataOffset(uint32_t aIndex, int64_t aStartOffset)
+ : mIndex(aIndex), mStartOffset(aStartOffset), mEndOffset(0) {}
+
+ bool operator==(int64_t aStartOffset) const {
+ return mStartOffset == aStartOffset;
+ }
+
+ bool operator!=(int64_t aStartOffset) const {
+ return mStartOffset != aStartOffset;
+ }
+
+ bool operator<(int64_t aStartOffset) const {
+ return mStartOffset < aStartOffset;
+ }
+
+ // Comparator that orders blocks by their end offset instead.
+ struct EndOffsetComparator {
+ bool Equals(const MP4DataOffset& a, const int64_t& b) const {
+ return a.mEndOffset == b;
+ }
+
+ bool LessThan(const MP4DataOffset& a, const int64_t& b) const {
+ return a.mEndOffset < b;
+ }
+ };
+
+ uint32_t mIndex;
+ int64_t mStartOffset;
+ int64_t mEndOffset;
+ MP4Interval<Microseconds> mTime;
+ };
+
+ Index(const mozilla::IndiceWrapper& aIndices, ByteStream* aSource,
+ uint32_t aTrackId, bool aIsAudio);
+
+ // Rebuilds the fragmented index for the given byte ranges; no-op for
+ // unfragmented files. The one-argument overload never evicts.
+ void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges,
+ bool aCanEvict);
+ void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges);
+ Microseconds GetEndCompositionIfBuffered(
+ const mozilla::MediaByteRangeSet& aByteRanges);
+ // Maps buffered byte ranges to playable time intervals (result is cached
+ // for the most recently passed byte ranges).
+ mozilla::media::TimeIntervals ConvertByteRangesToTimeRanges(
+ const mozilla::MediaByteRangeSet& aByteRanges);
+ uint64_t GetEvictionOffset(Microseconds aTime);
+ // True when samples come from a MoofParser rather than a sample table.
+ bool IsFragmented() { return !!mMoofParser; }
+
+ friend class SampleIterator;
+
+ private:
+ ~Index();
+ void RegisterIterator(SampleIterator* aIterator);
+ void UnregisterIterator(SampleIterator* aIterator);
+
+ // Raw, non-owning pointer to the stream samples are read from.
+ ByteStream* mSource;
+ FallibleTArray<Sample> mIndex;
+ FallibleTArray<MP4DataOffset> mDataOffset;
+ UniquePtr<MoofParser> mMoofParser;
+ // Raw pointers to the iterators currently registered with this index.
+ nsTArray<SampleIterator*> mIterators;
+
+ // ConvertByteRangesToTimeRanges cache
+ mozilla::MediaByteRangeSet mLastCachedRanges;
+ mozilla::media::TimeIntervals mLastBufferedRanges;
+ bool mIsAudio;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Decoder.cpp b/dom/media/mp4/MP4Decoder.cpp
new file mode 100644
index 0000000000..7e2fdf63d9
--- /dev/null
+++ b/dom/media/mp4/MP4Decoder.cpp
@@ -0,0 +1,222 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MP4Decoder.h"
+#include "H264.h"
+#include "VPXDecoder.h"
+#ifdef MOZ_AV1
+# include "AOMDecoder.h"
+#endif
+#include "MP4Demuxer.h"
+#include "MediaContainerType.h"
+#include "PDMFactory.h"
+#include "PlatformDecoderModule.h"
+#include "VideoUtils.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/gfx/Tools.h"
+#include "nsMimeTypes.h"
+#include "nsReadableUtils.h"
+
+namespace mozilla {
+
+// Returns true when aCodec parses as an H.264 codec string whose level lies
+// in [1, 5.2] and whose profile is Baseline, Main, Extended or High.
+static bool IsWhitelistedH264Codec(const nsAString& aCodec) {
+  uint8_t profile = 0;
+  uint8_t constraint = 0;
+  uint8_t level = 0;
+  if (!ExtractH264CodecDetails(aCodec, profile, constraint, level)) {
+    return false;
+  }
+
+  // Just assume what we can play on all platforms the codecs/formats that
+  // WMF can play, since we don't have documentation about what other
+  // platforms can play... According to the WMF documentation:
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd797815%28v=vs.85%29.aspx
+  // "The Media Foundation H.264 video decoder is a Media Foundation Transform
+  // that supports decoding of Baseline, Main, and High profiles, up to level
+  // 5.1.". We extend the limit to level 5.2, relying on the decoder to handle
+  // any potential errors, the level limit being rather arbitrary.
+  // We also report that we can play Extended profile, as there are
+  // bitstreams that are Extended compliant that are also Baseline compliant.
+  const bool levelSupported = level >= H264_LEVEL_1 && level <= H264_LEVEL_5_2;
+  const bool profileSupported =
+      profile == H264_PROFILE_BASE || profile == H264_PROFILE_MAIN ||
+      profile == H264_PROFILE_EXTENDED || profile == H264_PROFILE_HIGH;
+  return levelSupported && profileSupported;
+}
+
+// Returns true when the container MIME type is one of the MP4-family types
+// accepted from the web.
+static bool IsTypeValid(const MediaContainerType& aType) {
+  // Whitelist MP4 types, so they explicitly match what we encounter on
+  // the web, as opposed to what we use internally (i.e. what our demuxers
+  // etc output).
+  const auto& mime = aType.Type();
+  if (mime == MEDIAMIMETYPE("audio/mp4") ||
+      mime == MEDIAMIMETYPE("audio/x-m4a")) {
+    return true;
+  }
+  return mime == MEDIAMIMETYPE("video/mp4") ||
+         mime == MEDIAMIMETYPE("video/quicktime") ||
+         mime == MEDIAMIMETYPE("video/x-m4v");
+}
+
+/* static */
+// Builds one TrackInfo per entry of the `codecs` parameter of aType.
+//
+// aError is set to a fatal error when the container type is not an accepted
+// MP4 type (an empty list is returned), or when a codec string is not
+// recognised. In the latter case the loop keeps going, so the returned list
+// may still hold the tracks for the recognised codecs. Otherwise aError is
+// NS_OK. An empty list with NS_OK means no codecs were specified.
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+ const MediaContainerType& aType, MediaResult& aError) {
+ nsTArray<UniquePtr<TrackInfo>> tracks;
+
+ if (!IsTypeValid(aType)) {
+ aError = MediaResult(
+ NS_ERROR_DOM_MEDIA_FATAL_ERR,
+ RESULT_DETAIL("Invalid type:%s", aType.Type().AsString().get()));
+ return tracks;
+ }
+
+ aError = NS_OK;
+
+ const MediaCodecs& codecs = aType.ExtendedType().Codecs();
+ if (codecs.IsEmpty()) {
+ return tracks;
+ }
+
+ // H.264 codec strings are only accepted for the video container types.
+ const bool isVideo = aType.Type() == MEDIAMIMETYPE("video/mp4") ||
+ aType.Type() == MEDIAMIMETYPE("video/quicktime") ||
+ aType.Type() == MEDIAMIMETYPE("video/x-m4v");
+
+ for (const auto& codec : codecs.Range()) {
+ if (IsAACCodecString(codec)) {
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "audio/mp4a-latm"_ns, aType));
+ continue;
+ }
+ if (codec.EqualsLiteral("mp3")) {
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "audio/mpeg"_ns, aType));
+ continue;
+ }
+ // The valid codecs parameter value with mp4 MIME types should be "Opus" and
+ // "fLaC", but "opus" and "flac" are acceptable due to historical reasons.
+ if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("Opus") ||
+ codec.EqualsLiteral("flac") || codec.EqualsLiteral("fLaC")) {
+ // The track MIME type uses the lower-cased codec name.
+ NS_ConvertUTF16toUTF8 c(codec);
+ ToLowerCase(c);
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "audio/"_ns + c, aType));
+ continue;
+ }
+ if (IsVP9CodecString(codec)) {
+ auto trackInfo =
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "video/vp9"_ns, aType);
+ VPXDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+ tracks.AppendElement(std::move(trackInfo));
+ continue;
+ }
+#ifdef MOZ_AV1
+ if (StaticPrefs::media_av1_enabled() && IsAV1CodecString(codec)) {
+ auto trackInfo =
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "video/av1"_ns, aType);
+ AOMDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+ tracks.AppendElement(std::move(trackInfo));
+ continue;
+ }
+#endif
+ if (isVideo && IsWhitelistedH264Codec(codec)) {
+ auto trackInfo =
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "video/avc"_ns, aType);
+ uint8_t profile = 0, constraint = 0, level = 0;
+ MOZ_ALWAYS_TRUE(
+ ExtractH264CodecDetails(codec, profile, constraint, level));
+ // Fall back to 1280x720 when the type carries no width/height.
+ uint32_t width = aType.ExtendedType().GetWidth().refOr(1280);
+ uint32_t height = aType.ExtendedType().GetHeight().refOr(720);
+ trackInfo->GetAsVideoInfo()->mExtraData =
+ H264::CreateExtraData(profile, constraint, level, {width, height});
+ tracks.AppendElement(std::move(trackInfo));
+ continue;
+ }
+ // Unknown codec
+ aError = MediaResult(
+ NS_ERROR_DOM_MEDIA_FATAL_ERR,
+ RESULT_DETAIL("Unknown codec:%s", NS_ConvertUTF16toUTF8(codec).get()));
+ }
+ return tracks;
+}
+
+/* static */
+// Returns true if the MP4 backend is enabled and aType can be decoded.
+//
+// When aType lists codecs, every one of them must be recognised and
+// supported by the platform decoder factory. When it lists none, the content
+// is guessed from the container type and it is enough for one of the guessed
+// codecs to be supported.
+bool MP4Decoder::IsSupportedType(const MediaContainerType& aType,
+ DecoderDoctorDiagnostics* aDiagnostics) {
+ if (!IsEnabled()) {
+ return false;
+ }
+
+ MediaResult rv = NS_OK;
+ auto tracks = GetTracksInfo(aType, rv);
+ if (NS_FAILED(rv)) {
+ return false;
+ }
+
+ if (!tracks.IsEmpty()) {
+ // Look for exact match as we know used codecs.
+ RefPtr<PDMFactory> platform = new PDMFactory();
+ for (const auto& track : tracks) {
+ if (!track ||
+ platform->Supports(SupportDecoderParams(*track), aDiagnostics) ==
+ media::DecodeSupport::Unsupported) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // We have only container info so try to guess the content type.
+ // Assume H.264/AV1 or AAC
+ if (aType.Type() == MEDIAMIMETYPE("audio/mp4") ||
+ aType.Type() == MEDIAMIMETYPE("audio/x-m4a")) {
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "audio/mp4a-latm"_ns, aType));
+ } else {
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "video/avc"_ns, aType));
+ if (StaticPrefs::media_av1_enabled()) {
+ tracks.AppendElement(
+ CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+ "video/av1"_ns, aType));
+ }
+ }
+
+ // Check that something is supported at least.
+ RefPtr<PDMFactory> platform = new PDMFactory();
+ for (const auto& track : tracks) {
+ if (track &&
+ platform->Supports(SupportDecoderParams(*track), aDiagnostics) !=
+ media::DecodeSupport::Unsupported) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/* static */
+// Exact string match against the two MIME types treated as H.264 here:
+// "video/mp4" and "video/avc".
+bool MP4Decoder::IsH264(const nsACString& aMimeType) {
+ return aMimeType.EqualsLiteral("video/mp4") ||
+ aMimeType.EqualsLiteral("video/avc");
+}
+
+/* static */
+// Exact string match against "audio/mp4a-latm".
+bool MP4Decoder::IsAAC(const nsACString& aMimeType) {
+ return aMimeType.EqualsLiteral("audio/mp4a-latm");
+}
+
+/* static */
+// Reports the value returned by StaticPrefs::media_mp4_enabled().
+bool MP4Decoder::IsEnabled() { return StaticPrefs::media_mp4_enabled(); }
+
+/* static */
+// Public overload: same as the two-argument version, but the error result is
+// discarded and only the track list is returned.
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+ const MediaContainerType& aType) {
+ MediaResult rv = NS_OK;
+ return GetTracksInfo(aType, rv);
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/MP4Decoder.h b/dom/media/mp4/MP4Decoder.h
new file mode 100644
index 0000000000..07b085929b
--- /dev/null
+++ b/dom/media/mp4/MP4Decoder.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#if !defined(MP4Decoder_h_)
+# define MP4Decoder_h_
+
+# include "mozilla/UniquePtr.h"
+# include "nsStringFwd.h"
+# include "nsTArray.h"
+
+namespace mozilla {
+
+class MediaContainerType;
+class MediaResult;
+class DecoderDoctorDiagnostics;
+class TrackInfo;
+
+// Decoder that uses a bundled MP4 demuxer and platform decoders to play MP4.
+class MP4Decoder {
+ public:
+ // Returns true if aContainerType is an MP4 type that we think we can render
+ // with a platform decoder backend.
+ // If provided, codecs are checked for support.
+ static bool IsSupportedType(const MediaContainerType& aContainerType,
+ DecoderDoctorDiagnostics* aDiagnostics);
+
+ // Returns true if aMimeType is one of the strings used by our demuxers to
+ // identify H264. Does not parse general content type strings, i.e. white
+ // space matters.
+ static bool IsH264(const nsACString& aMimeType);
+
+ // Returns true if aMimeType is one of the strings used by our demuxers to
+ // identify AAC. Does not parse general content type strings, i.e. white
+ // space matters.
+ static bool IsAAC(const nsACString& aMimeType);
+
+ // Returns true if the MP4 backend is preffed on.
+ static bool IsEnabled();
+
+ // Returns the track descriptions for aType. Forwards to the private
+ // overload below and discards the error it reports.
+ static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+ const MediaContainerType& aType);
+
+ private:
+ // Same as the public overload, with aError as an additional in/out
+ // MediaResult (the public overload passes a local initialised to NS_OK).
+ static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+ const MediaContainerType& aType, MediaResult& aError);
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Demuxer.cpp b/dom/media/mp4/MP4Demuxer.cpp
new file mode 100644
index 0000000000..59bba16577
--- /dev/null
+++ b/dom/media/mp4/MP4Demuxer.cpp
@@ -0,0 +1,557 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <limits>
+#include <stdint.h>
+
+#include "MP4Demuxer.h"
+
+#include "AnnexB.h"
+#include "BufferStream.h"
+#include "H264.h"
+#include "Index.h"
+#include "MP4Decoder.h"
+#include "MP4Metadata.h"
+#include "MoofParser.h"
+#include "ResourceStream.h"
+#include "VPXDecoder.h"
+#include "mozilla/Span.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/Telemetry.h"
+#include "nsPrintfCString.h"
+
+extern mozilla::LazyLogModule gMediaDemuxerLog;
+// Accessor for the shared media demuxer log module.
+mozilla::LogModule* GetDemuxerLog() {
+  mozilla::LogModule* demuxerLog = gMediaDemuxerLog;
+  return demuxerLog;
+}
+
+#define LOG(arg, ...) \
+ DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \
+ __func__, ##__VA_ARGS__)
+
+namespace mozilla {
+
+DDLoggedTypeDeclNameAndBase(MP4TrackDemuxer, MediaTrackDemuxer);
+
+// Track demuxer for a single audio or video track of an MP4 resource.
+// Samples are pulled through a SampleIterator over an Index built from the
+// track's indices.
+class MP4TrackDemuxer : public MediaTrackDemuxer,
+ public DecoderDoctorLifeLogger<MP4TrackDemuxer> {
+ public:
+ // aInfo is moved into the demuxer; aIndices is used to construct the Index.
+ MP4TrackDemuxer(MediaResource* aResource, UniquePtr<TrackInfo>&& aInfo,
+ const IndiceWrapper& aIndices);
+
+ // Returns a clone of the stored track info.
+ UniquePtr<TrackInfo> GetInfo() const override;
+
+ // Seeks the iterator to aTime and resolves with the time of the first
+ // non-empty keyframe found from there; rejects with END_OF_STREAM if none.
+ RefPtr<SeekPromise> Seek(const media::TimeUnit& aTime) override;
+
+ // Resolves with up to aNumSamples non-empty samples. Rejects with
+ // DEMUXER_ERR when aNumSamples is 0 and END_OF_STREAM when nothing is left.
+ RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override;
+
+ // Drops any queued sample and seeks the iterator back to time 0.
+ void Reset() override;
+
+ // Writes the cached next keyframe time to *aTime, or infinity if none.
+ nsresult GetNextRandomAccessPoint(media::TimeUnit* aTime) override;
+
+ // Consumes samples until a keyframe at or after aTimeThreshold is found.
+ RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint(
+ const media::TimeUnit& aTimeThreshold) override;
+
+ // Converts the resource's cached byte ranges to time intervals.
+ media::TimeIntervals GetBuffered() override;
+
+ // Re-indexes immediately, allowing eviction.
+ void NotifyDataRemoved();
+ // Only flags the index as stale; the update happens lazily.
+ void NotifyDataArrived();
+
+ private:
+ // Fetches the next sample and, for unencrypted H264/VP9, corrects mKeyframe.
+ already_AddRefed<MediaRawData> GetNextSample();
+ // Updates the moof index from the cached ranges if mNeedReIndex is set.
+ void EnsureUpToDateIndex();
+ // Refreshes mNextKeyframeTime from the iterator.
+ void SetNextKeyFrameTime();
+ RefPtr<MediaResource> mResource;
+ RefPtr<ResourceStream> mStream;
+ UniquePtr<TrackInfo> mInfo;
+ RefPtr<Index> mIndex;
+ UniquePtr<SampleIterator> mIterator;
+ // Nothing() when the iterator reports no further keyframe.
+ Maybe<media::TimeUnit> mNextKeyframeTime;
+ // Keyframe sample found by Seek() or SkipToNextRandomAccessPoint() and held
+ // here until the next GetSamples() call returns it.
+ RefPtr<MediaRawData> mQueuedSample;
+ // True when new data has arrived since the index was last updated.
+ bool mNeedReIndex;
+ // Codec family detected in the constructor; selects the keyframe check
+ // applied in GetNextSample().
+ enum CodecType { kH264, kVP9, kOther } mType = kOther;
+};
+
+MP4Demuxer::MP4Demuxer(MediaResource* aResource)
+ : mResource(aResource),
+ mStream(new ResourceStream(aResource)),
+ mIsSeekable(false) {
+ DDLINKCHILD("resource", aResource);
+ DDLINKCHILD("stream", mStream.get());
+}
+
+// Reads and parses the MP4 metadata, then creates one MP4TrackDemuxer per
+// usable audio and video track, collects the PSSH init data and records
+// whether the file can be seeked.
+//
+// The promise is rejected when the metadata cannot be read or parsed, when no
+// audio or video track is found, or (only with the
+// media.playback.warnings-as-errors pref) when a track count or track info is
+// invalid. Otherwise it is resolved with the first non-fatal warning
+// encountered, or NS_OK if there was none.
+RefPtr<MP4Demuxer::InitPromise> MP4Demuxer::Init() {
+  AutoPinned<ResourceStream> stream(mStream);
+
+  // 'result' will capture the first warning, if any.
+  MediaResult result{NS_OK};
+
+  MP4Metadata::ResultAndByteBuffer initData = MP4Metadata::Metadata(stream);
+  if (!initData.Ref()) {
+    return InitPromise::CreateAndReject(
+        NS_FAILED(initData.Result())
+            ? std::move(initData.Result())
+            : MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                          RESULT_DETAIL("Invalid MP4 metadata or OOM")),
+        __func__);
+  } else if (NS_FAILED(initData.Result()) && result == NS_OK) {
+    result = std::move(initData.Result());
+  }
+
+  RefPtr<BufferStream> bufferstream = new BufferStream(initData.Ref());
+
+  MP4Metadata metadata{bufferstream};
+  DDLINKCHILD("metadata", &metadata);
+  nsresult rv = metadata.Parse();
+  if (NS_FAILED(rv)) {
+    return InitPromise::CreateAndReject(
+        MediaResult(rv, RESULT_DETAIL("Parse MP4 metadata failed")), __func__);
+  }
+
+  // A track-count error is fatal only when warnings are treated as errors;
+  // otherwise that track type is simply considered absent.
+  auto audioTrackCount = metadata.GetNumberTracks(TrackInfo::kAudioTrack);
+  if (audioTrackCount.Ref() == MP4Metadata::NumberTracksError()) {
+    if (StaticPrefs::media_playback_warnings_as_errors()) {
+      return InitPromise::CreateAndReject(
+          MediaResult(
+              NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+              RESULT_DETAIL("Invalid audio track (%s)",
+                            audioTrackCount.Result().Description().get())),
+          __func__);
+    }
+    audioTrackCount.Ref() = 0;
+  }
+
+  auto videoTrackCount = metadata.GetNumberTracks(TrackInfo::kVideoTrack);
+  if (videoTrackCount.Ref() == MP4Metadata::NumberTracksError()) {
+    if (StaticPrefs::media_playback_warnings_as_errors()) {
+      return InitPromise::CreateAndReject(
+          MediaResult(
+              NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+              RESULT_DETAIL("Invalid video track (%s)",
+                            videoTrackCount.Result().Description().get())),
+          __func__);
+    }
+    videoTrackCount.Ref() = 0;
+  }
+
+  if (audioTrackCount.Ref() == 0 && videoTrackCount.Ref() == 0) {
+    return InitPromise::CreateAndReject(
+        MediaResult(
+            NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+            RESULT_DETAIL("No MP4 audio (%s) or video (%s) tracks",
+                          audioTrackCount.Result().Description().get(),
+                          videoTrackCount.Result().Description().get())),
+        __func__);
+  }
+
+  if (NS_FAILED(audioTrackCount.Result()) && result == NS_OK) {
+    result = std::move(audioTrackCount.Result());
+  }
+  if (NS_FAILED(videoTrackCount.Result()) && result == NS_OK) {
+    result = std::move(videoTrackCount.Result());
+  }
+
+  for (size_t i = 0; i < audioTrackCount.Ref(); i++) {
+    MP4Metadata::ResultAndTrackInfo info =
+        metadata.GetTrackInfo(TrackInfo::kAudioTrack, i);
+    if (!info.Ref()) {
+      if (StaticPrefs::media_playback_warnings_as_errors()) {
+        return InitPromise::CreateAndReject(
+            MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                        RESULT_DETAIL("Invalid MP4 audio track (%s)",
+                                      info.Result().Description().get())),
+            __func__);
+      }
+      if (result == NS_OK) {
+        result =
+            MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                        RESULT_DETAIL("Invalid MP4 audio track (%s)",
+                                      info.Result().Description().get()));
+      }
+      continue;
+    } else if (NS_FAILED(info.Result()) && result == NS_OK) {
+      result = std::move(info.Result());
+    }
+    MP4Metadata::ResultAndIndice indices =
+        metadata.GetTrackIndice(info.Ref()->mTrackId);
+    if (!indices.Ref()) {
+      // Record the failure of the indice lookup itself. The status tested
+      // here must be the one from GetTrackIndice, not the track info status
+      // that was already handled above.
+      if (NS_FAILED(indices.Result()) && result == NS_OK) {
+        result = std::move(indices.Result());
+      }
+      continue;
+    }
+    RefPtr<MP4TrackDemuxer> demuxer = new MP4TrackDemuxer(
+        mResource, std::move(info.Ref()), *indices.Ref().get());
+    DDLINKCHILD("audio demuxer", demuxer.get());
+    mAudioDemuxers.AppendElement(std::move(demuxer));
+  }
+
+  for (size_t i = 0; i < videoTrackCount.Ref(); i++) {
+    MP4Metadata::ResultAndTrackInfo info =
+        metadata.GetTrackInfo(TrackInfo::kVideoTrack, i);
+    if (!info.Ref()) {
+      if (StaticPrefs::media_playback_warnings_as_errors()) {
+        return InitPromise::CreateAndReject(
+            MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                        RESULT_DETAIL("Invalid MP4 video track (%s)",
+                                      info.Result().Description().get())),
+            __func__);
+      }
+      if (result == NS_OK) {
+        result =
+            MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                        RESULT_DETAIL("Invalid MP4 video track (%s)",
+                                      info.Result().Description().get()));
+      }
+      continue;
+    } else if (NS_FAILED(info.Result()) && result == NS_OK) {
+      result = std::move(info.Result());
+    }
+    MP4Metadata::ResultAndIndice indices =
+        metadata.GetTrackIndice(info.Ref()->mTrackId);
+    if (!indices.Ref()) {
+      // See the audio loop: report the indice lookup failure.
+      if (NS_FAILED(indices.Result()) && result == NS_OK) {
+        result = std::move(indices.Result());
+      }
+      continue;
+    }
+    RefPtr<MP4TrackDemuxer> demuxer = new MP4TrackDemuxer(
+        mResource, std::move(info.Ref()), *indices.Ref().get());
+    DDLINKCHILD("video demuxer", demuxer.get());
+    mVideoDemuxers.AppendElement(std::move(demuxer));
+  }
+
+  MP4Metadata::ResultAndCryptoFile cryptoFile = metadata.Crypto();
+  if (NS_FAILED(cryptoFile.Result()) && result == NS_OK) {
+    result = std::move(cryptoFile.Result());
+  }
+  MOZ_ASSERT(cryptoFile.Ref());
+  if (cryptoFile.Ref()->valid) {
+    // Concatenate the data of every PSSH entry into one init-data blob.
+    const nsTArray<PsshInfo>& psshs = cryptoFile.Ref()->pssh;
+    for (uint32_t i = 0; i < psshs.Length(); i++) {
+      mCryptoInitData.AppendElements(psshs[i].data);
+    }
+  }
+
+  mIsSeekable = metadata.CanSeek();
+
+  return InitPromise::CreateAndResolve(result, __func__);
+}
+
+// Number of track demuxers created by Init() for aType; 0 for any type other
+// than audio or video.
+uint32_t MP4Demuxer::GetNumberTracks(TrackInfo::TrackType aType) const {
+  if (aType == TrackInfo::kAudioTrack) {
+    return uint32_t(mAudioDemuxers.Length());
+  }
+  if (aType == TrackInfo::kVideoTrack) {
+    return uint32_t(mVideoDemuxers.Length());
+  }
+  return 0;
+}
+
+// Returns the aTrackNumber-th demuxer of the requested type, or nullptr when
+// the type is neither audio nor video or the index is out of range.
+already_AddRefed<MediaTrackDemuxer> MP4Demuxer::GetTrackDemuxer(
+    TrackInfo::TrackType aType, uint32_t aTrackNumber) {
+  const bool wantsAudio = aType == TrackInfo::kAudioTrack;
+  if (!wantsAudio && aType != TrackInfo::kVideoTrack) {
+    return nullptr;
+  }
+
+  auto& demuxers = wantsAudio ? mAudioDemuxers : mVideoDemuxers;
+  if (aTrackNumber >= uint32_t(demuxers.Length())) {
+    return nullptr;
+  }
+  return RefPtr<MediaTrackDemuxer>(demuxers[aTrackNumber]).forget();
+}
+
+// Value cached from MP4Metadata::CanSeek() during Init(); false before that.
+bool MP4Demuxer::IsSeekable() const {
+  return mIsSeekable;
+}
+
+// Forwards the notification to every audio demuxer, then every video demuxer.
+void MP4Demuxer::NotifyDataArrived() {
+  for (const RefPtr<MP4TrackDemuxer>& audioTrack : mAudioDemuxers) {
+    audioTrack->NotifyDataArrived();
+  }
+  for (const RefPtr<MP4TrackDemuxer>& videoTrack : mVideoDemuxers) {
+    videoTrack->NotifyDataArrived();
+  }
+}
+
+// Forwards the notification to every audio demuxer, then every video demuxer.
+void MP4Demuxer::NotifyDataRemoved() {
+  for (const RefPtr<MP4TrackDemuxer>& audioTrack : mAudioDemuxers) {
+    audioTrack->NotifyDataRemoved();
+  }
+  for (const RefPtr<MP4TrackDemuxer>& videoTrack : mVideoDemuxers) {
+    videoTrack->NotifyDataRemoved();
+  }
+}
+
+// Returns an EncryptionInfo carrying the collected init data under the "cenc"
+// type, or a null pointer when Init() gathered no init data.
+UniquePtr<EncryptionInfo> MP4Demuxer::GetCrypto() {
+  if (mCryptoInitData.IsEmpty()) {
+    return nullptr;
+  }
+  UniquePtr<EncryptionInfo> encryption(new EncryptionInfo{});
+  encryption->AddInitData(u"cenc"_ns, mCryptoInitData);
+  return encryption;
+}
+
+// Builds the index and sample iterator for one track, forces an initial index
+// update, and classifies the track as H264, VP9 or other. For H264 the image
+// and display sizes are overwritten with those from the SPS in the extra data
+// when that SPS decodes, has non-zero dimensions and passes the sanity check.
+MP4TrackDemuxer::MP4TrackDemuxer(MediaResource* aResource,
+                                 UniquePtr<TrackInfo>&& aInfo,
+                                 const IndiceWrapper& aIndices)
+    : mResource(aResource),
+      mStream(new ResourceStream(aResource)),
+      mInfo(std::move(aInfo)),
+      mIndex(new Index(aIndices, mStream, mInfo->mTrackId, mInfo->IsAudio())),
+      mIterator(MakeUnique<SampleIterator>(mIndex)),
+      mNeedReIndex(true) {
+  EnsureUpToDateIndex();  // Force update of index
+
+  VideoInfo* video = mInfo->GetAsVideoInfo();
+  if (!video) {
+    // Not a video track: mType keeps its kOther default.
+    return;
+  }
+
+  if (MP4Decoder::IsH264(mInfo->mMimeType)) {
+    mType = kH264;
+    RefPtr<MediaByteBuffer> extraData = video->mExtraData;
+    SPSData sps;
+    const bool usableSps = H264::DecodeSPSFromExtraData(extraData, sps) &&
+                           sps.pic_width > 0 && sps.pic_height > 0 &&
+                           H264::EnsureSPSIsSane(sps);
+    if (usableSps) {
+      video->mImage.width = sps.pic_width;
+      video->mImage.height = sps.pic_height;
+      video->mDisplay.width = sps.display_width;
+      video->mDisplay.height = sps.display_height;
+    }
+  } else if (VPXDecoder::IsVP9(mInfo->mMimeType)) {
+    mType = kVP9;
+  }
+}
+
+// Hands out a copy so callers cannot modify the demuxer's own track info.
+UniquePtr<TrackInfo> MP4TrackDemuxer::GetInfo() const {
+  return mInfo->Clone();
+}
+
+// Refreshes the moof index from the resource's cached byte ranges, but only
+// if new data has been flagged. The flag stays set when the cached ranges
+// cannot be obtained, so the update is retried on the next call.
+void MP4TrackDemuxer::EnsureUpToDateIndex() {
+  if (mNeedReIndex) {
+    AutoPinned<MediaResource> pinned(mResource);
+    MediaByteRangeSet cachedRanges;
+    if (NS_SUCCEEDED(pinned->GetCachedRanges(cachedRanges))) {
+      mIndex->UpdateMoofIndex(cachedRanges);
+      mNeedReIndex = false;
+    }
+  }
+}
+
+// Positions the iterator at aTime, then reads forward until a non-empty
+// keyframe is found. That keyframe is queued for the next GetSamples() call
+// and its time is what the promise resolves with. Running out of samples
+// first rejects with NS_ERROR_DOM_MEDIA_END_OF_STREAM.
+RefPtr<MP4TrackDemuxer::SeekPromise> MP4TrackDemuxer::Seek(
+    const media::TimeUnit& aTime) {
+  media::TimeUnit landedAt = aTime;
+  mQueuedSample = nullptr;
+
+  mIterator->Seek(landedAt.ToMicroseconds());
+
+  // Check what time we actually seeked to.
+  while (!mQueuedSample) {
+    RefPtr<MediaRawData> candidate = GetNextSample();
+    if (!candidate) {
+      return SeekPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                                          __func__);
+    }
+    // Empty samples can't be decoded and non-keyframes can't start playback;
+    // both are skipped.
+    if (candidate->Size() && candidate->mKeyframe) {
+      MOZ_DIAGNOSTIC_ASSERT(candidate->HasValidTime());
+      mQueuedSample = candidate;
+      landedAt = mQueuedSample->mTime;
+    }
+  }
+
+  SetNextKeyFrameTime();
+
+  return SeekPromise::CreateAndResolve(landedAt, __func__);
+}
+
+// Pulls the next sample from the iterator; returns nullptr when there is
+// none. Video samples get the track's extra data attached. For unencrypted
+// H264 and VP9 samples the keyframe flag is re-derived from the bitstream
+// and corrected (with a warning) when the container disagrees.
+already_AddRefed<MediaRawData> MP4TrackDemuxer::GetNextSample() {
+  RefPtr<MediaRawData> sample = mIterator->GetNext();
+  if (!sample) {
+    return nullptr;
+  }
+
+  VideoInfo* video = mInfo->GetAsVideoInfo();
+  if (!video) {
+    return sample.forget();
+  }
+  sample->mExtraData = video->mExtraData;
+
+  // Only clear H264/VP9 payloads can be inspected.
+  if (mType == kOther || sample->mCrypto.IsEncrypted()) {
+    return sample.forget();
+  }
+
+  if (mType == kH264) {
+    const H264::FrameType frameType = H264::GetFrameType(sample);
+    if (frameType == H264::FrameType::INVALID) {
+      NS_WARNING(nsPrintfCString("Invalid H264 frame @ pts:%" PRId64
+                                 " dur:%" PRId64 " dts:%" PRId64,
+                                 sample->mTime.ToMicroseconds(),
+                                 sample->mDuration.ToMicroseconds(),
+                                 sample->mTimecode.ToMicroseconds())
+                     .get());
+      // We could reject the sample now, however demuxer errors are fatal.
+      // So we keep the invalid frame, relying on the H264 decoder to
+      // handle the error later.
+      // TODO: make demuxer errors non-fatal.
+    } else if (frameType == H264::FrameType::I_FRAME ||
+               frameType == H264::FrameType::OTHER) {
+      const bool isKeyframe = frameType == H264::FrameType::I_FRAME;
+      if (sample->mKeyframe != isKeyframe) {
+        NS_WARNING(nsPrintfCString("Frame incorrectly marked as %skeyframe "
+                                   "@ pts:%" PRId64 " dur:%" PRId64
+                                   " dts:%" PRId64,
+                                   isKeyframe ? "" : "non-",
+                                   sample->mTime.ToMicroseconds(),
+                                   sample->mDuration.ToMicroseconds(),
+                                   sample->mTimecode.ToMicroseconds())
+                       .get());
+        sample->mKeyframe = isKeyframe;
+      }
+    }
+  } else if (mType == kVP9) {
+    const bool isKeyframe = VPXDecoder::IsKeyframe(
+        Span<const uint8_t>(sample->Data(), sample->Size()),
+        VPXDecoder::Codec::VP9);
+    if (sample->mKeyframe != isKeyframe) {
+      NS_WARNING(nsPrintfCString(
+                     "Frame incorrectly marked as %skeyframe "
+                     "@ pts:%" PRId64 " dur:%" PRId64 " dts:%" PRId64,
+                     isKeyframe ? "" : "non-", sample->mTime.ToMicroseconds(),
+                     sample->mDuration.ToMicroseconds(),
+                     sample->mTimecode.ToMicroseconds())
+                     .get());
+      sample->mKeyframe = isKeyframe;
+    }
+  }
+
+  return sample.forget();
+}
+
+// Collects up to aNumSamples non-empty samples, starting with the keyframe
+// queued by a previous seek/skip if there is one. Rejects with DEMUXER_ERR
+// when aNumSamples is 0 and with END_OF_STREAM when nothing could be
+// collected. Refreshes the next-keyframe time once the last returned sample
+// has reached it.
+RefPtr<MP4TrackDemuxer::SamplesPromise> MP4TrackDemuxer::GetSamples(
+    int32_t aNumSamples) {
+  EnsureUpToDateIndex();
+  RefPtr<SamplesHolder> holder = new SamplesHolder;
+  if (!aNumSamples) {
+    return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
+                                           __func__);
+  }
+
+  if (mQueuedSample) {
+    NS_ASSERTION(mQueuedSample->mKeyframe, "mQueuedSample must be a keyframe");
+    holder->AppendSample(mQueuedSample);
+    mQueuedSample = nullptr;
+    aNumSamples--;
+  }
+
+  while (aNumSamples) {
+    RefPtr<MediaRawData> next = GetNextSample();
+    if (!next) {
+      break;
+    }
+    if (!next->Size()) {
+      // Empty samples are dropped and do not count towards the request.
+      continue;
+    }
+    MOZ_DIAGNOSTIC_ASSERT(next->HasValidTime());
+    holder->AppendSample(next);
+    aNumSamples--;
+  }
+
+  if (holder->GetSamples().IsEmpty()) {
+    return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
+                                           __func__);
+  }
+
+  const bool keyframeTimeStale =
+      mNextKeyframeTime.isNothing() ||
+      holder->GetSamples().LastElement()->mTime >= mNextKeyframeTime.value();
+  if (keyframeTimeStale) {
+    SetNextKeyFrameTime();
+  }
+  return SamplesPromise::CreateAndResolve(holder, __func__);
+}
+
+// Caches the iterator's next keyframe time. The iterator reports -1 when
+// there is none, in which case the cached value is left empty.
+void MP4TrackDemuxer::SetNextKeyFrameTime() {
+  mNextKeyframeTime.reset();
+  const Microseconds nextKeyframe = mIterator->GetNextKeyframeTime();
+  if (nextKeyframe == -1) {
+    return;
+  }
+  mNextKeyframeTime.emplace(media::TimeUnit::FromMicroseconds(nextKeyframe));
+}
+
+// Rewinds the track: the iterator goes back to time 0, any queued keyframe is
+// dropped, and the next-keyframe time is recomputed.
+void MP4TrackDemuxer::Reset() {
+  // TODO, Seek to first frame available, which isn't always 0.
+  mIterator->Seek(0);
+  mQueuedSample = nullptr;
+  SetNextKeyFrameTime();
+}
+
+// Writes the cached next keyframe time to *aTime, or infinity when there is
+// no next keyframe. Always returns NS_OK.
+nsresult MP4TrackDemuxer::GetNextRandomAccessPoint(media::TimeUnit* aTime) {
+  *aTime = mNextKeyframeTime.isSome() ? mNextKeyframeTime.value()
+                                      : media::TimeUnit::FromInfinity();
+  return NS_OK;
+}
+
+// Consumes samples until a keyframe at or after aTimeThreshold is reached and
+// queues that keyframe. Resolves with the number of samples consumed, or
+// rejects with END_OF_STREAM (carrying that same count) if the samples run
+// out first.
+RefPtr<MP4TrackDemuxer::SkipAccessPointPromise>
+MP4TrackDemuxer::SkipToNextRandomAccessPoint(
+    const media::TimeUnit& aTimeThreshold) {
+  mQueuedSample = nullptr;
+
+  uint32_t consumed = 0;
+  bool reachedKeyframe = false;
+  for (;;) {
+    RefPtr<MediaRawData> candidate = GetNextSample();
+    if (!candidate) {
+      break;
+    }
+    consumed++;
+    MOZ_DIAGNOSTIC_ASSERT(candidate->HasValidTime());
+    if (candidate->mKeyframe && candidate->mTime >= aTimeThreshold) {
+      mQueuedSample = candidate;
+      reachedKeyframe = true;
+      break;
+    }
+  }
+
+  SetNextKeyFrameTime();
+
+  if (!reachedKeyframe) {
+    SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, consumed);
+    return SkipAccessPointPromise::CreateAndReject(std::move(failure),
+                                                   __func__);
+  }
+  return SkipAccessPointPromise::CreateAndResolve(consumed, __func__);
+}
+
+// Maps the resource's cached byte ranges to time intervals via the index.
+// Returns an empty set when the cached ranges cannot be obtained.
+media::TimeIntervals MP4TrackDemuxer::GetBuffered() {
+  EnsureUpToDateIndex();
+
+  AutoPinned<MediaResource> pinned(mResource);
+  MediaByteRangeSet cachedRanges;
+  if (NS_FAILED(pinned->GetCachedRanges(cachedRanges))) {
+    return media::TimeIntervals();
+  }
+  return mIndex->ConvertByteRangesToTimeRanges(cachedRanges);
+}
+
+// Only marks the index as stale; EnsureUpToDateIndex() does the actual work
+// the next time it is called.
+void MP4TrackDemuxer::NotifyDataArrived() {
+  mNeedReIndex = true;
+}
+
+// Re-indexes immediately from the current cached ranges, allowing the index
+// to evict entries. Does nothing if the cached ranges cannot be obtained.
+void MP4TrackDemuxer::NotifyDataRemoved() {
+  AutoPinned<MediaResource> pinned(mResource);
+  MediaByteRangeSet cachedRanges;
+  if (NS_SUCCEEDED(pinned->GetCachedRanges(cachedRanges))) {
+    mIndex->UpdateMoofIndex(cachedRanges, true /* can evict */);
+    mNeedReIndex = false;
+  }
+}
+
+} // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/mp4/MP4Demuxer.h b/dom/media/mp4/MP4Demuxer.h
new file mode 100644
index 0000000000..22fa5b137f
--- /dev/null
+++ b/dom/media/mp4/MP4Demuxer.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if !defined(MP4Demuxer_h_)
+# define MP4Demuxer_h_
+
+# include "mozilla/Maybe.h"
+# include "mozilla/Monitor.h"
+# include "MediaDataDemuxer.h"
+# include "MediaResource.h"
+
+namespace mozilla {
+class MP4TrackDemuxer;
+class ResourceStream;
+
+DDLoggedTypeDeclNameAndBase(MP4Demuxer, MediaDataDemuxer);
+
+// MediaDataDemuxer for MP4 resources. Init() parses the metadata and creates
+// one MP4TrackDemuxer per usable audio and video track.
+class MP4Demuxer : public MediaDataDemuxer,
+ public DecoderDoctorLifeLogger<MP4Demuxer> {
+ public:
+ explicit MP4Demuxer(MediaResource* aResource);
+
+ // Parses the metadata and populates the members below. Resolves with the
+ // first warning encountered (or NS_OK); rejects on fatal errors.
+ RefPtr<InitPromise> Init() override;
+
+ // Number of demuxers created for aType; 0 for types other than audio/video.
+ uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override;
+
+ // Returns nullptr for an unsupported type or an out-of-range aTrackNumber.
+ already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(
+ TrackInfo::TrackType aType, uint32_t aTrackNumber) override;
+
+ bool IsSeekable() const override;
+
+ // Returns null when no crypto init data was collected by Init().
+ UniquePtr<EncryptionInfo> GetCrypto() override;
+
+ // Both notifications are forwarded to every track demuxer.
+ void NotifyDataArrived() override;
+
+ void NotifyDataRemoved() override;
+
+ private:
+ RefPtr<MediaResource> mResource;
+ RefPtr<ResourceStream> mStream;
+ AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mAudioDemuxers;
+ AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mVideoDemuxers;
+ // Concatenated data of all PSSH entries found by Init().
+ nsTArray<uint8_t> mCryptoInitData;
+ // False until Init() stores the result of MP4Metadata::CanSeek().
+ bool mIsSeekable;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Interval.h b/dom/media/mp4/MP4Interval.h
new file mode 100644
index 0000000000..c7090a6253
--- /dev/null
+++ b/dom/media/mp4/MP4Interval.h
@@ -0,0 +1,131 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef INTERVAL_H_
+#define INTERVAL_H_
+
+#include "nsTArray.h"
+#include <algorithm>
+
+namespace mozilla {
+
+// An interval described by a start and an end value of type T. The
+// two-argument constructor asserts start <= end; an interval whose start
+// equals its end is considered null.
+template <typename T>
+struct MP4Interval {
+  MP4Interval() : start(0), end(0) {}
+  MP4Interval(T aStart, T aEnd) : start(aStart), end(aEnd) {
+    MOZ_ASSERT(aStart <= aEnd);
+  }
+
+  // Distance between start and end. Const so that it can be called on const
+  // intervals like every other observer of this type.
+  T Length() const { return end - start; }
+
+  // Overlap of this interval with aOther, or a default (null) interval when
+  // the two do not overlap.
+  MP4Interval Intersection(const MP4Interval& aOther) const {
+    T s = start > aOther.start ? start : aOther.start;
+    T e = end < aOther.end ? end : aOther.end;
+    if (s > e) {
+      return MP4Interval();
+    }
+    return MP4Interval(s, e);
+  }
+
+  // True when aOther lies entirely within this interval (bounds inclusive).
+  bool Contains(const MP4Interval& aOther) const {
+    return aOther.start >= start && aOther.end <= end;
+  }
+  bool operator==(const MP4Interval& aOther) const {
+    return start == aOther.start && end == aOther.end;
+  }
+  bool operator!=(const MP4Interval& aOther) const {
+    return !(*this == aOther);
+  }
+  bool IsNull() const { return end == start; }
+
+  // Smallest interval covering both this one and aOther. A null interval
+  // contributes nothing: aOther is returned unchanged.
+  MP4Interval Extents(const MP4Interval& aOther) const {
+    if (IsNull()) {
+      return aOther;
+    }
+    return MP4Interval(std::min(start, aOther.start),
+                       std::max(end, aOther.end));
+  }
+
+  T start;
+  T end;
+
+  // Appends aMP4Interval to aIntervals, extending the last element instead
+  // when the new interval starts exactly where the last one ends.
+  static void SemiNormalAppend(nsTArray<MP4Interval<T>>& aIntervals,
+                               MP4Interval<T> aMP4Interval) {
+    if (!aIntervals.IsEmpty() &&
+        aIntervals.LastElement().end == aMP4Interval.start) {
+      aIntervals.LastElement().end = aMP4Interval.end;
+    } else {
+      aIntervals.AppendElement(aMP4Interval);
+    }
+  }
+
+  // Writes to aNormalized (which must be non-null and empty) the intervals of
+  // aIntervals sorted by start, with overlapping or touching intervals merged.
+  static void Normalize(const nsTArray<MP4Interval<T>>& aIntervals,
+                        nsTArray<MP4Interval<T>>* aNormalized) {
+    if (!aNormalized || !aIntervals.Length()) {
+      MOZ_ASSERT(aNormalized);
+      return;
+    }
+    MOZ_ASSERT(aNormalized->IsEmpty());
+
+    nsTArray<MP4Interval<T>> sorted = aIntervals.Clone();
+    sorted.Sort(Compare());
+
+    MP4Interval<T> current = sorted[0];
+    for (size_t i = 1; i < sorted.Length(); i++) {
+      MOZ_ASSERT(sorted[i].start <= sorted[i].end);
+      if (current.Contains(sorted[i])) {
+        continue;
+      }
+      if (current.end >= sorted[i].start) {
+        // Not contained but overlapping/touching: sorted[i] ends later, so
+        // extend the current run.
+        current.end = sorted[i].end;
+      } else {
+        aNormalized->AppendElement(current);
+        current = sorted[i];
+      }
+    }
+    aNormalized->AppendElement(current);
+  }
+
+  // Appends to aIntersection the non-empty overlaps between two normalized
+  // interval lists, walking both lists in step.
+  static void Intersection(const nsTArray<MP4Interval<T>>& a0,
+                           const nsTArray<MP4Interval<T>>& a1,
+                           nsTArray<MP4Interval<T>>* aIntersection) {
+    MOZ_ASSERT(aIntersection);
+    MOZ_ASSERT(IsNormalized(a0));
+    MOZ_ASSERT(IsNormalized(a1));
+    size_t i0 = 0;
+    size_t i1 = 0;
+    while (i0 < a0.Length() && i1 < a1.Length()) {
+      MP4Interval i = a0[i0].Intersection(a1[i1]);
+      if (i.Length()) {
+        aIntersection->AppendElement(i);
+      }
+      // Advance whichever list's current interval ends first.
+      if (a0[i0].end < a1[i1].end) {
+        i0++;
+        // Assert that the array is sorted
+        MOZ_ASSERT(i0 == a0.Length() || a0[i0 - 1].start < a0[i0].start);
+      } else {
+        i1++;
+        // Assert that the array is sorted
+        MOZ_ASSERT(i1 == a1.Length() || a1[i1 - 1].start < a1[i1].start);
+      }
+    }
+  }
+
+  // True when every interval ends strictly before the next one starts.
+  static bool IsNormalized(const nsTArray<MP4Interval<T>>& aIntervals) {
+    for (size_t i = 1; i < aIntervals.Length(); i++) {
+      if (aIntervals[i - 1].end >= aIntervals[i].start) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Comparator for nsTArray::Sort: orders intervals by start only.
+  struct Compare {
+    bool Equals(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const {
+      return a0.start == a1.start && a0.end == a1.end;
+    }
+
+    bool LessThan(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const {
+      return a0.start < a1.start;
+    }
+  };
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Metadata.cpp b/dom/media/mp4/MP4Metadata.cpp
new file mode 100644
index 0000000000..872bd299e9
--- /dev/null
+++ b/dom/media/mp4/MP4Metadata.cpp
@@ -0,0 +1,478 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Logging.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/UniquePtr.h"
+#include "VideoUtils.h"
+#include "MoofParser.h"
+#include "MP4Metadata.h"
+#include "ByteStream.h"
+#include "mp4parse.h"
+
+#include <limits>
+#include <stdint.h>
+#include <vector>
+
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+LazyLogModule gMP4MetadataLog("MP4Metadata");
+
+// Keeps the indice table (pointer and length) of aIndice; the raw data
+// pointer is deliberately not carried over.
+IndiceWrapper::IndiceWrapper(Mp4parseByteData& aIndice) {
+  mIndice.indices = aIndice.indices;
+  mIndice.length = aIndice.length;
+  mIndice.data = nullptr;
+}
+
+// Number of entries in the wrapped indice table.
+size_t IndiceWrapper::Length() const {
+  return mIndice.length;
+}
+
+// Copies entry aIndex of the wrapped table into aIndice. Returns false, and
+// leaves aIndice untouched, when aIndex is out of range.
+bool IndiceWrapper::GetIndice(size_t aIndex, Index::Indice& aIndice) const {
+  if (aIndex < mIndice.length) {
+    const Mp4parseIndice& entry = mIndice.indices[aIndex];
+    aIndice.start_offset = entry.start_offset;
+    aIndice.end_offset = entry.end_offset;
+    aIndice.start_composition = entry.start_composition;
+    aIndice.end_composition = entry.end_composition;
+    aIndice.start_decode = entry.start_decode;
+    aIndice.sync = entry.sync;
+    return true;
+  }
+
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Error, ("Index overflow in indice"));
+  return false;
+}
+
+// Human-readable name of a track type, for log messages.
+static const char* TrackTypeToString(mozilla::TrackInfo::TrackType aType) {
+  if (aType == mozilla::TrackInfo::kAudioTrack) {
+    return "audio";
+  }
+  if (aType == mozilla::TrackInfo::kVideoTrack) {
+    return "video";
+  }
+  return "unknown";
+}
+
+// Reads up to `size` bytes at the current offset into `buffer` and advances
+// the offset by the number of bytes read. Fails without reading once the
+// checked offset has overflowed.
+bool StreamAdaptor::Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read) {
+  if (mOffset.isValid()) {
+    const bool readOk =
+        mSource->ReadAt(mOffset.value(), buffer, size, bytes_read);
+    if (readOk) {
+      mOffset += *bytes_read;
+    }
+    return readOk;
+  }
+
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Error,
+          ("Overflow in source stream offset"));
+  return false;
+}
+
+// Read callback handed to the rust parser. `userdata` is the StreamAdaptor to
+// read from. Returns the number of bytes read, or -1 on failure.
+static intptr_t read_source(uint8_t* buffer, uintptr_t size, void* userdata) {
+  MOZ_ASSERT(buffer);
+  MOZ_ASSERT(userdata);
+
+  auto adaptor = reinterpret_cast<StreamAdaptor*>(userdata);
+  size_t bytesRead = 0;
+  if (adaptor->Read(buffer, size, &bytesRead)) {
+    return bytesRead;
+  }
+
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, ("Error reading source data"));
+  return -1;
+}
+
+MP4Metadata::MP4Metadata(ByteStream* aSource)
+ : mSource(aSource), mSourceAdaptor(aSource) {
+ DDLINKCHILD("source", aSource);
+}
+
+// Defaulted, but defined here in the .cpp rather than in the header.
+// NOTE(review): presumably so member destructors (e.g. mParser's) see
+// complete types - confirm against MP4Metadata.h.
+MP4Metadata::~MP4Metadata() = default;
+
+// Creates the rust parser over the source adaptor and, on success, takes
+// ownership of it and refreshes the crypto information. Returns
+// NS_ERROR_OUT_OF_MEMORY when the parser reports OOM and
+// NS_ERROR_DOM_MEDIA_METADATA_ERR for any other failure.
+nsresult MP4Metadata::Parse() {
+  Mp4parseIo io = {read_source, &mSourceAdaptor};
+  Mp4parseParser* rawParser = nullptr;
+  Mp4parseStatus status = mp4parse_new(&io, &rawParser);
+
+  if (status != MP4PARSE_STATUS_OK || !rawParser) {
+    MOZ_ASSERT(!mParser);
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Debug,
+            ("Parse failed, return code %d\n", status));
+    if (status == MP4PARSE_STATUS_OOM) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+    return NS_ERROR_DOM_MEDIA_METADATA_ERR;
+  }
+
+  mParser.reset(rawParser);
+  MOZ_ASSERT(mParser);
+
+  UpdateCrypto();
+
+  return NS_OK;
+}
+
+// Feeds the parser's PSSH data into mCrypto. Nothing happens when the parser
+// call fails or reports no PSSH data.
+void MP4Metadata::UpdateCrypto() {
+  Mp4parsePsshInfo pssh = {};
+  const bool gotInfo =
+      mp4parse_get_pssh_info(mParser.get(), &pssh) == MP4PARSE_STATUS_OK;
+  if (gotInfo && pssh.data.length != 0) {
+    mCrypto.Update(pssh.data.data, pssh.data.length);
+  }
+}
+
+// True when the Gecko track type and the mp4parse track type both denote
+// audio or both denote video; false for every other Gecko track type.
+bool TrackTypeEqual(TrackInfo::TrackType aLHS, Mp4parseTrackType aRHS) {
+  if (aLHS == TrackInfo::kAudioTrack) {
+    return aRHS == MP4PARSE_TRACK_TYPE_AUDIO;
+  }
+  if (aLHS == TrackInfo::kVideoTrack) {
+    return aRHS == MP4PARSE_TRACK_TYPE_VIDEO;
+  }
+  return false;
+}
+
+// Counts the tracks of type aType that the rust parser can describe and whose
+// first sample entry has a recognised codec. Tracks whose info cannot be
+// retrieved are skipped. Returns NumberTracksError() with an error result
+// when the track count cannot be read or an audio/video track reports zero
+// sample infos.
+MP4Metadata::ResultAndTrackCount MP4Metadata::GetNumberTracks(
+    mozilla::TrackInfo::TrackType aType) const {
+  uint32_t trackCount;
+  auto countStatus = mp4parse_get_track_count(mParser.get(), &trackCount);
+  if (countStatus != MP4PARSE_STATUS_OK) {
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+            ("rust parser error %d counting tracks", countStatus));
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Rust parser error %d", countStatus)),
+            MP4Metadata::NumberTracksError()};
+  }
+
+  uint32_t matching = 0;
+  for (uint32_t index = 0; index < trackCount; ++index) {
+    Mp4parseTrackInfo trackInfo;
+    if (mp4parse_get_track_info(mParser.get(), index, &trackInfo) !=
+        MP4PARSE_STATUS_OK) {
+      continue;
+    }
+
+    const bool isAudio = trackInfo.track_type == MP4PARSE_TRACK_TYPE_AUDIO;
+    const bool isVideo = trackInfo.track_type == MP4PARSE_TRACK_TYPE_VIDEO;
+    if (!isAudio && !isVideo) {
+      // Only audio and video are supported
+      continue;
+    }
+
+    if (isAudio) {
+      Mp4parseTrackAudioInfo audio;
+      auto audioStatus =
+          mp4parse_get_track_audio_info(mParser.get(), index, &audio);
+      if (audioStatus != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(
+            gMP4MetadataLog, LogLevel::Warning,
+            ("mp4parse_get_track_audio_info returned error %d", audioStatus));
+        continue;
+      }
+      MOZ_DIAGNOSTIC_ASSERT(audio.sample_info_count > 0,
+                            "Must have at least one audio sample info");
+      if (audio.sample_info_count == 0) {
+        return {
+            MediaResult(
+                NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                RESULT_DETAIL(
+                    "Got 0 audio sample info while checking number tracks")),
+            MP4Metadata::NumberTracksError()};
+      }
+      // We assume the codec of the first sample info is representative of the
+      // whole track and skip it if we don't recognize the codec.
+      if (audio.sample_info[0].codec_type == MP4PARSE_CODEC_UNKNOWN) {
+        continue;
+      }
+    } else {
+      Mp4parseTrackVideoInfo video;
+      auto videoStatus =
+          mp4parse_get_track_video_info(mParser.get(), index, &video);
+      if (videoStatus != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(
+            gMP4MetadataLog, LogLevel::Warning,
+            ("mp4parse_get_track_video_info returned error %d", videoStatus));
+        continue;
+      }
+      MOZ_DIAGNOSTIC_ASSERT(video.sample_info_count > 0,
+                            "Must have at least one video sample info");
+      if (video.sample_info_count == 0) {
+        return {
+            MediaResult(
+                NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                RESULT_DETAIL(
+                    "Got 0 video sample info while checking number tracks")),
+            MP4Metadata::NumberTracksError()};
+      }
+      // We assume the codec of the first sample info is representative of the
+      // whole track and skip it if we don't recognize the codec.
+      if (video.sample_info[0].codec_type == MP4PARSE_CODEC_UNKNOWN) {
+        continue;
+      }
+    }
+
+    if (TrackTypeEqual(aType, trackInfo.track_type)) {
+      matching += 1;
+    }
+  }
+
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Info,
+          ("%s tracks found: %u", TrackTypeToString(aType), matching));
+
+  return {NS_OK, matching};
+}
+
+// Translates a per-type track number into the parser's global track index.
+// Returns Nothing() when the track count cannot be read or fewer than
+// aTrackNumber + 1 tracks of the requested type exist.
+Maybe<uint32_t> MP4Metadata::TrackTypeToGlobalTrackIndex(
+    mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const {
+  uint32_t trackCount;
+  if (mp4parse_get_track_count(mParser.get(), &trackCount) !=
+      MP4PARSE_STATUS_OK) {
+    return Nothing();
+  }
+
+  /* The MP4Metadata API uses a per-TrackType index of tracks, but mp4parse
+     (and libstagefright) use a global track index. Convert the index by
+     counting the tracks of the requested type and returning the global
+     track index when a match is found. */
+  uint32_t seenOfType = 0;
+  for (uint32_t globalIndex = 0; globalIndex < trackCount; ++globalIndex) {
+    Mp4parseTrackInfo trackInfo;
+    if (mp4parse_get_track_info(mParser.get(), globalIndex, &trackInfo) !=
+        MP4PARSE_STATUS_OK) {
+      continue;
+    }
+    if (!TrackTypeEqual(aType, trackInfo.track_type)) {
+      continue;
+    }
+    if (seenOfType == aTrackNumber) {
+      return Some(globalIndex);
+    }
+    seenOfType += 1;
+  }
+
+  return Nothing();
+}
+
+// Returns the TrackInfo for the aTrackNumber-th track of type aType.
+//
+// aTrackNumber is a per-type index; it is first converted to mp4parse's
+// global track index. On success the result carries NS_OK and an MP4AudioInfo
+// or MP4VideoInfo. Every failure path returns
+// NS_ERROR_DOM_MEDIA_METADATA_ERR together with a null TrackInfo.
+MP4Metadata::ResultAndTrackInfo MP4Metadata::GetTrackInfo(
+    mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const {
+  Maybe<uint32_t> trackIndex = TrackTypeToGlobalTrackIndex(aType, aTrackNumber);
+  if (trackIndex.isNothing()) {
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("No %s tracks", TrackTypeToStr(aType))),
+            nullptr};
+  }
+
+  Mp4parseTrackInfo info;
+  auto rv = mp4parse_get_track_info(mParser.get(), trackIndex.value(), &info);
+  if (rv != MP4PARSE_STATUS_OK) {
+    MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+            ("mp4parse_get_track_info returned %d", rv));
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Cannot find %s track #%zu",
+                                      TrackTypeToStr(aType), aTrackNumber)),
+            nullptr};
+  }
+#ifdef DEBUG
+  // Debug builds only: log the codec of the track's first sample info.
+  bool haveSampleInfo = false;
+  const char* codecString = "unrecognized";
+  Mp4parseCodec codecType = MP4PARSE_CODEC_UNKNOWN;
+  if (info.track_type == MP4PARSE_TRACK_TYPE_AUDIO) {
+    Mp4parseTrackAudioInfo audio;
+    auto audioRv = mp4parse_get_track_audio_info(mParser.get(),
+                                                 trackIndex.value(), &audio);
+    if (audioRv == MP4PARSE_STATUS_OK && audio.sample_info_count > 0) {
+      codecType = audio.sample_info[0].codec_type;
+      haveSampleInfo = true;
+    }
+  } else if (info.track_type == MP4PARSE_TRACK_TYPE_VIDEO) {
+    Mp4parseTrackVideoInfo video;
+    auto videoRv = mp4parse_get_track_video_info(mParser.get(),
+                                                 trackIndex.value(), &video);
+    if (videoRv == MP4PARSE_STATUS_OK && video.sample_info_count > 0) {
+      codecType = video.sample_info[0].codec_type;
+      haveSampleInfo = true;
+    }
+  }
+  if (haveSampleInfo) {
+    switch (codecType) {
+      case MP4PARSE_CODEC_UNKNOWN:
+        codecString = "unknown";
+        break;
+      case MP4PARSE_CODEC_AAC:
+        codecString = "aac";
+        break;
+      case MP4PARSE_CODEC_OPUS:
+        codecString = "opus";
+        break;
+      case MP4PARSE_CODEC_FLAC:
+        codecString = "flac";
+        break;
+      case MP4PARSE_CODEC_ALAC:
+        codecString = "alac";
+        break;
+      case MP4PARSE_CODEC_H263:
+        codecString = "h.263";
+        break;
+      case MP4PARSE_CODEC_AVC:
+        codecString = "h.264";
+        break;
+      case MP4PARSE_CODEC_VP9:
+        codecString = "vp9";
+        break;
+      case MP4PARSE_CODEC_AV1:
+        codecString = "av1";
+        break;
+      case MP4PARSE_CODEC_MP3:
+        codecString = "mp3";
+        break;
+      case MP4PARSE_CODEC_MP4V:
+        codecString = "mp4v";
+        break;
+      case MP4PARSE_CODEC_JPEG:
+        codecString = "jpeg";
+        break;
+      case MP4PARSE_CODEC_AC3:
+        codecString = "ac-3";
+        break;
+      case MP4PARSE_CODEC_EC3:
+        codecString = "ec-3";
+        break;
+    }
+  }
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Debug,
+          ("track codec %s (%u)\n", codecString, codecType));
+#endif
+
+  // Build the concrete TrackInfo subclass for the requested type. The inner
+  // status variables have their own names so they do not shadow `rv` above.
+  UniquePtr<mozilla::TrackInfo> e;
+  switch (aType) {
+    case TrackInfo::TrackType::kAudioTrack: {
+      Mp4parseTrackAudioInfo audio;
+      auto audioRv = mp4parse_get_track_audio_info(mParser.get(),
+                                                   trackIndex.value(), &audio);
+      if (audioRv != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_audio_info returned error %d", audioRv));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL("Cannot parse %s track #%zu",
+                                          TrackTypeToStr(aType), aTrackNumber)),
+                nullptr};
+      }
+      auto track = mozilla::MakeUnique<MP4AudioInfo>();
+      MediaResult updateStatus = track->Update(&info, &audio);
+      if (NS_FAILED(updateStatus)) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("Updating audio track failed with %s",
+                 updateStatus.Message().get()));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL(
+                                "Failed to update %s track #%zu with error: %s",
+                                TrackTypeToStr(aType), aTrackNumber,
+                                updateStatus.Message().get())),
+                nullptr};
+      }
+      e = std::move(track);
+    } break;
+    case TrackInfo::TrackType::kVideoTrack: {
+      Mp4parseTrackVideoInfo video;
+      auto videoRv = mp4parse_get_track_video_info(mParser.get(),
+                                                   trackIndex.value(), &video);
+      if (videoRv != MP4PARSE_STATUS_OK) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("mp4parse_get_track_video_info returned error %d", videoRv));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL("Cannot parse %s track #%zu",
+                                          TrackTypeToStr(aType), aTrackNumber)),
+                nullptr};
+      }
+      auto track = mozilla::MakeUnique<MP4VideoInfo>();
+      MediaResult updateStatus = track->Update(&info, &video);
+      if (NS_FAILED(updateStatus)) {
+        MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+                ("Updating video track failed with %s",
+                 updateStatus.Message().get()));
+        return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                            RESULT_DETAIL(
+                                "Failed to update %s track #%zu with error: %s",
+                                TrackTypeToStr(aType), aTrackNumber,
+                                updateStatus.Message().get())),
+                nullptr};
+      }
+      e = std::move(track);
+    } break;
+    default:
+      MOZ_LOG(gMP4MetadataLog, LogLevel::Warning,
+              ("unhandled track type %d", aType));
+      return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                          RESULT_DETAIL("Cannot handle %s track #%zu",
+                                        TrackTypeToStr(aType), aTrackNumber)),
+              nullptr};
+  }
+
+  // No duration in track, use fragment_duration.
+  if (e && !e->mDuration.IsPositive()) {
+    Mp4parseFragmentInfo fragmentInfo;
+    auto fragmentRv = mp4parse_get_fragment_info(mParser.get(), &fragmentInfo);
+    if (fragmentRv == MP4PARSE_STATUS_OK) {
+      e->mDuration =
+          TimeUnit::FromMicroseconds(fragmentInfo.fragment_duration);
+    }
+  }
+
+  if (e && e->IsValid()) {
+    return {NS_OK, std::move(e)};
+  }
+  MOZ_LOG(gMP4MetadataLog, LogLevel::Debug, ("TrackInfo didn't validate"));
+
+  return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Invalid %s track #%zu",
+                                    TrackTypeToStr(aType), aTrackNumber)),
+          nullptr};
+}
+
+// Unconditionally reports that seeking is possible.
+bool MP4Metadata::CanSeek() const {
+  return true;
+}
+
+// Returns NS_OK together with a pointer to this object's mCrypto member.
+MP4Metadata::ResultAndCryptoFile MP4Metadata::Crypto() const {
+  const CryptoFile* cryptoFile = &mCrypto;
+  return {NS_OK, cryptoFile};
+}
+
+// Returns an IndiceWrapper for track aTrackId. The index table is only
+// requested from mp4parse for non-fragmented tracks; a fragmented track gets
+// a wrapper around zero-initialized index data.
+MP4Metadata::ResultAndIndice MP4Metadata::GetTrackIndice(uint32_t aTrackId) {
+  uint8_t isFragmented = false;
+  auto status = mp4parse_is_fragmented(mParser.get(), aTrackId, &isFragmented);
+  if (status != MP4PARSE_STATUS_OK) {
+    return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                        RESULT_DETAIL("Cannot parse whether track id %u is "
+                                      "fragmented, mp4parse_error=%d",
+                                      aTrackId, int(status))),
+            nullptr};
+  }
+
+  Mp4parseByteData rawIndice = {};
+  if (!isFragmented) {
+    status = mp4parse_get_indice_table(mParser.get(), aTrackId, &rawIndice);
+    if (status != MP4PARSE_STATUS_OK) {
+      return {
+          MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Cannot parse index table in track id %u, "
+                                    "mp4parse_error=%d",
+                                    aTrackId, int(status))),
+          nullptr};
+    }
+  }
+
+  auto wrapper = mozilla::MakeUnique<IndiceWrapper>(rawIndice);
+  return {NS_OK, std::move(wrapper)};
+}
+
+// Reads the metadata buffer of aSource using a temporary MoofParser set up
+// to parse all tracks. Fails with NS_ERROR_DOM_MEDIA_METADATA_ERR when the
+// parser returns no buffer.
+/*static*/ MP4Metadata::ResultAndByteBuffer MP4Metadata::Metadata(
+    ByteStream* aSource) {
+  auto moofParser = mozilla::MakeUnique<MoofParser>(
+      aSource, AsVariant(ParseAllTracks{}), false);
+  RefPtr<mozilla::MediaByteBuffer> metadata = moofParser->Metadata();
+  if (metadata) {
+    return {NS_OK, std::move(metadata)};
+  }
+  return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR,
+                      RESULT_DETAIL("Cannot parse metadata")),
+          nullptr};
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/MP4Metadata.h b/dom/media/mp4/MP4Metadata.h
new file mode 100644
index 0000000000..df1e39ff1c
--- /dev/null
+++ b/dom/media/mp4/MP4Metadata.h
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP4METADATA_H_
+#define MP4METADATA_H_
+
+#include <type_traits>
+
+#include "mozilla/UniquePtr.h"
+#include "DecoderData.h"
+#include "Index.h"
+#include "MediaData.h"
+#include "MediaInfo.h"
+#include "MediaResult.h"
+#include "ByteStream.h"
+#include "mp4parse.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclName(MP4Metadata);
+
+// The memory behind mIndice.indices is owned by the Rust mp4 parser, so an
+// instance of this class MUST NOT outlive the Rust parser that produced it.
+class IndiceWrapper {
+ public:
+ size_t Length() const;
+
+ bool GetIndice(size_t aIndex, Index::Indice& aIndice) const;
+
+ explicit IndiceWrapper(Mp4parseByteData& aRustIndice);
+
+ protected:
+ Mp4parseByteData mIndice;
+};
+
+// Deleter functor that releases an Mp4parseParser through mp4parse_free().
+struct FreeMP4Parser {
+  void operator()(Mp4parseParser* aParser) {
+    mp4parse_free(aParser);
+  }
+};
+
+// Wraps a ByteStream and remembers the current read offset.
+class StreamAdaptor {
+ public:
+ explicit StreamAdaptor(ByteStream* aSource) : mSource(aSource), mOffset(0) {}
+
+ ~StreamAdaptor() = default;
+
+ bool Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read);
+
+ private:
+ ByteStream* mSource;  // Raw pointer; this class does not release it.
+ CheckedInt<size_t> mOffset;  // Starts at 0; overflow-checked.
+};
+
+class MP4Metadata : public DecoderDoctorLifeLogger<MP4Metadata> {
+ public:
+ explicit MP4Metadata(ByteStream* aSource);
+ ~MP4Metadata();
+
+ // Pairs a MediaResult with a payload of type T (stored as std::decay_t<T>); used as the return type of the accessors below.
+ template <typename T>
+ class ResultAndType {
+ public:
+ template <typename M2, typename T2>
+ ResultAndType(M2&& aM, T2&& aT)
+ : mResult(std::forward<M2>(aM)), mT(std::forward<T2>(aT)) {}
+ ResultAndType(const ResultAndType&) = default;
+ ResultAndType& operator=(const ResultAndType&) = default;
+ ResultAndType(ResultAndType&&) = default;
+ ResultAndType& operator=(ResultAndType&&) = default;
+
+ mozilla::MediaResult& Result() { return mResult; }
+ T& Ref() { return mT; }
+
+ private:
+ mozilla::MediaResult mResult;
+ std::decay_t<T> mT;
+ };
+
+ using ResultAndByteBuffer = ResultAndType<RefPtr<mozilla::MediaByteBuffer>>;
+ static ResultAndByteBuffer Metadata(ByteStream* aSource);
+
+ static constexpr uint32_t NumberTracksError() { return UINT32_MAX; }  // Track count reported together with a failure result.
+ using ResultAndTrackCount = ResultAndType<uint32_t>;
+ ResultAndTrackCount GetNumberTracks(
+ mozilla::TrackInfo::TrackType aType) const;
+
+ using ResultAndTrackInfo =
+ ResultAndType<mozilla::UniquePtr<mozilla::TrackInfo>>;
+ ResultAndTrackInfo GetTrackInfo(mozilla::TrackInfo::TrackType aType,
+ size_t aTrackNumber) const;  // aTrackNumber is a per-type track index.
+
+ bool CanSeek() const;
+
+ using ResultAndCryptoFile = ResultAndType<const CryptoFile*>;
+ ResultAndCryptoFile Crypto() const;
+
+ using ResultAndIndice = ResultAndType<mozilla::UniquePtr<IndiceWrapper>>;
+ ResultAndIndice GetTrackIndice(uint32_t aTrackId);
+
+ nsresult Parse();
+
+ private:
+ void UpdateCrypto();
+ Maybe<uint32_t> TrackTypeToGlobalTrackIndex(
+ mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const;  // Per-type track number -> mp4parse global track index.
+
+ CryptoFile mCrypto;
+ RefPtr<ByteStream> mSource;
+ StreamAdaptor mSourceAdaptor;
+ mozilla::UniquePtr<Mp4parseParser, FreeMP4Parser> mParser;  // Released with mp4parse_free() via FreeMP4Parser.
+};
+
+} // namespace mozilla
+
+#endif // MP4METADATA_H_
diff --git a/dom/media/mp4/MoofParser.cpp b/dom/media/mp4/MoofParser.cpp
new file mode 100644
index 0000000000..ee73c10c22
--- /dev/null
+++ b/dom/media/mp4/MoofParser.cpp
@@ -0,0 +1,1276 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MoofParser.h"
+#include "Box.h"
+#include "SinfParser.h"
+#include <limits>
+#include "MP4Interval.h"
+
+#include "mozilla/CheckedInt.h"
+#include "mozilla/HelperMacros.h"
+#include "mozilla/Logging.h"
+
+#if defined(MOZ_FMP4)
+extern mozilla::LogModule* GetDemuxerLog();
+
+# define LOG_ERROR(name, arg, ...) \
+ MOZ_LOG( \
+ GetDemuxerLog(), mozilla::LogLevel::Error, \
+ (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+# define LOG_WARN(name, arg, ...) \
+ MOZ_LOG( \
+ GetDemuxerLog(), mozilla::LogLevel::Warning, \
+ (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+# define LOG_DEBUG(name, arg, ...) \
+ MOZ_LOG( \
+ GetDemuxerLog(), mozilla::LogLevel::Debug, \
+ (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__))
+
+#else  // !defined(MOZ_FMP4): the logging macros expand to nothing.
+# define LOG_ERROR(...)
+# define LOG_WARN(...)
+# define LOG_DEBUG(...)
+#endif  // defined(MOZ_FMP4)
+
+namespace mozilla {
+
+const uint32_t kKeyIdSize = 16;  // NOTE(review): presumably the byte length of an encryption key ID; confirm against its users.
+
+// If the first sample of a Moof starts after the CTS end of the previous Moof
+// of the same track by no more than this many microseconds, its start is
+// pulled back to that end so no gap remains between the two Moofs.
+const Microseconds CROSS_MOOF_CTS_MERGE_THRESHOLD = 1;
+
+// Convenience overload: builds a BoxContext over mSource restricted to
+// aByteRanges and delegates to the BoxContext overload.
+bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges) {
+  BoxContext boxContext(mSource, aByteRanges);
+  const bool foundMoof = RebuildFragmentedIndex(boxContext);
+  return foundMoof;
+}
+
+// Overload that may first drop all but the last stored moof and media range.
+// On entry *aCanEvict says whether eviction is allowed; on exit it says
+// whether eviction actually happened (it needs more than one stored moof).
+bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges,
+                                        bool* aCanEvict) {
+  MOZ_ASSERT(aCanEvict);
+  const bool evict = *aCanEvict && mMoofs.Length() > 1;
+  if (evict) {
+    MOZ_ASSERT(mMoofs.Length() == mMediaRanges.Length());
+    // Keep only the most recent entry of each array.
+    mMoofs.RemoveElementsAt(0, mMoofs.Length() - 1);
+    mMediaRanges.RemoveElementsAt(0, mMediaRanges.Length() - 1);
+  }
+  *aCanEvict = evict;
+  return RebuildFragmentedIndex(aByteRanges);
+}
+
+// Walks the boxes available in aContext starting at mOffset:
+//  - the first "moov" seen while mInitRange is empty sets mInitRange and is
+//    parsed with ParseMoov();
+//  - every "moof" is parsed into a Moof and appended, with its byte range, to
+//    mMoofs / mMediaRanges;
+//  - an "mdat" that intersects the last moof's mMdatRange extends the last
+//    media range.
+// mOffset advances past each processed box. Returns true if at least one
+// moof was appended.
+bool MoofParser::RebuildFragmentedIndex(BoxContext& aContext) {
+  LOG_DEBUG(
+      Moof,
+      "Starting, mTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      mTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      mTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : mTrackParseMode.as<uint32_t>());
+  bool appendedMoof = false;
+
+  for (Box box(&aContext, mOffset); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("moov") && mInitRange.IsEmpty()) {
+      mInitRange = MediaByteRange(0, box.Range().mEnd);
+      ParseMoov(box);
+    } else if (box.IsType("moof")) {
+      Moof parsedMoof(box, mTrackParseMode, mTrex, mMvhd, mMdhd, mEdts, mSinf,
+                      &mLastDecodeTime, mIsAudio, mTracksEndCts);
+
+      // An invalid moof with nothing after it may simply be incomplete:
+      // stop here without advancing mOffset so it is retried later.
+      if (!parsedMoof.IsValid() && !box.Next().IsAvailable()) {
+        LOG_WARN(Moof,
+                 "Could not find valid moof, moof may not be complete yet.");
+        break;
+      }
+
+      // Close a (hopefully small) time gap between the previous moof and
+      // this one.
+      if (!mMoofs.IsEmpty()) {
+        mMoofs.LastElement().FixRounding(parsedMoof);
+      }
+
+      mMediaRanges.AppendElement(parsedMoof.mRange);
+      mMoofs.AppendElement(std::move(parsedMoof));
+      appendedMoof = true;
+    } else if (box.IsType("mdat") && !Moofs().IsEmpty()) {
+      // If this mdat overlaps the data the last moof refers to, grow the
+      // last media range so that it also covers the mdat.
+      Moof& lastMoof = Moofs().LastElement();
+      media::Interval<int64_t> expectedData(lastMoof.mMdatRange.mStart,
+                                            lastMoof.mMdatRange.mEnd, 0);
+      media::Interval<int64_t> mdatBytes(box.Range().mStart, box.Range().mEnd,
+                                         0);
+      if (expectedData.Intersects(mdatBytes)) {
+        mMediaRanges.LastElement() =
+            mMediaRanges.LastElement().Span(box.Range());
+      }
+    }
+    mOffset = box.NextOffset();
+  }
+  MOZ_ASSERT(mTrackParseMode.is<ParseAllTracks>() ||
+                 mTrex.mTrackId == mTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, mTrex should have the same track id "
+             "as the track being parsed.");
+  LOG_DEBUG(Moof, "Done, foundValidMoof=%s.", appendedMoof ? "true" : "false");
+  return appendedMoof;
+}
+
+// Returns the byte range of the first stored moof, or an empty range when no
+// moof has been stored yet.
+MediaByteRange MoofParser::FirstCompleteMediaHeader() {
+  if (!Moofs().IsEmpty()) {
+    return Moofs()[0].mRange;
+  }
+  return MediaByteRange();
+}
+
+// Returns the first media range that fully contains the mdat range of the
+// moof at the same index, or an empty range if there is none.
+MediaByteRange MoofParser::FirstCompleteMediaSegment() {
+  const auto rangeCount = mMediaRanges.Length();
+  for (uint32_t idx = 0; idx < rangeCount; ++idx) {
+    auto& mediaRange = mMediaRanges[idx];
+    if (!mediaRange.Contains(Moofs()[idx].mMdatRange)) {
+      continue;
+    }
+    return mediaRange;
+  }
+  return MediaByteRange();
+}
+
+DDLoggedTypeDeclNameAndBase(BlockingStream, ByteStream);
+
+// ByteStream wrapper that forwards every call to the wrapped stream.
+// CachedReadAt() forwards to the wrapped stream's ReadAt() rather than to its
+// CachedReadAt().
+// NOTE(review): given the class name this looks deliberate (a "cached" read
+// becomes an ordinary read); confirm before changing.
+class BlockingStream : public ByteStream,
+                       public DecoderDoctorLifeLogger<BlockingStream> {
+ public:
+  explicit BlockingStream(ByteStream* aStream) : mStream(aStream) {
+    DDLINKCHILD("stream", aStream);
+  }
+
+  bool ReadAt(int64_t offset, void* data, size_t size,
+              size_t* bytes_read) override {
+    return mStream->ReadAt(offset, data, size, bytes_read);
+  }
+
+  bool CachedReadAt(int64_t offset, void* data, size_t size,
+                    size_t* bytes_read) override {
+    return mStream->ReadAt(offset, data, size, bytes_read);
+  }
+
+  // `override` already implies virtual, so `virtual` is not repeated here.
+  bool Length(int64_t* size) override { return mStream->Length(size); }
+
+ private:
+  RefPtr<ByteStream> mStream;  // Wrapped stream; kept alive by this object.
+};
+
+// Scans boxes from mOffset through a BlockingStream over mSource. For each
+// "moof" found, rebuilds the fragmented index over [mOffset, end of that
+// moof) and returns true as soon as a rebuild reports a moof. Returns false
+// if no rebuild succeeds.
+bool MoofParser::BlockingReadNextMoof() {
+  LOG_DEBUG(Moof, "Starting.");
+  // Starting value kept if Length() does not write a length.
+  int64_t sourceLength = std::numeric_limits<int64_t>::max();
+  mSource->Length(&sourceLength);
+  RefPtr<BlockingStream> blockingStream = new BlockingStream(mSource);
+  MediaByteRangeSet scanRanges(MediaByteRange(0, sourceLength));
+
+  BoxContext scanContext(blockingStream, scanRanges);
+  for (Box box(&scanContext, mOffset); box.IsAvailable(); box = box.Next()) {
+    if (!box.IsType("moof")) {
+      continue;
+    }
+    MediaByteRangeSet moofRanges(MediaByteRange(mOffset, box.Range().mEnd));
+    BoxContext moofContext(blockingStream, moofRanges);
+    if (RebuildFragmentedIndex(moofContext)) {
+      LOG_DEBUG(Moof, "Succeeded on RebuildFragmentedIndex, returning true.");
+      return true;
+    }
+  }
+  LOG_DEBUG(Moof, "Couldn't read next moof, returning false.");
+  return false;
+}
+
+// Looks for the first "moov" box from mOffset onwards (through a
+// BlockingStream). If one is found its range is written to aMoov; in every
+// case mInitRange is then set to the value of aMoov.
+void MoofParser::ScanForMetadata(mozilla::MediaByteRange& aMoov) {
+  LOG_DEBUG(Moof, "Starting.");
+  // Starting value kept if Length() does not write a length.
+  int64_t sourceLength = std::numeric_limits<int64_t>::max();
+  mSource->Length(&sourceLength);
+  MediaByteRangeSet scanRanges;
+  scanRanges += MediaByteRange(0, sourceLength);
+  RefPtr<BlockingStream> blockingStream = new BlockingStream(mSource);
+
+  BoxContext scanContext(blockingStream, scanRanges);
+  for (Box box(&scanContext, mOffset); box.IsAvailable(); box = box.Next()) {
+    if (!box.IsType("moov")) {
+      continue;
+    }
+    aMoov = box.Range();
+    break;
+  }
+  mInitRange = aMoov;
+  LOG_DEBUG(Moof,
+            "Done, mInitRange.mStart=%" PRIi64 ", mInitRange.mEnd=%" PRIi64,
+            mInitRange.mStart, mInitRange.mEnd);
+}
+
+// Locates the moov box and returns its bytes in a new MediaByteBuffer.
+// Returns nullptr when no usable moov length is found, when allocating the
+// buffer fails, or when the read is short or fails.
+already_AddRefed<mozilla::MediaByteBuffer> MoofParser::Metadata() {
+  LOG_DEBUG(Moof, "Starting.");
+  MediaByteRange moovRange;
+  ScanForMetadata(moovRange);
+  CheckedInt<MediaByteBuffer::size_type> checkedLength = moovRange.Length();
+  if (!checkedLength.isValid() || checkedLength.value() == 0) {
+    // Either there is no moov, or its length cannot be used as an array size.
+    LOG_WARN(Moof,
+             "Did not get usable moov length while trying to parse Metadata.");
+    return nullptr;
+  }
+  const auto byteCount = checkedLength.value();
+
+  RefPtr<MediaByteBuffer> moovBytes = new MediaByteBuffer();
+  if (!moovBytes->SetLength(byteCount, fallible)) {
+    LOG_ERROR(Moof, "OOM");
+    return nullptr;
+  }
+
+  RefPtr<BlockingStream> blockingStream = new BlockingStream(mSource);
+  size_t bytesRead;
+  const bool readOk = blockingStream->ReadAt(
+      moovRange.mStart, moovBytes->Elements(), byteCount, &bytesRead);
+  if (!readOk || bytesRead != byteCount) {
+    LOG_WARN(Moof, "Failed to read moov while trying to parse Metadata.");
+    return nullptr;
+  }
+  LOG_DEBUG(Moof, "Done, found metadata.");
+  return moovBytes.forget();
+}
+
+// Returns the extents of the time ranges of all stored moofs whose first
+// box is available within aByteRanges.
+MP4Interval<Microseconds> MoofParser::GetCompositionRange(
+    const MediaByteRangeSet& aByteRanges) {
+  LOG_DEBUG(Moof, "Starting.");
+  MP4Interval<Microseconds> totalRange;
+  BoxContext boxContext(mSource, aByteRanges);
+  for (Moof& moof : mMoofs) {
+    Box moofBox(&boxContext, moof.mRange.mStart);
+    if (!moofBox.IsAvailable()) {
+      continue;
+    }
+    totalRange = totalRange.Extents(moof.mTimeRange);
+  }
+  LOG_DEBUG(Moof,
+            "Done, compositionRange.start=%" PRIi64
+            ", compositionRange.end=%" PRIi64 ".",
+            totalRange.start, totalRange.end);
+  return totalRange;
+}
+
+// True only when the source reports a length and mOffset equals it.
+bool MoofParser::ReachedEnd() {
+  int64_t sourceLength;
+  if (!mSource->Length(&sourceLength)) {
+    return false;
+  }
+  return mOffset == sourceLength;
+}
+
+// Dispatches the children of a moov box: "mvhd" is stored in mMvhd, "trak"
+// goes to ParseTrak() and "mvex" to ParseMvex(). Other children are ignored.
+void MoofParser::ParseMoov(Box& aBox) {
+  LOG_DEBUG(Moof, "Starting.");
+  Box child = aBox.FirstChild();
+  while (child.IsAvailable()) {
+    if (child.IsType("mvhd")) {
+      mMvhd = Mvhd(child);
+    } else if (child.IsType("trak")) {
+      ParseTrak(child);
+    } else if (child.IsType("mvex")) {
+      ParseMvex(child);
+    }
+    child = child.Next();
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Parses a trak box. The "tkhd" child supplies the track id; "mdia" and
+// "edts" children are only consumed when all tracks are parsed or when the
+// most recently read tkhd matches the track being parsed.
+void MoofParser::ParseTrak(Box& aBox) {
+  LOG_DEBUG(Trak, "Starting.");
+  Tkhd tkhd;
+  // Evaluated per child, because tkhd may be replaced while iterating.
+  const auto isSelectedTrack = [&]() {
+    return mTrackParseMode.is<ParseAllTracks>() ||
+           tkhd.mTrackId == mTrackParseMode.as<uint32_t>();
+  };
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("tkhd")) {
+      tkhd = Tkhd(child);
+    } else if (child.IsType("mdia")) {
+      if (isSelectedTrack()) {
+        ParseMdia(child);
+      }
+    } else if (child.IsType("edts")) {
+      if (isSelectedTrack()) {
+        mEdts = Edts(child);
+      }
+    }
+  }
+  LOG_DEBUG(Trak, "Done.");
+}
+
+// Parses an mdia box: "mdhd" is stored in mMdhd and "minf" is handed to
+// ParseMinf(). Other children are ignored.
+void MoofParser::ParseMdia(Box& aBox) {
+  LOG_DEBUG(Mdia, "Starting.");
+  Box child = aBox.FirstChild();
+  while (child.IsAvailable()) {
+    if (child.IsType("mdhd")) {
+      mMdhd = Mdhd(child);
+    } else if (child.IsType("minf")) {
+      ParseMinf(child);
+    }
+    child = child.Next();
+  }
+  LOG_DEBUG(Mdia, "Done.");
+}
+
+// Parses an mvex box. Each "trex" child is parsed; it replaces mTrex when
+// all tracks are parsed or when its track id matches the track being parsed.
+void MoofParser::ParseMvex(Box& aBox) {
+  LOG_DEBUG(Mvex, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (!child.IsType("trex")) {
+      continue;
+    }
+    Trex parsedTrex = Trex(child);
+    const bool wanted = mTrackParseMode.is<ParseAllTracks>() ||
+                        parsedTrex.mTrackId == mTrackParseMode.as<uint32_t>();
+    if (wanted) {
+      mTrex = parsedTrex;
+    }
+  }
+  LOG_DEBUG(Mvex, "Done.");
+}
+
+// Parses a minf box by handing every "stbl" child to ParseStbl().
+void MoofParser::ParseMinf(Box& aBox) {
+  LOG_DEBUG(Minf, "Starting.");
+  Box child = aBox.FirstChild();
+  while (child.IsAvailable()) {
+    if (child.IsType("stbl")) {
+      ParseStbl(child);
+    }
+    child = child.Next();
+  }
+  LOG_DEBUG(Minf, "Done.");
+}
+
+// Parses an stbl box. "stsd" goes to ParseStsd(). Valid "sgpd" / "sbgp"
+// children with grouping type "seig" replace the track-level sample
+// encryption info entries / sample-to-group entries. Returns early, without
+// the final "Done." log, if copying the entries fails.
+void MoofParser::ParseStbl(Box& aBox) {
+  LOG_DEBUG(Stbl, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("stsd")) {
+      ParseStsd(child);
+      continue;
+    }
+    if (child.IsType("sgpd")) {
+      Sgpd groupDescription(child);
+      if (groupDescription.IsValid() &&
+          groupDescription.mGroupingType == "seig") {
+        mTrackSampleEncryptionInfoEntries.Clear();
+        const bool copied = !!mTrackSampleEncryptionInfoEntries.AppendElements(
+            groupDescription.mEntries, mozilla::fallible);
+        if (!copied) {
+          LOG_ERROR(Stbl, "OOM");
+          return;
+        }
+      }
+      continue;
+    }
+    if (child.IsType("sbgp")) {
+      Sbgp sampleToGroup(child);
+      if (sampleToGroup.IsValid() && sampleToGroup.mGroupingType == "seig") {
+        mTrackSampleToGroupEntries.Clear();
+        const bool copied = !!mTrackSampleToGroupEntries.AppendElements(
+            sampleToGroup.mEntries, mozilla::fallible);
+        if (!copied) {
+          LOG_ERROR(Stbl, "OOM");
+          return;
+        }
+      }
+    }
+  }
+  LOG_DEBUG(Stbl, "Done.");
+}
+
+// Parses an stsd box, appending one SampleDescriptionEntry per child to
+// mSampleDescriptions. "encv" / "enca" children are additionally parsed with
+// ParseEncrypted() and flagged as encrypted entries. Does nothing for a
+// multitrack parser.
+void MoofParser::ParseStsd(Box& aBox) {
+  LOG_DEBUG(Stsd, "Starting.");
+  if (mTrackParseMode.is<ParseAllTracks>()) {
+    // The parser stores metadata for a single track only, so mapping the
+    // sample descriptions of several tracks onto it would not make sense.
+    LOG_DEBUG(Stsd, "Early return due to multitrack parser.");
+    return;
+  }
+  MOZ_ASSERT(
+      mSampleDescriptions.IsEmpty(),
+      "Shouldn't have any sample descriptions yet when starting to parse stsd");
+  uint32_t encryptedCount = 0;
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    SampleDescriptionEntry entry{false};
+    const bool isEncrypted = child.IsType("encv") || child.IsType("enca");
+    if (isEncrypted) {
+      ParseEncrypted(child);
+      entry.mIsEncryptedEntry = true;
+      ++encryptedCount;
+    }
+    if (!mSampleDescriptions.AppendElement(entry, mozilla::fallible)) {
+      LOG_ERROR(Stsd, "OOM");
+      return;
+    }
+  }
+  if (mSampleDescriptions.IsEmpty()) {
+    LOG_WARN(Stsd,
+             "No sample description entries found while parsing Stsd! This "
+             "shouldn't happen, as the spec requires one for each track!");
+  }
+  if (encryptedCount > 1) {
+    LOG_WARN(Stsd,
+             "More than one encrypted sample description entry found while "
+             "parsing track! We don't expect this, and it will likely break "
+             "during fragment look up!");
+  }
+  LOG_DEBUG(Stsd,
+            "Done, numberEncryptedEntries=%" PRIu32
+            ", mSampleDescriptions.Length=%zu",
+            encryptedCount, mSampleDescriptions.Length());
+}
+
+// Parses the "sinf" children of an encrypted sample entry into mSinf,
+// stopping at the first one that yields a valid Sinf.
+void MoofParser::ParseEncrypted(Box& aBox) {
+  LOG_DEBUG(Moof, "Starting.");
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    // Files with several sinf boxes inside one enc* box have been seen in
+    // the wild. That is not spec-conformant, so the first sinf that parses
+    // properly wins.
+    if (!child.IsType("sinf")) {
+      continue;
+    }
+    mSinf = Sinf(child);
+    if (mSinf.IsValid()) {
+      break;
+    }
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Comparator ordering Sample pointers by the start of their composition
+// range.
+class CtsComparator {
+ public:
+  bool Equals(Sample* const aA, Sample* const aB) const {
+    const auto& rangeA = aA->mCompositionRange;
+    const auto& rangeB = aB->mCompositionRange;
+    return rangeA.start == rangeB.start;
+  }
+  bool LessThan(Sample* const aA, Sample* const aB) const {
+    const auto& rangeA = aA->mCompositionRange;
+    const auto& rangeB = aB->mCompositionRange;
+    return rangeA.start < rangeB.start;
+  }
+};
+
+// Parses a moof box.
+//
+// Every "traf" child is parsed with ParseTraf() and contiguous "pssh"
+// children are concatenated into mPsshes. If the result is valid and has
+// samples, the samples' composition ranges are made contiguous, a small gap
+// to the previous Moof of the same track is closed (using and updating
+// aTracksEndCts), the samples' decode times are rewritten, and mTimeRange is
+// set. Finally the CENC aux info for aSinf's default encryption type is
+// attached to the samples.
+//
+// aDecodeTime is an in/out parameter passed to ParseTraf(); its value after
+// parsing is used as the end decode time of this moof.
+Moof::Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+           Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+           uint64_t* aDecodeTime, bool aIsAudio,
+           nsTArray<TrackEndCts>& aTracksEndCts)
+    : mRange(aBox.Range()), mTfhd(aTrex), mMaxRoundingError(35000) {
+  LOG_DEBUG(
+      Moof,
+      "Starting, aTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      aTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : aTrackParseMode.as<uint32_t>());
+  MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() ||
+                 aTrex.mTrackId == aTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, aTrex should have the same track id "
+             "as the track being parsed.");
+  nsTArray<Box> psshBoxes;
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("traf")) {
+      ParseTraf(box, aTrackParseMode, aTrex, aMvhd, aMdhd, aEdts, aSinf,
+                aDecodeTime, aIsAudio);
+    }
+    if (box.IsType("pssh")) {
+      psshBoxes.AppendElement(box);
+    }
+  }
+
+  // The EME spec requires that PSSH boxes which are contiguous in the
+  // file are dispatched to the media element in a single "encrypted" event.
+  // So append contiguous boxes here.
+  for (size_t i = 0; i < psshBoxes.Length(); ++i) {
+    Box box = psshBoxes[i];
+    if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
+      mPsshes.AppendElement();
+    }
+    nsTArray<uint8_t>& pssh = mPsshes.LastElement();
+    // ReadCompleteBox() is called as a temporary here, so no std::move is
+    // needed for it to bind as an rvalue.
+    pssh.AppendElements(box.ReadCompleteBox());
+  }
+
+  if (IsValid()) {
+    if (mIndex.Length()) {
+      // Ensure the samples are contiguous with no gaps.
+      nsTArray<Sample*> ctsOrder;
+      for (auto& sample : mIndex) {
+        ctsOrder.AppendElement(&sample);
+      }
+      ctsOrder.Sort(CtsComparator());
+
+      for (size_t i = 1; i < ctsOrder.Length(); i++) {
+        ctsOrder[i - 1]->mCompositionRange.end =
+            ctsOrder[i]->mCompositionRange.start;
+      }
+
+      // Ensure that there are no gaps between the first sample in this
+      // Moof and the preceding Moof.
+      if (!ctsOrder.IsEmpty()) {
+        bool found = false;
+        // Track ID of the track we're parsing.
+        const uint32_t trackId = aTrex.mTrackId;
+        // Find the previous CTS end time of the Moof preceding the Moofs we
+        // just parsed, for the track we're parsing.
+        for (auto& prevCts : aTracksEndCts) {
+          if (prevCts.mTrackId == trackId) {
+            // We have previously parsed a Moof for this track. Smooth the gap
+            // between samples for this track across the Moof boundary.
+            if (ctsOrder[0]->mCompositionRange.start > prevCts.mCtsEndTime &&
+                ctsOrder[0]->mCompositionRange.start - prevCts.mCtsEndTime <=
+                    CROSS_MOOF_CTS_MERGE_THRESHOLD) {
+              ctsOrder[0]->mCompositionRange.start = prevCts.mCtsEndTime;
+            }
+            prevCts.mCtsEndTime = ctsOrder.LastElement()->mCompositionRange.end;
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          // We've not parsed a Moof for this track yet. Save its CTS end
+          // time for the next Moof we parse.
+          aTracksEndCts.AppendElement(TrackEndCts(
+              trackId, ctsOrder.LastElement()->mCompositionRange.end));
+        }
+      }
+
+      // In MP4, the duration of a sample is defined as the delta between two
+      // decode timestamps. The operation above has updated the duration of
+      // each sample, as a Sample's duration is mCompositionRange.end -
+      // mCompositionRange.start. MSE's TrackBuffersManager expects dts that
+      // increase by the sample's duration, so we rewrite the dts accordingly.
+      int64_t presentationDuration =
+          ctsOrder.LastElement()->mCompositionRange.end -
+          ctsOrder[0]->mCompositionRange.start;
+      auto decodeOffset = aMdhd.ToMicroseconds(
+          static_cast<int64_t>(*aDecodeTime) - aEdts.mMediaStart);
+      auto offsetOffset = aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
+      // Falls back to 0 if either conversion failed.
+      int64_t endDecodeTime =
+          (decodeOffset.isOk() && offsetOffset.isOk())
+              ? decodeOffset.unwrap() + offsetOffset.unwrap()
+              : 0;
+      int64_t decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
+      // Ratio of decode duration to presentation duration; 0 when the
+      // presentation duration is 0, which avoids dividing by zero.
+      double adjust =
+          !!presentationDuration
+              ? static_cast<double>(decodeDuration) / presentationDuration
+              : 0;
+      int64_t dtsOffset = mIndex[0].mDecodeTime;
+      int64_t compositionDuration = 0;
+      // Adjust the dts, ensuring that the new adjusted dts will never be
+      // greater than decodeTime (the next moof's decode start time).
+      for (auto& sample : mIndex) {
+        sample.mDecodeTime = dtsOffset + int64_t(compositionDuration * adjust);
+        compositionDuration += sample.mCompositionRange.Length();
+      }
+      mTimeRange = MP4Interval<Microseconds>(
+          ctsOrder[0]->mCompositionRange.start,
+          ctsOrder.LastElement()->mCompositionRange.end);
+    }
+    ProcessCencAuxInfo(aSinf.mDefaultEncryptionType);
+  }
+  LOG_DEBUG(Moof, "Done.");
+}
+
+// Computes the byte range of each sample's auxiliary information of type
+// aType, using the first saiz and the first saio whose mAuxInfoType equals
+// aType.
+//
+// aByteRanges is cleared first. Two layouts are handled:
+//  - a single saio offset: ranges are laid out back to back from that
+//    offset, each with the size given by saiz;
+//  - one saio offset per saiz size: each range starts at its own offset.
+// All offsets are relative to the start of this moof (mRange.mStart).
+// Returns false if no matching saiz/saio exists, if the entry counts fit
+// neither layout, or on allocation failure.
+bool Moof::GetAuxInfo(AtomType aType,
+                      FallibleTArray<MediaByteRange>* aByteRanges) {
+  LOG_DEBUG(Moof, "Starting.");
+  aByteRanges->Clear();
+
+  // Range-for searches avoid comparing a signed index with Length().
+  Saiz* saiz = nullptr;
+  for (Saiz& candidate : mSaizs) {
+    if (candidate.mAuxInfoType == aType) {
+      saiz = &candidate;
+      break;
+    }
+  }
+  if (!saiz) {
+    LOG_DEBUG(Moof, "Could not find saiz matching aType. Returning false.");
+    return false;
+  }
+
+  Saio* saio = nullptr;
+  for (Saio& candidate : mSaios) {
+    if (candidate.mAuxInfoType == aType) {
+      saio = &candidate;
+      break;
+    }
+  }
+  if (!saio) {
+    LOG_DEBUG(Moof, "Could not find saio matching aType. Returning false.");
+    return false;
+  }
+
+  if (saio->mOffsets.Length() == 1) {
+    if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(),
+                                  mozilla::fallible)) {
+      LOG_ERROR(Moof, "OOM");
+      return false;
+    }
+    uint64_t offset = mRange.mStart + saio->mOffsets[0];
+    for (size_t i = 0; i < saiz->mSampleInfoSize.Length(); i++) {
+      if (!aByteRanges->AppendElement(
+              MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]),
+              mozilla::fallible)) {
+        LOG_ERROR(Moof, "OOM");
+        return false;
+      }
+      offset += saiz->mSampleInfoSize[i];
+    }
+    LOG_DEBUG(
+        Moof,
+        "Saio has 1 entry. aByteRanges populated accordingly. Returning true.");
+    return true;
+  }
+
+  if (saio->mOffsets.Length() == saiz->mSampleInfoSize.Length()) {
+    if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(),
+                                  mozilla::fallible)) {
+      LOG_ERROR(Moof, "OOM");
+      return false;
+    }
+    for (size_t i = 0; i < saio->mOffsets.Length(); i++) {
+      uint64_t offset = mRange.mStart + saio->mOffsets[i];
+      if (!aByteRanges->AppendElement(
+              MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]),
+              mozilla::fallible)) {
+        LOG_ERROR(Moof, "OOM");
+        return false;
+      }
+    }
+    LOG_DEBUG(
+        Moof,
+        "Saio and saiz have same number of entries. aByteRanges populated "
+        "accordingly. Returning true.");
+    return true;
+  }
+
+  LOG_DEBUG(Moof,
+            "Moof::GetAuxInfo could not find any Aux info, returning false.");
+  return false;
+}
+
+// Looks up the aux info byte ranges for aScheme and, if there is exactly one
+// per sample in mIndex, stores each range in the matching sample's
+// mCencRange. Returns false (leaving mIndex untouched) otherwise.
+bool Moof::ProcessCencAuxInfo(AtomType aScheme) {
+  LOG_DEBUG(Moof, "Starting.");
+  FallibleTArray<MediaByteRange> cencRanges;
+  if (!GetAuxInfo(aScheme, &cencRanges) ||
+      cencRanges.Length() != mIndex.Length()) {
+    LOG_DEBUG(Moof, "Couldn't find cenc aux info.");
+    return false;
+  }
+  // size_t index: Length() is unsigned, so a signed index would mix
+  // signedness in the loop condition.
+  for (size_t i = 0; i < cencRanges.Length(); i++) {
+    mIndex[i].mCencRange = cencRanges[i];
+  }
+  LOG_DEBUG(Moof, "Found cenc aux info and stored on index.");
+  return true;
+}
+
+// Parses a traf box in two passes.
+//
+// Pass 1 reads "tfhd" into mTfhd and, for children that follow a tfhd of a
+// wanted track (or any track in multitrack mode), reads "tfdt" and collects
+// "sgpd" / "sbgp" entries with grouping type "seig" plus "saiz" / "saio"
+// boxes. In single-track mode the function then returns if the tfhd's track
+// id is not the one being parsed.
+//
+// Pass 2 parses every "trun". The starting decode time is the tfdt's base
+// media decode time when the tfdt is valid, otherwise *aDecodeTime. mValid
+// reflects the outcome of the last parsed trun, and parsing stops at the
+// first failing trun. *aDecodeTime is updated with the resulting decode time.
+//
+// Returns early, without touching *aDecodeTime, on allocation failure.
+void Moof::ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode,
+                     Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts,
+                     Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio) {
+  LOG_DEBUG(
+      Traf,
+      "Starting, aTrackParseMode=%s, track#=%" PRIu32
+      " (ignore if multitrack).",
+      aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track",
+      aTrackParseMode.is<ParseAllTracks>() ? 0
+                                           : aTrackParseMode.as<uint32_t>());
+  MOZ_ASSERT(aDecodeTime);
+  MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() ||
+                 aTrex.mTrackId == aTrackParseMode.as<uint32_t>(),
+             "If not parsing all tracks, aTrex should have the same track id "
+             "as the track being parsed.");
+  Tfdt tfdt;
+
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("tfhd")) {
+      mTfhd = Tfhd(box, aTrex);
+    } else if (aTrackParseMode.is<ParseAllTracks>() ||
+               mTfhd.mTrackId == aTrackParseMode.as<uint32_t>()) {
+      if (box.IsType("tfdt")) {
+        tfdt = Tfdt(box);
+      } else if (box.IsType("sgpd")) {
+        Sgpd sgpd(box);
+        if (sgpd.IsValid() && sgpd.mGroupingType == "seig") {
+          mFragmentSampleEncryptionInfoEntries.Clear();
+          if (!mFragmentSampleEncryptionInfoEntries.AppendElements(
+                  sgpd.mEntries, mozilla::fallible)) {
+            LOG_ERROR(Moof, "OOM");
+            return;
+          }
+        }
+      } else if (box.IsType("sbgp")) {
+        Sbgp sbgp(box);
+        if (sbgp.IsValid() && sbgp.mGroupingType == "seig") {
+          mFragmentSampleToGroupEntries.Clear();
+          if (!mFragmentSampleToGroupEntries.AppendElements(
+                  sbgp.mEntries, mozilla::fallible)) {
+            LOG_ERROR(Moof, "OOM");
+            return;
+          }
+        }
+      } else if (box.IsType("saiz")) {
+        if (!mSaizs.AppendElement(Saiz(box, aSinf.mDefaultEncryptionType),
+                                  mozilla::fallible)) {
+          LOG_ERROR(Moof, "OOM");
+          return;
+        }
+      } else if (box.IsType("saio")) {
+        if (!mSaios.AppendElement(Saio(box, aSinf.mDefaultEncryptionType),
+                                  mozilla::fallible)) {
+          LOG_ERROR(Moof, "OOM");
+          return;
+        }
+      }
+    }
+  }
+  if (aTrackParseMode.is<uint32_t>() &&
+      mTfhd.mTrackId != aTrackParseMode.as<uint32_t>()) {
+    LOG_DEBUG(Traf,
+              "Early return as not multitrack parser and track id didn't match "
+              "mTfhd.mTrackId=%" PRIu32,
+              mTfhd.mTrackId);
+    return;
+  }
+  // Now search for TRUN boxes.
+  uint64_t decodeTime =
+      tfdt.IsValid() ? tfdt.mBaseMediaDecodeTime : *aDecodeTime;
+  for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+    if (box.IsType("trun")) {
+      if (ParseTrun(box, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio).isOk()) {
+        mValid = true;
+      } else {
+        LOG_WARN(Moof, "ParseTrun failed");
+        mValid = false;
+        break;
+      }
+    }
+  }
+  *aDecodeTime = decodeTime;
+  // The conversion must not carry a "." precision: with precision 0 a value
+  // of 0 would be printed as an empty string.
+  LOG_DEBUG(Traf, "Done, setting aDecodeTime=%" PRIu64 ".", decodeTime);
+}
+
+// If aMoof's time range starts after this moof's range ends, and the gap is
+// no larger than mMaxRoundingError, stretches this range's end to aMoof's
+// start. Otherwise leaves mTimeRange untouched.
+void Moof::FixRounding(const Moof& aMoof) {
+  const Microseconds delta = aMoof.mTimeRange.start - mTimeRange.end;
+  if (delta <= 0) {
+    return;
+  }
+  // delta is positive here, so comparing it as unsigned is exact.
+  if (static_cast<uint64_t>(delta) > mMaxRoundingError) {
+    return;
+  }
+  mTimeRange.end = aMoof.mTimeRange.start;
+}
+
+// Parses one track run ("trun") box and appends a Sample to mIndex for every
+// run entry with a non-zero size.
+//
+// Requires valid mTfhd, aMvhd, aMdhd and aEdts. Per-sample values default to
+// those of mTfhd and are overridden when the matching trun flag is set:
+//   0x001 data offset          0x004 first-sample flags
+//   0x100 sample duration      0x200 sample size
+//   0x400 sample flags         0x800 composition time offset
+//
+// aDecodeTime is read as the decode time of the first sample and, on success,
+// updated to the decode time after the last one. It is not modified when an
+// error is returned or when the run declares zero samples.
+// Returns Err(NS_ERROR_FAILURE) on invalid dependencies, a truncated box, a
+// failed timestamp conversion or allocation failure.
+Result<Ok, nsresult> Moof::ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
+                                     Edts& aEdts, uint64_t* aDecodeTime,
+                                     bool aIsAudio) {
+  LOG_DEBUG(Trun, "Starting.");
+  if (!mTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
+      !aEdts.IsValid()) {
+    // Log the validity of each dependency as-is (aEdts used to be logged
+    // negated, unlike the other three).
+    LOG_WARN(
+        Moof, "Invalid dependencies: mTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
+        mTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), aEdts.IsValid());
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  BoxReader reader(aBox);
+  if (!reader->CanReadType<uint32_t>()) {
+    LOG_WARN(Moof, "Incomplete Box (missing flags)");
+    return Err(NS_ERROR_FAILURE);
+  }
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+
+  if (!reader->CanReadType<uint32_t>()) {
+    LOG_WARN(Moof, "Incomplete Box (missing sampleCount)");
+    return Err(NS_ERROR_FAILURE);
+  }
+  uint32_t sampleCount;
+  MOZ_TRY_VAR(sampleCount, reader->ReadU32());
+  if (sampleCount == 0) {
+    LOG_DEBUG(Trun, "Trun with no samples, returning.");
+    return Ok();
+  }
+
+  uint64_t offset = mTfhd.mBaseDataOffset;
+  if (flags & 0x01) {
+    // ISO/IEC 14496-12 declares trun data_offset as a signed 32-bit value, so
+    // read it signed; the addition below then also moves backwards correctly.
+    int32_t dataOffset;
+    MOZ_TRY_VAR(dataOffset, reader->Read32());
+    offset += static_cast<uint64_t>(static_cast<int64_t>(dataOffset));
+  }
+  uint32_t firstSampleFlags = mTfhd.mDefaultSampleFlags;
+  if (flags & 0x04) {
+    MOZ_TRY_VAR(firstSampleFlags, reader->ReadU32());
+  }
+  uint64_t decodeTime = *aDecodeTime;
+
+  // mIndex may already hold samples from an earlier trun of the same moof, so
+  // reserve room for those plus this run's samples.
+  if (!mIndex.SetCapacity(mIndex.Length() + sampleCount, fallible)) {
+    LOG_ERROR(Moof, "Out of Memory");
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  for (size_t i = 0; i < sampleCount; i++) {
+    uint32_t sampleDuration = mTfhd.mDefaultSampleDuration;
+    if (flags & 0x100) {
+      MOZ_TRY_VAR(sampleDuration, reader->ReadU32());
+    }
+    uint32_t sampleSize = mTfhd.mDefaultSampleSize;
+    if (flags & 0x200) {
+      MOZ_TRY_VAR(sampleSize, reader->ReadU32());
+    }
+    // Only the first sample of the run uses firstSampleFlags as its default.
+    uint32_t sampleFlags = i ? mTfhd.mDefaultSampleFlags : firstSampleFlags;
+    if (flags & 0x400) {
+      MOZ_TRY_VAR(sampleFlags, reader->ReadU32());
+    }
+    int32_t ctsOffset = 0;
+    if (flags & 0x800) {
+      MOZ_TRY_VAR(ctsOffset, reader->Read32());
+    }
+
+    // Zero-sized entries produce no Sample but still advance the decode time.
+    if (sampleSize) {
+      Sample sample;
+      sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
+      offset += sampleSize;
+
+      Microseconds decodeOffset, emptyOffset, startCts, endCts;
+      MOZ_TRY_VAR(decodeOffset, aMdhd.ToMicroseconds((int64_t)decodeTime -
+                                                     aEdts.mMediaStart));
+      MOZ_TRY_VAR(emptyOffset, aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
+      sample.mDecodeTime = decodeOffset + emptyOffset;
+      MOZ_TRY_VAR(startCts,
+                  aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset -
+                                       aEdts.mMediaStart));
+      MOZ_TRY_VAR(endCts,
+                  aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset +
+                                       sampleDuration - aEdts.mMediaStart));
+      sample.mCompositionRange = MP4Interval<Microseconds>(
+          startCts + emptyOffset, endCts + emptyOffset);
+      // Sometimes audio streams don't properly mark their samples as keyframes,
+      // because every audio sample is a keyframe.
+      sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;
+
+      // Report allocation failure to the caller instead of asserting on it.
+      if (!mIndex.AppendElement(sample, fallible)) {
+        LOG_ERROR(Moof, "Out of Memory");
+        return Err(NS_ERROR_FAILURE);
+      }
+
+      mMdatRange = mMdatRange.Span(sample.mByteRange);
+    }
+    decodeTime += sampleDuration;
+  }
+  // Allow for one timescale unit of rounding error per sample in the run.
+  Microseconds roundTime;
+  MOZ_TRY_VAR(roundTime, aMdhd.ToMicroseconds(sampleCount));
+  mMaxRoundingError += roundTime;
+
+  *aDecodeTime = decodeTime;
+
+  LOG_DEBUG(Trun, "Done.");
+  return Ok();
+}
+
+// Builds a Tkhd from aBox; mValid records whether Parse() succeeded.
+Tkhd::Tkhd(Box& aBox) : mTrackId(0) {
+  mValid = Parse(aBox).isOk();
+  if (!mValid) {
+    LOG_WARN(Tkhd, "Parse failed");
+  }
+}
+
+// Reads the track header fields from aBox.
+//
+// Version 0 stores creation time, modification time and duration as 32-bit
+// values, version 1 as 64-bit values. Any other version is rejected, matching
+// Mvhd::Parse(), instead of being reported as a successfully parsed header
+// whose fields were never read.
+// Returns an error if the box is truncated or its version is unknown.
+Result<Ok, nsresult> Tkhd::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  // The version is the top byte of the 32-bit version/flags word.
+  uint8_t version = flags >> 24;
+  if (version == 0) {
+    uint32_t creationTime, modificationTime, reserved, duration;
+    MOZ_TRY_VAR(creationTime, reader->ReadU32());
+    MOZ_TRY_VAR(modificationTime, reader->ReadU32());
+    MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+    MOZ_TRY_VAR(reserved, reader->ReadU32());
+    MOZ_TRY_VAR(duration, reader->ReadU32());
+
+    (void)reserved;
+    NS_ASSERTION(!reserved, "reserved should be 0");
+
+    mCreationTime = creationTime;
+    mModificationTime = modificationTime;
+    mDuration = duration;
+  } else if (version == 1) {
+    uint32_t reserved;
+    MOZ_TRY_VAR(mCreationTime, reader->ReadU64());
+    MOZ_TRY_VAR(mModificationTime, reader->ReadU64());
+    MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+    MOZ_TRY_VAR(reserved, reader->ReadU32());
+    (void)reserved;
+    NS_ASSERTION(!reserved, "reserved should be 0");
+    MOZ_TRY_VAR(mDuration, reader->ReadU64());
+  } else {
+    return Err(NS_ERROR_FAILURE);
+  }
+  return Ok();
+}
+
+// Builds an Mvhd from aBox; mValid records whether Parse() succeeded.
+Mvhd::Mvhd(Box& aBox)
+    : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Mvhd, "Parse failed");
+}
+
+// Reads creation time, modification time, timescale and duration from aBox.
+// Version 0 stores the three time values as 32 bits, version 1 as 64 bits;
+// the timescale is 32 bits in both. Any other version, or a truncated box, is
+// an error.
+Result<Ok, nsresult> Mvhd::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+
+  // The version is the top byte of the version/flags word.
+  switch (static_cast<uint8_t>(versionAndFlags >> 24)) {
+    case 0: {
+      uint32_t created, modified, length;
+      MOZ_TRY_VAR(created, reader->ReadU32());
+      MOZ_TRY_VAR(modified, reader->ReadU32());
+      MOZ_TRY_VAR(mTimescale, reader->ReadU32());
+      MOZ_TRY_VAR(length, reader->ReadU32());
+      // The widened members are only written once all four reads succeeded.
+      mCreationTime = created;
+      mModificationTime = modified;
+      mDuration = length;
+      return Ok();
+    }
+    case 1: {
+      MOZ_TRY_VAR(mCreationTime, reader->ReadU64());
+      MOZ_TRY_VAR(mModificationTime, reader->ReadU64());
+      MOZ_TRY_VAR(mTimescale, reader->ReadU32());
+      MOZ_TRY_VAR(mDuration, reader->ReadU64());
+      return Ok();
+    }
+    default:
+      return Err(NS_ERROR_FAILURE);
+  }
+}
+
+// An Mdhd is parsed exactly like an Mvhd.
+Mdhd::Mdhd(Box& aBox) : Mvhd(aBox) {}
+
+// Builds a Trex from aBox; mValid records whether Parse() succeeded.
+Trex::Trex(Box& aBox)
+    : mFlags(0),
+      mTrackId(0),
+      mDefaultSampleDescriptionIndex(0),
+      mDefaultSampleDuration(0),
+      mDefaultSampleSize(0),
+      mDefaultSampleFlags(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Trex, "Parse failed");
+}
+
+// Reads the six consecutive 32-bit fields of the box, in stream order. On a
+// truncated box the fields read so far keep their new values and an error is
+// returned.
+Result<Ok, nsresult> Trex::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t* const fieldsInStreamOrder[] = {
+      &mFlags,
+      &mTrackId,
+      &mDefaultSampleDescriptionIndex,
+      &mDefaultSampleDuration,
+      &mDefaultSampleSize,
+      &mDefaultSampleFlags,
+  };
+  for (uint32_t* field : fieldsInStreamOrder) {
+    MOZ_TRY_VAR(*field, reader->ReadU32());
+  }
+
+  return Ok();
+}
+
+// Builds a Tfhd from aBox. Every field starts as a copy of aTrex, so values
+// that the box does not override keep aTrex's defaults. mValid records
+// whether Parse() succeeded.
+Tfhd::Tfhd(Box& aBox, Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
+ mValid = Parse(aBox).isOk();
+ if (!mValid) {
+ LOG_WARN(Tfhd, "Parse failed");
+ }
+}
+
+// Reads the track fragment header from aBox, which is asserted to be a "tfhd"
+// inside a "traf" inside a "moof". The optional fields are present only when
+// their bit is set in mFlags and are read in this order:
+//   0x01 base data offset (64 bit)   0x02 default sample description index
+//   0x08 default sample duration     0x10 default sample size
+//   0x20 default sample flags
+// Returns an error if the box ends before a field that its flags announce.
+Result<Ok, nsresult> Tfhd::Parse(Box& aBox) {
+ MOZ_ASSERT(aBox.IsType("tfhd"));
+ MOZ_ASSERT(aBox.Parent()->IsType("traf"));
+ MOZ_ASSERT(aBox.Parent()->Parent()->IsType("moof"));
+
+ BoxReader reader(aBox);
+
+ MOZ_TRY_VAR(mFlags, reader->ReadU32());
+ MOZ_TRY_VAR(mTrackId, reader->ReadU32());
+ // Without an explicit base data offset, use the offset of the grandparent
+ // box (the enclosing moof, per the assertions above).
+ mBaseDataOffset = aBox.Parent()->Parent()->Offset();
+ if (mFlags & 0x01) {
+ MOZ_TRY_VAR(mBaseDataOffset, reader->ReadU64());
+ }
+ if (mFlags & 0x02) {
+ MOZ_TRY_VAR(mDefaultSampleDescriptionIndex, reader->ReadU32());
+ }
+ if (mFlags & 0x08) {
+ MOZ_TRY_VAR(mDefaultSampleDuration, reader->ReadU32());
+ }
+ if (mFlags & 0x10) {
+ MOZ_TRY_VAR(mDefaultSampleSize, reader->ReadU32());
+ }
+ if (mFlags & 0x20) {
+ MOZ_TRY_VAR(mDefaultSampleFlags, reader->ReadU32());
+ }
+
+ return Ok();
+}
+
+// Builds a Tfdt from aBox; mValid records whether Parse() succeeded.
+Tfdt::Tfdt(Box& aBox) : mBaseMediaDecodeTime(0) {
+  mValid = Parse(aBox).isOk();
+  if (!mValid) {
+    LOG_WARN(Tfdt, "Parse failed");
+  }
+}
+
+// Reads the base media decode time from aBox: 32 bits wide in version 0 and
+// 64 bits wide in version 1.
+//
+// Any other version is an error. Previously it was reported as a success
+// with mBaseMediaDecodeTime left at 0, which ParseTraf() would then use as a
+// real decode time instead of falling back to the caller-supplied one.
+Result<Ok, nsresult> Tfdt::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  // The version is the top byte of the version/flags word.
+  uint8_t version = flags >> 24;
+  if (version == 0) {
+    uint32_t tmp;
+    MOZ_TRY_VAR(tmp, reader->ReadU32());
+    mBaseMediaDecodeTime = tmp;
+  } else if (version == 1) {
+    MOZ_TRY_VAR(mBaseMediaDecodeTime, reader->ReadU64());
+  } else {
+    return Err(NS_ERROR_FAILURE);
+  }
+  return Ok();
+}
+
+// Builds an Edts from aBox; mValid records whether Parse() succeeded.
+Edts::Edts(Box& aBox) : mMediaStart(0), mEmptyOffset(0) {
+ mValid = Parse(aBox).isOk();
+ if (!mValid) {
+ LOG_WARN(Edts, "Parse failed");
+ }
+}
+
+// Reads the edit list from aBox, whose first child must be an "elst" box.
+//
+// Only a limited shape is understood: an optional leading empty edit (media
+// time -1), whose segment duration becomes mEmptyOffset, followed by a single
+// edit whose media time becomes mMediaStart. Anything beyond that is logged
+// and not applied.
+// Returns an error if the first child is not "elst" or the box is truncated.
+Result<Ok, nsresult> Edts::Parse(Box& aBox) {
+ Box child = aBox.FirstChild();
+ if (!child.IsType("elst")) {
+ return Err(NS_ERROR_FAILURE);
+ }
+
+ BoxReader reader(child);
+ uint32_t flags;
+ MOZ_TRY_VAR(flags, reader->ReadU32());
+ // The version is the top byte of the version/flags word.
+ uint8_t version = flags >> 24;
+ bool emptyEntry = false;
+ uint32_t entryCount;
+ MOZ_TRY_VAR(entryCount, reader->ReadU32());
+ for (uint32_t i = 0; i < entryCount; i++) {
+ uint64_t segment_duration;
+ int64_t media_time;
+ // Version 1 stores both fields as 64 bits; otherwise they are 32 bits and
+ // are widened here (media_time keeps its sign).
+ if (version == 1) {
+ MOZ_TRY_VAR(segment_duration, reader->ReadU64());
+ MOZ_TRY_VAR(media_time, reader->Read64());
+ } else {
+ uint32_t tmp;
+ MOZ_TRY_VAR(tmp, reader->ReadU32());
+ segment_duration = tmp;
+ int32_t tmp2;
+ MOZ_TRY_VAR(tmp2, reader->Read32());
+ media_time = tmp2;
+ }
+ if (media_time == -1 && i) {
+ // An empty edit anywhere but at entry 0: warn and carry on.
+ LOG_WARN(Edts, "Multiple empty edit, not handled");
+ } else if (media_time == -1) {
+ // Empty edit at entry 0.
+ mEmptyOffset = segment_duration;
+ emptyEntry = true;
+ } else if (i > 1 || (i > 0 && !emptyEntry)) {
+ // A second non-empty edit: stop without applying it.
+ LOG_WARN(Edts,
+ "More than one edit entry, not handled. A/V sync will be wrong");
+ break;
+ } else {
+ mMediaStart = media_time;
+ }
+ MOZ_TRY(reader->ReadU32()); // media_rate_integer and media_rate_fraction
+ }
+
+ return Ok();
+}
+
+// Builds a Saiz from aBox. aDefaultType is the aux info type used when the
+// box does not carry its own; mValid records whether Parse() succeeded.
+Saiz::Saiz(Box& aBox, AtomType aDefaultType)
+    : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Saiz, "Parse failed");
+}
+
+// Fills mSampleInfoSize with one size byte per sample. When the box declares
+// a non-zero default size, that value is repeated for every sample; otherwise
+// the per-sample sizes are read from the box.
+// Returns an error on a truncated box or allocation failure.
+Result<Ok, nsresult> Saiz::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  // Flag bit 0 announces an explicit aux info type and parameter.
+  if (versionAndFlags & 1) {
+    MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32());
+    MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32());
+  }
+  uint8_t sharedSize;
+  MOZ_TRY_VAR(sharedSize, reader->ReadU8());
+  uint32_t count;
+  MOZ_TRY_VAR(count, reader->ReadU32());
+
+  if (sharedSize == 0) {
+    // Sizes are listed individually in the box.
+    if (!reader->ReadArray(mSampleInfoSize, count)) {
+      LOG_WARN(Saiz, "Incomplete Box (OOM or missing count:%u)", count);
+      return Err(NS_ERROR_FAILURE);
+    }
+    return Ok();
+  }
+
+  // Every sample shares the same size.
+  if (!mSampleInfoSize.SetLength(count, fallible)) {
+    LOG_ERROR(Saiz, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  memset(mSampleInfoSize.Elements(), sharedSize, mSampleInfoSize.Length());
+  return Ok();
+}
+
+// Builds a Saio from aBox. aDefaultType is the aux info type used when the
+// box does not carry its own; mValid records whether Parse() succeeded.
+Saio::Saio(Box& aBox, AtomType aDefaultType)
+    : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) {
+  mValid = Parse(aBox).isOk();
+  if (!mValid) {
+    LOG_WARN(Saio, "Parse failed");
+  }
+}
+
+// Reads the auxiliary information offsets into mOffsets. Offsets are 32 bits
+// wide in version 0 and 64 bits wide otherwise; both are stored as 64 bits.
+// Returns an error on a truncated box or allocation failure.
+Result<Ok, nsresult> Saio::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t flags;
+  MOZ_TRY_VAR(flags, reader->ReadU32());
+  // The version is the top byte of the version/flags word.
+  uint8_t version = flags >> 24;
+  // Flag bit 0 announces an explicit aux info type and parameter.
+  if (flags & 1) {
+    MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32());
+    MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32());
+  }
+
+  size_t count;
+  MOZ_TRY_VAR(count, reader->ReadU32());
+  if (!mOffsets.SetCapacity(count, fallible)) {
+    // Log under this class's own tag (this used to say Saiz).
+    LOG_ERROR(Saio, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  // Capacity for `count` elements was reserved above, so the appends below
+  // are not expected to fail.
+  if (version == 0) {
+    uint32_t offset;
+    for (size_t i = 0; i < count; i++) {
+      MOZ_TRY_VAR(offset, reader->ReadU32());
+      MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible));
+    }
+  } else {
+    uint64_t offset;
+    for (size_t i = 0; i < count; i++) {
+      MOZ_TRY_VAR(offset, reader->ReadU64());
+      MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible));
+    }
+  }
+  return Ok();
+}
+
+// Builds an Sbgp from aBox; mValid records whether Parse() succeeded.
+Sbgp::Sbgp(Box& aBox) : mGroupingTypeParam(0) {
+  mValid = Parse(aBox).isOk();
+  if (mValid) {
+    return;
+  }
+  LOG_WARN(Sbgp, "Parse failed");
+}
+
+// Reads the grouping type, the grouping type parameter (version 1 only) and
+// the list of (sample count, group description index) pairs, appending each
+// pair to mEntries.
+// Returns an error on a truncated box or allocation failure.
+Result<Ok, nsresult> Sbgp::Parse(Box& aBox) {
+  BoxReader reader(aBox);
+
+  uint32_t versionAndFlags;
+  MOZ_TRY_VAR(versionAndFlags, reader->ReadU32());
+  const bool hasGroupingTypeParam =
+      static_cast<uint8_t>(versionAndFlags >> 24) == 1;
+
+  uint32_t groupingType;
+  MOZ_TRY_VAR(groupingType, reader->ReadU32());
+  mGroupingType = groupingType;
+
+  if (hasGroupingTypeParam) {
+    MOZ_TRY_VAR(mGroupingTypeParam, reader->ReadU32());
+  }
+
+  uint32_t entriesLeft;
+  MOZ_TRY_VAR(entriesLeft, reader->ReadU32());
+
+  while (entriesLeft > 0) {
+    --entriesLeft;
+
+    uint32_t runLength;
+    MOZ_TRY_VAR(runLength, reader->ReadU32());
+    uint32_t descriptionIndex;
+    MOZ_TRY_VAR(descriptionIndex, reader->ReadU32());
+
+    SampleToGroupEntry pair(runLength, descriptionIndex);
+    if (!mEntries.AppendElement(pair, mozilla::fallible)) {
+      LOG_ERROR(Sbgp, "OOM");
+      return Err(NS_ERROR_FAILURE);
+    }
+  }
+  return Ok();
+}
+
+// Builds an Sgpd from aBox; mValid records whether Parse() succeeded.
+Sgpd::Sgpd(Box& aBox) {
+ mValid = Parse(aBox).isOk();
+ if (!mValid) {
+ LOG_WARN(Sgpd, "Parse failed");
+ }
+}
+
+// Reads the grouping type and the list of group description entries, parsing
+// every entry as a CencSampleEncryptionInfoEntry and appending it to mEntries.
+// Returns an error if the box is truncated, a declared entry length is too
+// small, an entry fails to parse, or allocation fails.
+Result<Ok, nsresult> Sgpd::Parse(Box& aBox) {
+ BoxReader reader(aBox);
+
+ uint32_t flags;
+ MOZ_TRY_VAR(flags, reader->ReadU32());
+ // The version is the top byte of the version/flags word.
+ const uint8_t version = flags >> 24;
+
+ uint32_t type;
+ MOZ_TRY_VAR(type, reader->ReadU32());
+ mGroupingType = type;
+
+ // Smallest entry accepted: four bytes plus the key id.
+ const uint32_t entrySize = sizeof(uint32_t) + kKeyIdSize;
+ uint32_t defaultLength = 0;
+
+ if (version == 1) {
+ // A default length of 0 means each entry states its own length (see the
+ // loop below).
+ MOZ_TRY_VAR(defaultLength, reader->ReadU32());
+ if (defaultLength < entrySize && defaultLength != 0) {
+ return Err(NS_ERROR_FAILURE);
+ }
+ }
+
+ uint32_t count;
+ MOZ_TRY_VAR(count, reader->ReadU32());
+
+ for (uint32_t i = 0; i < count; ++i) {
+ if (version == 1 && defaultLength == 0) {
+ uint32_t descriptionLength;
+ MOZ_TRY_VAR(descriptionLength, reader->ReadU32());
+ if (descriptionLength < entrySize) {
+ return Err(NS_ERROR_FAILURE);
+ }
+ }
+
+ // NOTE(review): the declared lengths are only checked against the minimum;
+ // bytes of an entry beyond what Init() consumes are not skipped here --
+ // confirm that is intended.
+ CencSampleEncryptionInfoEntry entry;
+ bool valid = entry.Init(reader).isOk();
+ if (!valid) {
+ return Err(NS_ERROR_FAILURE);
+ }
+ if (!mEntries.AppendElement(entry, mozilla::fallible)) {
+ LOG_ERROR(Sgpd, "OOM");
+ return Err(NS_ERROR_FAILURE);
+ }
+ }
+ return Ok();
+}
+
+// Reads one sample encryption information group entry from aReader.
+//
+// Layout consumed, in order: one reserved byte, one pattern byte (crypt block
+// count in the high nibble, skip block count in the low nibble), the
+// is-encrypted byte, the per-sample IV size, kKeyIdSize bytes of key id and,
+// for protected entries whose per-sample IV size is 0, a constant IV size
+// byte followed by the constant IV itself.
+//
+// Returns an error if the data is truncated, a size is not acceptable, or
+// allocation fails.
+Result<Ok, nsresult> CencSampleEncryptionInfoEntry::Init(BoxReader& aReader) {
+  // Skip a reserved byte.
+  MOZ_TRY(aReader->ReadU8());
+
+  uint8_t pattern;
+  MOZ_TRY_VAR(pattern, aReader->ReadU8());
+  mCryptByteBlock = pattern >> 4;
+  mSkipByteBlock = pattern & 0x0f;
+
+  uint8_t isEncrypted;
+  MOZ_TRY_VAR(isEncrypted, aReader->ReadU8());
+  mIsEncrypted = isEncrypted != 0;
+
+  MOZ_TRY_VAR(mIVSize, aReader->ReadU8());
+
+  // Read the key id.
+  if (!mKeyId.SetLength(kKeyIdSize, fallible)) {
+    LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  for (uint32_t i = 0; i < kKeyIdSize; ++i) {
+    MOZ_TRY_VAR(mKeyId.ElementAt(i), aReader->ReadU8());
+  }
+
+  if (!mIsEncrypted) {
+    // Unprotected entries carry no IV of any kind, so a non-zero per-sample
+    // IV size is malformed. (The constant IV used to be read on this path,
+    // which is the opposite of what the format and ParseTenc() do.)
+    if (mIVSize != 0) {
+      LOG_WARN(CencSampleEncryptionInfoEntry,
+               "Unexpected IV size %" PRIu8 " for unprotected entry", mIVSize);
+      return Err(NS_ERROR_FAILURE);
+    }
+    return Ok();
+  }
+
+  if (mIVSize != 0) {
+    // Protected content with per-sample IVs: only 8 and 16 byte IVs are
+    // accepted.
+    if (mIVSize != 8 && mIVSize != 16) {
+      return Err(NS_ERROR_FAILURE);
+    }
+    return Ok();
+  }
+
+  // Protected content with 0 sized IV indicates a constant IV is present.
+  // This is used for the cbcs scheme.
+  uint8_t constantIVSize;
+  MOZ_TRY_VAR(constantIVSize, aReader->ReadU8());
+  if (constantIVSize != 8 && constantIVSize != 16) {
+    LOG_WARN(CencSampleEncryptionInfoEntry,
+             "Unexpected constantIVSize: %" PRIu8, constantIVSize);
+    return Err(NS_ERROR_FAILURE);
+  }
+  if (!mConsantIV.SetLength(constantIVSize, mozilla::fallible)) {
+    LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM");
+    return Err(NS_ERROR_FAILURE);
+  }
+  for (uint32_t i = 0; i < constantIVSize; ++i) {
+    MOZ_TRY_VAR(mConsantIV.ElementAt(i), aReader->ReadU8());
+  }
+
+  return Ok();
+}
+} // namespace mozilla
+
+#undef LOG_DEBUG
+#undef LOG_WARN
+#undef LOG_ERROR
diff --git a/dom/media/mp4/MoofParser.h b/dom/media/mp4/MoofParser.h
new file mode 100644
index 0000000000..9099df7d14
--- /dev/null
+++ b/dom/media/mp4/MoofParser.h
@@ -0,0 +1,364 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOOF_PARSER_H_
+#define MOOF_PARSER_H_
+
+#include "mozilla/ResultExtensions.h"
+#include "mozilla/Variant.h"
+#include "Atom.h"
+#include "AtomType.h"
+#include "SinfParser.h"
+#include "ByteStream.h"
+#include "MP4Interval.h"
+#include "MediaResource.h"
+
+namespace mozilla {
+
+// Signed 64-bit count of microseconds; used below for sample decode times,
+// composition ranges and moof time ranges.
+typedef int64_t Microseconds;
+
+// Forward declarations; only references to these types appear in this header.
+class Box;
+class BoxContext;
+class BoxReader;
+class Moof;
+
+// Used to track the CTS end time of the last sample of a track
+// in the preceding Moof, so that we can smooth tracks' timestamps
+// across Moofs.
+struct TrackEndCts {
+ TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime)
+ : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
+ // Id of the track this entry belongs to.
+ uint32_t mTrackId;
+ // CTS end time recorded for that track.
+ Microseconds mCtsEndTime;
+};
+
+// Creation time, modification time, timescale and duration read from a
+// header box. Also the base class of Tkhd and Mdhd.
+class Mvhd : public Atom {
+ public:
+  Mvhd()
+      : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
+  explicit Mvhd(Box& aBox);
+
+  // Converts aTimescaleUnits, a (possibly negative) count of 1/mTimescale
+  // second units, to microseconds.
+  //
+  // The whole-second and fractional parts are converted separately so the
+  // intermediate products stay small. Returns Err(NS_ERROR_FAILURE) when
+  // mTimescale is 0 or when the result would not fit in a signed 64-bit
+  // microsecond count (previously that case was signed-overflow undefined
+  // behaviour).
+  Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) {
+    if (!mTimescale) {
+      NS_WARNING("invalid mTimescale");
+      return Err(NS_ERROR_FAILURE);
+    }
+    const int64_t kMicrosecondsPerSecond = 1000000ll;
+    const int64_t major = aTimescaleUnits / mTimescale;
+    const int64_t remainder = aTimescaleUnits % mTimescale;
+    // |remainder| is below 2^32, so remainder * 1e6 cannot overflow and the
+    // fractional part is below one second in magnitude. Keeping `major`
+    // strictly inside the limits leaves one second of headroom for it.
+    if (major >= INT64_MAX / kMicrosecondsPerSecond ||
+        major <= INT64_MIN / kMicrosecondsPerSecond) {
+      NS_WARNING("timestamp does not fit in microseconds");
+      return Err(NS_ERROR_FAILURE);
+    }
+    return major * kMicrosecondsPerSecond +
+           remainder * kMicrosecondsPerSecond / mTimescale;
+  }
+
+  uint64_t mCreationTime;
+  uint64_t mModificationTime;
+  // Units per second; 0 until a box has been parsed successfully.
+  uint32_t mTimescale;
+  uint64_t mDuration;
+
+ protected:
+  // Fills the members from aBox; fails on a truncated box or unknown version.
+  Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Track header: the Mvhd time fields plus the track id.
+class Tkhd : public Mvhd {
+ public:
+ Tkhd() : mTrackId(0) {}
+ explicit Tkhd(Box& aBox);
+
+ uint32_t mTrackId;
+
+ protected:
+ // Non-virtual; hides Mvhd::Parse for Tkhd objects.
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Media header; adds nothing to Mvhd and is parsed by Mvhd's constructor.
+class Mdhd : public Mvhd {
+ public:
+ Mdhd() = default;
+ explicit Mdhd(Box& aBox);
+};
+
+// Per-track sample defaults. Tfhd derives from this class and starts out as
+// a copy of a Trex.
+class Trex : public Atom {
+ public:
+ // Creates a Trex for aTrackId with every other field zero. mValid is left
+ // at Atom's default (false).
+ explicit Trex(uint32_t aTrackId)
+ : mFlags(0),
+ mTrackId(aTrackId),
+ mDefaultSampleDescriptionIndex(0),
+ mDefaultSampleDuration(0),
+ mDefaultSampleSize(0),
+ mDefaultSampleFlags(0) {}
+
+ // Parses the fields from aBox.
+ explicit Trex(Box& aBox);
+
+ uint32_t mFlags;
+ uint32_t mTrackId;
+ uint32_t mDefaultSampleDescriptionIndex;
+ uint32_t mDefaultSampleDuration;
+ uint32_t mDefaultSampleSize;
+ uint32_t mDefaultSampleFlags;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Track fragment header: a copy of the track's Trex defaults, optionally
+// overridden by the box, plus the base data offset.
+class Tfhd : public Trex {
+ public:
+ // Copies aTrex, including its validity, without parsing any box.
+ explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
+ mValid = aTrex.IsValid();
+ }
+ // Copies aTrex, then applies the fields present in aBox.
+ Tfhd(Box& aBox, Trex& aTrex);
+
+ uint64_t mBaseDataOffset;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Holds the base media decode time read from a "tfdt" box. A
+// default-constructed Tfdt is not valid.
+class Tfdt : public Atom {
+ public:
+ Tfdt() : mBaseMediaDecodeTime(0) {}
+ explicit Tfdt(Box& aBox);
+
+ uint64_t mBaseMediaDecodeTime;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Edit list summary: the media start time and the leading empty duration.
+class Edts : public Atom {
+ public:
+ Edts() : mMediaStart(0), mEmptyOffset(0) {}
+ explicit Edts(Box& aBox);
+ // Always reports valid, whether or not a box was parsed successfully.
+ virtual bool IsValid() override {
+ // edts is optional
+ return true;
+ }
+
+ // Media time of the single handled edit.
+ int64_t mMediaStart;
+ // Segment duration of a leading empty edit.
+ int64_t mEmptyOffset;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// One sample of a moof, as recorded in Moof::mIndex.
+struct Sample {
+ // Byte range of the sample data.
+ mozilla::MediaByteRange mByteRange;
+ // Byte range of the sample's common-encryption auxiliary info.
+ mozilla::MediaByteRange mCencRange;
+ Microseconds mDecodeTime;
+ // Composition (presentation) start and end times.
+ MP4Interval<Microseconds> mCompositionRange;
+ // True when the sample is treated as a sync sample (keyframe).
+ bool mSync;
+};
+
+// Sample auxiliary information sizes ("saiz" box).
+class Saiz final : public Atom {
+ public:
+ // aDefaultType is used as mAuxInfoType when the box does not specify one.
+ Saiz(Box& aBox, AtomType aDefaultType);
+
+ AtomType mAuxInfoType;
+ uint32_t mAuxInfoTypeParameter;
+ // One size byte per sample.
+ FallibleTArray<uint8_t> mSampleInfoSize;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Sample auxiliary information offsets ("saio" box).
+class Saio final : public Atom {
+ public:
+ // aDefaultType is used as mAuxInfoType when the box does not specify one.
+ Saio(Box& aBox, AtomType aDefaultType);
+
+ AtomType mAuxInfoType;
+ uint32_t mAuxInfoTypeParameter;
+ // Offsets as read from the box, widened to 64 bits.
+ FallibleTArray<uint64_t> mOffsets;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// One entry of a sample-to-group box: a run of mSampleCount samples that
+// share the group description index mGroupDescriptionIndex.
+struct SampleToGroupEntry {
+ public:
+ // Bases for group description indices.
+ // NOTE(review): presumably indices at or above the fragment base refer to
+ // descriptions carried in the fragment rather than the track -- confirm
+ // against the code that consumes these constants.
+ static const uint32_t kTrackGroupDescriptionIndexBase = 0;
+ static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;
+
+ SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
+ : mSampleCount(aSampleCount),
+ mGroupDescriptionIndex(aGroupDescriptionIndex) {}
+
+ uint32_t mSampleCount;
+ uint32_t mGroupDescriptionIndex;
+};
+
+class Sbgp final : public Atom // SampleToGroup box.
+{
+ public:
+ explicit Sbgp(Box& aBox);
+
+ AtomType mGroupingType;
+ // Only read from version 1 boxes; 0 otherwise.
+ uint32_t mGroupingTypeParam;
+ // Entries in the order they appear in the box.
+ FallibleTArray<SampleToGroupEntry> mEntries;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Stores information from CencSampleEncryptionInformationGroupEntry (seig).
+// Cenc here refers to the common encryption standard, rather than the specific
+// cenc scheme from that standard. This structure is used for all encryption
+// schemes. I.e. it is used for both cenc and cbcs, not just cenc.
+struct CencSampleEncryptionInfoEntry final {
+ public:
+ CencSampleEncryptionInfoEntry() = default;
+
+ // Fills this entry from aReader; returns an error on malformed or truncated
+ // data.
+ Result<Ok, nsresult> Init(BoxReader& aReader);
+
+ bool mIsEncrypted = false;
+ // Per-sample IV size in bytes.
+ uint8_t mIVSize = 0;
+ CopyableTArray<uint8_t> mKeyId;
+ // High and low nibble of the entry's pattern byte, respectively.
+ uint8_t mCryptByteBlock = 0;
+ uint8_t mSkipByteBlock = 0;
+ // Constant IV. (The member name is misspelled, but it is part of the public
+ // interface.)
+ CopyableTArray<uint8_t> mConsantIV;
+};
+
+class Sgpd final : public Atom // SampleGroupDescription box.
+{
+ public:
+ explicit Sgpd(Box& aBox);
+
+ AtomType mGroupingType;
+ // Every description entry is parsed as a CencSampleEncryptionInfoEntry,
+ // whatever the grouping type is.
+ FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Audio/video entries from the sample description box (stsd). We only need to
+// store if these are encrypted, so do not need a specialized class for
+// different audio and video data. Currently most of the parsing of these
+// entries is done by mp4parse-rust, but the moof parser needs to know which
+// of these are encrypted when parsing the track fragment header (tfhd).
+struct SampleDescriptionEntry {
+ bool mIsEncryptedEntry = false;
+};
+
+// Used to indicate in variants if all tracks should be parsed.
+struct ParseAllTracks {};
+
+// Either ParseAllTracks, or the id (uint32_t) of the single track to parse.
+typedef Variant<ParseAllTracks, uint32_t> TrackParseMode;
+
+// The parsed contents of one movie fragment: its samples plus the
+// encryption side data collected from its track fragments.
+class Moof final : public Atom {
+ public:
+ Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+ Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+ uint64_t* aDecodeTime, bool aIsAudio,
+ nsTArray<TrackEndCts>& aTracksEndCts);
+ // Collects the auxiliary-info byte ranges for aType into aByteRanges;
+ // returns false when none could be found.
+ bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
+ // Extends mTimeRange.end to aMoof's start when the gap between the two is
+ // positive and within mMaxRoundingError.
+ void FixRounding(const Moof& aMoof);
+
+ mozilla::MediaByteRange mRange;
+ // Span of the byte ranges of all samples in mIndex.
+ mozilla::MediaByteRange mMdatRange;
+ MP4Interval<Microseconds> mTimeRange;
+ // Samples in the order they were parsed from the trun boxes.
+ FallibleTArray<Sample> mIndex;
+
+ // Entries of the fragment's "seig" sgpd and sbgp boxes.
+ FallibleTArray<CencSampleEncryptionInfoEntry>
+ mFragmentSampleEncryptionInfoEntries;
+ FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
+
+ Tfhd mTfhd;
+ FallibleTArray<Saiz> mSaizs;
+ FallibleTArray<Saio> mSaios;
+ nsTArray<nsTArray<uint8_t>> mPsshes;
+
+ private:
+ // aDecodeTime is updated to the end of the parsed TRAF on return.
+ void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+ Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+ uint64_t* aDecodeTime, bool aIsAudio);
+ // aDecodeTime is updated to the end of the parsed TRUN on return.
+ Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
+ Edts& aEdts, uint64_t* aDecodeTime,
+ bool aIsAudio);
+ // Process the sample auxiliary information used by common encryption.
+ // aScheme is used to select the appropriate auxiliary information and should
+ // be set based on the encryption scheme used by the track being processed.
+ // Note, the term cenc here refers to the standard, not the specific scheme
+ // from that standard. I.e. this function is used to handle auxiliary
+ // information from both the cenc and cbcs schemes.
+ bool ProcessCencAuxInfo(AtomType aScheme);
+ // Rounding allowance used by FixRounding(); ParseTrun() grows it with every
+ // parsed run.
+ uint64_t mMaxRoundingError;
+};
+
+DDLoggedTypeDeclName(MoofParser);
+
+// Parses the movie and movie fragment boxes read from a ByteStream and keeps
+// the resulting Moofs.
+class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
+ public:
+ // aTrackParseMode selects a single track id or all tracks. When a single
+ // track is selected, mTrex starts out with that id; otherwise with id 0.
+ MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
+ bool aIsAudio)
+ : mSource(aSource),
+ mOffset(0),
+ mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
+ : 0),
+ mIsAudio(aIsAudio),
+ mLastDecodeTime(0),
+ mTrackParseMode(aTrackParseMode) {
+ // Setting mIsMultitrackParser is a nasty work around for calculating
+ // the composition range for MSE that causes the parser to parse multiple
+ // tracks. Ideally we'd store an array of tracks with different metadata
+ // for each.
+ // NOTE(review): no mIsMultitrackParser member is visible in this class;
+ // the comment above presumably refers to what is now mTrackParseMode.
+ DDLINKCHILD("source", aSource);
+ }
+ bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
+ // If *aCanEvict is true on entry, removes all moofs already parsed from the
+ // index and then rebuilds the index. *aCanEvict is set to true upon return
+ // if some moofs were removed.
+ bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
+ bool* aCanEvict);
+ bool RebuildFragmentedIndex(BoxContext& aContext);
+ MP4Interval<Microseconds> GetCompositionRange(
+ const mozilla::MediaByteRangeSet& aByteRanges);
+ bool ReachedEnd();
+ // One parser per box type.
+ void ParseMoov(Box& aBox);
+ void ParseTrak(Box& aBox);
+ void ParseMdia(Box& aBox);
+ void ParseMvex(Box& aBox);
+
+ void ParseMinf(Box& aBox);
+ void ParseStbl(Box& aBox);
+ void ParseStsd(Box& aBox);
+ void ParseEncrypted(Box& aBox);
+
+ bool BlockingReadNextMoof();
+
+ already_AddRefed<mozilla::MediaByteBuffer> Metadata();
+ MediaByteRange FirstCompleteMediaSegment();
+ MediaByteRange FirstCompleteMediaHeader();
+
+ mozilla::MediaByteRange mInitRange;
+ RefPtr<ByteStream> mSource;
+ uint64_t mOffset;
+ // Most recently parsed header data for the track(s) being parsed.
+ Mvhd mMvhd;
+ Mdhd mMdhd;
+ Trex mTrex;
+ Tfdt mTfdt;
+ Edts mEdts;
+ Sinf mSinf;
+
+ FallibleTArray<CencSampleEncryptionInfoEntry>
+ mTrackSampleEncryptionInfoEntries;
+ FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
+ FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
+
+ // Mutable access to the parsed moofs.
+ nsTArray<Moof>& Moofs() { return mMoofs; }
+
+ private:
+ void ScanForMetadata(mozilla::MediaByteRange& aMoov);
+ nsTArray<Moof> mMoofs;
+ nsTArray<MediaByteRange> mMediaRanges;
+ nsTArray<TrackEndCts> mTracksEndCts;
+ bool mIsAudio;
+ uint64_t mLastDecodeTime;
+ // Either a ParseAllTracks if in multitrack mode, or an integer representing
+ // the track_id for the track being parsed. If parsing a specific track, mTrex
+ // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
+ // is a valid track id -- this is not allowed in the spec, but such mp4s
+ // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
+ // id based on the tracks being parsed.
+ const TrackParseMode mTrackParseMode;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/ResourceStream.cpp b/dom/media/mp4/ResourceStream.cpp
new file mode 100644
index 0000000000..ce2fb6f2f6
--- /dev/null
+++ b/dom/media/mp4/ResourceStream.cpp
@@ -0,0 +1,56 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ResourceStream.h"
+
+namespace mozilla {
+
+// Wraps aResource, which must be non-null. The stream starts with no
+// outstanding pins.
+ResourceStream::ResourceStream(mozilla::MediaResource* aResource)
+ : mResource(aResource), mPinCount(0) {
+ MOZ_ASSERT(aResource);
+ DDLINKCHILD("resource", &mResource);
+}
+
+// Every Pin() must have been matched by an Unpin() before destruction.
+ResourceStream::~ResourceStream() { MOZ_ASSERT(mPinCount == 0); }
+
+// Reads up to aCount bytes starting at aOffset into aBuffer, looping over
+// short reads until aCount bytes have arrived or a read returns no data.
+//
+// On success returns true and stores the number of bytes actually read (which
+// may be less than aCount) in *aBytesRead. If the underlying read fails,
+// returns false and leaves *aBytesRead untouched.
+bool ResourceStream::ReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+                            size_t* aBytesRead) {
+  // The running total is kept as size_t so that counts above 4 GiB neither
+  // truncate nor wrap; each underlying read takes a 32-bit length, so a
+  // single request is capped at UINT32_MAX bytes.
+  size_t sum = 0;
+  uint32_t bytesRead = 0;
+  do {
+    uint64_t offset = aOffset + sum;
+    char* buffer = reinterpret_cast<char*>(aBuffer) + sum;
+    const size_t remaining = aCount - sum;
+    const uint32_t toRead = remaining > UINT32_MAX
+                                ? UINT32_MAX
+                                : static_cast<uint32_t>(remaining);
+    nsresult rv = mResource.ReadAt(offset, buffer, toRead, &bytesRead);
+    if (NS_FAILED(rv)) {
+      return false;
+    }
+    sum += bytesRead;
+  } while (sum < aCount && bytesRead > 0);
+
+  *aBytesRead = sum;
+  return true;
+}
+
+// Reads exactly aCount bytes at aOffset from the resource's cache into
+// aBuffer. Returns true with *aBytesRead == aCount on success; on failure
+// returns false with *aBytesRead == 0.
+bool ResourceStream::CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+                                  size_t* aBytesRead) {
+  char* const destination = reinterpret_cast<char*>(aBuffer);
+  const nsresult status =
+      mResource.GetResource()->ReadFromCache(destination, aOffset, aCount);
+  const bool failed = NS_FAILED(status);
+  *aBytesRead = failed ? 0 : aCount;
+  return !failed;
+}
+
+// Stores the resource length in *aSize and returns true, or returns false
+// without touching *aSize when the resource reports a negative length.
+bool ResourceStream::Length(int64_t* aSize) {
+  // Query the length once so the value that is checked is the value that is
+  // returned.
+  const int64_t length = mResource.GetLength();
+  if (length < 0) {
+    return false;
+  }
+  *aSize = length;
+  return true;
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/ResourceStream.h b/dom/media/mp4/ResourceStream.h
new file mode 100644
index 0000000000..1aa59fdaed
--- /dev/null
+++ b/dom/media/mp4/ResourceStream.h
@@ -0,0 +1,48 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RESOURCESTREAM_H_
+#define RESOURCESTREAM_H_
+
+#include "MediaResource.h"
+#include "ByteStream.h"
+#include "mozilla/RefPtr.h"
+
+namespace mozilla {
+
+DDLoggedTypeDeclNameAndBase(ResourceStream, ByteStream);
+
+// A ByteStream backed by a MediaResource.
+class ResourceStream : public ByteStream,
+ public DecoderDoctorLifeLogger<ResourceStream> {
+ public:
+ explicit ResourceStream(mozilla::MediaResource* aResource);
+
+ // Reads up to aCount bytes at the given offset; *aBytesRead receives the
+ // number of bytes read.
+ virtual bool ReadAt(int64_t offset, void* aBuffer, size_t aCount,
+ size_t* aBytesRead) override;
+ // Like ReadAt(), but reads from the resource's cache.
+ virtual bool CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
+ size_t* aBytesRead) override;
+ // Returns false when the resource reports a negative length.
+ virtual bool Length(int64_t* size) override;
+
+ // Pins the underlying resource and counts the pin.
+ void Pin() {
+ mResource.GetResource()->Pin();
+ ++mPinCount;
+ }
+
+ // Releases one pin; must be balanced with an earlier Pin().
+ void Unpin() {
+ mResource.GetResource()->Unpin();
+ MOZ_ASSERT(mPinCount);
+ --mPinCount;
+ }
+
+ protected:
+ virtual ~ResourceStream();
+
+ private:
+ mozilla::MediaResourceIndex mResource;
+ // Number of Pin() calls not yet matched by Unpin(); asserted to be zero in
+ // the destructor.
+ uint32_t mPinCount;
+};
+
+} // namespace mozilla
+
+#endif // RESOURCESTREAM_H_
diff --git a/dom/media/mp4/SinfParser.cpp b/dom/media/mp4/SinfParser.cpp
new file mode 100644
index 0000000000..4ea14adaaa
--- /dev/null
+++ b/dom/media/mp4/SinfParser.cpp
@@ -0,0 +1,95 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Unused.h"
+#include "SinfParser.h"
+#include "AtomType.h"
+#include "Box.h"
+#include "ByteStream.h"
+
+namespace mozilla {
+
+Sinf::Sinf(Box& aBox) : mDefaultIVSize(0), mDefaultEncryptionType() {
+ SinfParser parser(aBox);
+ if (parser.GetSinf().IsValid()) {
+ *this = parser.GetSinf();
+ }
+}
+
+// Walks the children of aBox and feeds every "schm" and "schi" child to its
+// parser. Parse errors are deliberately ignored here.
+SinfParser::SinfParser(Box& aBox) {
+  for (Box child = aBox.FirstChild(); child.IsAvailable();
+       child = child.Next()) {
+    if (child.IsType("schm")) {
+      mozilla::Unused << ParseSchm(child);
+      continue;
+    }
+    if (child.IsType("schi")) {
+      mozilla::Unused << ParseSchi(child);
+    }
+  }
+}
+
+// Reads the scheme type from a "schm" box into mSinf.mDefaultEncryptionType.
+// Fails when the box holds fewer than the 8 bytes consumed here.
+Result<Ok, nsresult> SinfParser::ParseSchm(Box& aBox) {
+  BoxReader reader(aBox);
+
+  // 4 bytes of version/flags followed by 4 bytes of scheme type.
+  const bool hasHeader = reader->Remaining() >= 8;
+  if (!hasHeader) {
+    return Err(NS_ERROR_FAILURE);
+  }
+
+  MOZ_TRY(reader->ReadU32());  // flags -- ignore
+  MOZ_TRY_VAR(mSinf.mDefaultEncryptionType, reader->ReadU32());
+  return Ok();
+}
+
+Result<Ok, nsresult> SinfParser::ParseSchi(Box& aBox) {
+ for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
+ if (box.IsType("tenc") && ParseTenc(box).isErr()) {
+ return Err(NS_ERROR_FAILURE);
+ }
+ }
+ return Ok();
+}
+
+// Reads the track encryption defaults from a "tenc" box into mSinf: the
+// crypt/skip pattern (version >= 1 only), the default per-sample IV size, the
+// default key id and, for protected content with an IV size of 0, the
+// default constant IV.
+// Returns an error if the box is too short or allocation fails.
+Result<Ok, nsresult> SinfParser::ParseTenc(Box& aBox) {
+ BoxReader reader(aBox);
+
+ // The fixed part read below is 24 bytes: 4 (version/flags) + 1 + 1 + 1 + 1
+ // + 16 (key id). This check is what makes the unchecked Read(16) below safe.
+ if (reader->Remaining() < 24) {
+ return Err(NS_ERROR_FAILURE);
+ }
+
+ uint32_t flags;
+ MOZ_TRY_VAR(flags, reader->ReadU32());
+ uint8_t version = flags >> 24;
+
+ // Skip reserved byte
+ MOZ_TRY(reader->ReadU8());
+ if (version >= 1) {
+ // High nibble: crypt byte block; low nibble: skip byte block.
+ uint8_t pattern;
+ MOZ_TRY_VAR(pattern, reader->ReadU8());
+ mSinf.mDefaultCryptByteBlock = pattern >> 4;
+ mSinf.mDefaultSkipByteBlock = pattern & 0x0f;
+ } else {
+ // Reserved if version is less than 1
+ MOZ_TRY(reader->ReadU8());
+ mSinf.mDefaultCryptByteBlock = 0;
+ mSinf.mDefaultSkipByteBlock = 0;
+ }
+
+ uint8_t isEncrypted;
+ MOZ_TRY_VAR(isEncrypted, reader->ReadU8());
+ MOZ_TRY_VAR(mSinf.mDefaultIVSize, reader->ReadU8());
+ memcpy(mSinf.mDefaultKeyID, reader->Read(16), 16);
+
+ // Protected content without per-sample IVs carries a constant IV instead.
+ if (isEncrypted && mSinf.mDefaultIVSize == 0) {
+ uint8_t defaultConstantIVSize;
+ MOZ_TRY_VAR(defaultConstantIVSize, reader->ReadU8());
+ if (!mSinf.mDefaultConstantIV.SetLength(defaultConstantIVSize,
+ mozilla::fallible)) {
+ return Err(NS_ERROR_FAILURE);
+ }
+ for (uint8_t i = 0; i < defaultConstantIVSize; i++) {
+ MOZ_TRY_VAR(mSinf.mDefaultConstantIV.ElementAt(i), reader->ReadU8());
+ }
+ }
+ return Ok();
+}
+
+} // namespace mozilla
diff --git a/dom/media/mp4/SinfParser.h b/dom/media/mp4/SinfParser.h
new file mode 100644
index 0000000000..084892854c
--- /dev/null
+++ b/dom/media/mp4/SinfParser.h
@@ -0,0 +1,56 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SINF_PARSER_H_
+#define SINF_PARSER_H_
+
+#include "mozilla/ResultExtensions.h"
+#include "Atom.h"
+#include "AtomType.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+class Box;
+
+// Default encryption parameters collected by SinfParser: the encryption
+// scheme type, default IV size, default key ID, crypt/skip byte blocks and an
+// optional constant IV.
+//
+// All plain-data members carry default member initializers so that an
+// instance never exposes indeterminate bytes, whichever constructor built it
+// and whether or not a 'tenc' box was parsed. In particular the key ID is
+// zero-filled until a parser overwrites it.
+class Sinf : public Atom {
+ public:
+  Sinf()
+      : mDefaultIVSize(0),
+        mDefaultEncryptionType(),
+        mDefaultCryptByteBlock(0),
+        mDefaultSkipByteBlock(0) {}
+  explicit Sinf(Box& aBox);
+
+  // True when an encryption scheme is set and there is either a non-zero
+  // default IV size or a non-empty constant IV.
+  bool IsValid() override {
+    return !!mDefaultEncryptionType &&  // Should have an encryption scheme
+           (mDefaultIVSize > 0 ||       // and either a default IV size
+            mDefaultConstantIV.Length() > 0);  // or a constant IV.
+  }
+
+  uint8_t mDefaultIVSize = 0;
+  AtomType mDefaultEncryptionType;
+  uint8_t mDefaultKeyID[16] = {};
+  uint8_t mDefaultCryptByteBlock = 0;
+  uint8_t mDefaultSkipByteBlock = 0;
+  CopyableTArray<uint8_t> mDefaultConstantIV;
+};
+
+// Builds a Sinf from a box. The per-box helpers below each write what they
+// parse into mSinf; the result is read back through GetSinf().
+class SinfParser {
+ public:
+  explicit SinfParser(Box& aBox);
+
+  // Mutable access to the Sinf populated by this parser.
+  Sinf& GetSinf() {
+    return mSinf;
+  }
+
+ private:
+  // Each helper returns Ok on success and an nsresult error otherwise.
+  Result<Ok, nsresult> ParseTenc(Box& aBox);
+  Result<Ok, nsresult> ParseSchi(Box& aBox);
+  Result<Ok, nsresult> ParseSchm(Box& aBox);
+
+  // Accumulated parse result.
+  Sinf mSinf;
+};
+
+} // namespace mozilla
+
+#endif // SINF_PARSER_H_
diff --git a/dom/media/mp4/moz.build b/dom/media/mp4/moz.build
new file mode 100644
index 0000000000..cf93a172db
--- /dev/null
+++ b/dom/media/mp4/moz.build
@@ -0,0 +1,45 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Headers from this directory appended to the EXPORTS list.
+EXPORTS += [
+ "Atom.h",
+ "AtomType.h",
+ "Box.h",
+ "BufferStream.h",
+ "ByteStream.h",
+ "DecoderData.h",
+ "Index.h",
+ "MoofParser.h",
+ "MP4Decoder.h",
+ "MP4Demuxer.h",
+ "MP4Interval.h",
+ "MP4Metadata.h",
+ "ResourceStream.h",
+ "SinfParser.h",
+]
+
+# C++ source files from this directory appended to UNIFIED_SOURCES.
+UNIFIED_SOURCES += [
+ "Box.cpp",
+ "BufferStream.cpp",
+ "DecoderData.cpp",
+ "Index.cpp",
+ "MoofParser.cpp",
+ "MP4Decoder.cpp",
+ "MP4Demuxer.cpp",
+ "MP4Metadata.cpp",
+ "ResourceStream.cpp",
+ "SinfParser.cpp",
+]
+
+# The final library for this directory is "xul".
+FINAL_LIBRARY = "xul"
+
+# Suppress signed/unsigned comparison warnings for now.
+CXXFLAGS += [
+ "-Wno-sign-compare",
+]
+
+# Add libFuzzer configuration directives
+include("/tools/fuzzing/libfuzzer-config.mozbuild")