diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /dom/media/mp4 | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/mp4')
25 files changed, 5513 insertions, 0 deletions
diff --git a/dom/media/mp4/Atom.h b/dom/media/mp4/Atom.h new file mode 100644 index 0000000000..f008dfe148 --- /dev/null +++ b/dom/media/mp4/Atom.h @@ -0,0 +1,21 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ATOM_H_ +#define ATOM_H_ + +namespace mozilla { + +class Atom { + public: + Atom() : mValid(false) {} + virtual bool IsValid() { return mValid; } + + protected: + bool mValid; +}; + +} // namespace mozilla + +#endif // ATOM_H_ diff --git a/dom/media/mp4/AtomType.h b/dom/media/mp4/AtomType.h new file mode 100644 index 0000000000..dcecde845d --- /dev/null +++ b/dom/media/mp4/AtomType.h @@ -0,0 +1,29 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ATOM_TYPE_H_ +#define ATOM_TYPE_H_ + +#include <stdint.h> +#include "mozilla/EndianUtils.h" + +namespace mozilla { + +class AtomType { + public: + AtomType() : mType(0) {} + MOZ_IMPLICIT AtomType(uint32_t aType) : mType(aType) {} + MOZ_IMPLICIT AtomType(const char* aType) + : mType(BigEndian::readUint32(aType)) {} + bool operator==(const AtomType& aType) const { return mType == aType.mType; } + bool operator!() const { return !mType; } + + private: + uint32_t mType; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/Box.cpp b/dom/media/mp4/Box.cpp new file mode 100644 index 0000000000..334ba3e3f8 --- /dev/null +++ b/dom/media/mp4/Box.cpp @@ -0,0 +1,230 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Box.h" +#include "ByteStream.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/Unused.h" +#include <algorithm> + +namespace mozilla { + +// Limit reads to 32MiB max. +// static +const uint64_t Box::kMAX_BOX_READ = 32 * 1024 * 1024; + +// Returns the offset from the start of the body of a box of type |aType| +// to the start of its first child. +static uint32_t BoxOffset(AtomType aType) { + const uint32_t FULLBOX_OFFSET = 4; + + if (aType == AtomType("mp4a") || aType == AtomType("enca")) { + // AudioSampleEntry; ISO 14496-12, section 8.16 + return 28; + } else if (aType == AtomType("mp4v") || aType == AtomType("encv")) { + // VideoSampleEntry; ISO 14496-12, section 8.16 + return 78; + } else if (aType == AtomType("stsd")) { + // SampleDescriptionBox; ISO 14496-12, section 8.16 + // This is a FullBox, and contains a |count| member before its child + // boxes. 
+ return FULLBOX_OFFSET + 4; + } + + return 0; +} + +Box::Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent) + : mContext(aContext), mParent(aParent) { + uint8_t header[8]; + + if (aOffset > INT64_MAX - sizeof(header)) { + return; + } + + MediaByteRange headerRange(aOffset, aOffset + sizeof(header)); + if (mParent && !mParent->mRange.Contains(headerRange)) { + return; + } + + const MediaByteRange* byteRange; + for (int i = 0;; i++) { + if (i == mContext->mByteRanges.Length()) { + return; + } + + byteRange = static_cast<const MediaByteRange*>(&mContext->mByteRanges[i]); + if (byteRange->Contains(headerRange)) { + break; + } + } + + size_t bytes; + if (!mContext->mSource->CachedReadAt(aOffset, header, sizeof(header), + &bytes) || + bytes != sizeof(header)) { + return; + } + + uint64_t size = BigEndian::readUint32(header); + if (size == 1) { + uint8_t bigLength[8]; + if (aOffset > INT64_MAX - sizeof(header) - sizeof(bigLength)) { + return; + } + MediaByteRange bigLengthRange(headerRange.mEnd, + headerRange.mEnd + sizeof(bigLength)); + if ((mParent && !mParent->mRange.Contains(bigLengthRange)) || + !byteRange->Contains(bigLengthRange) || + !mContext->mSource->CachedReadAt(aOffset + sizeof(header), bigLength, + sizeof(bigLength), &bytes) || + bytes != sizeof(bigLength)) { + return; + } + size = BigEndian::readUint64(bigLength); + mBodyOffset = bigLengthRange.mEnd; + } else if (size == 0) { + // box extends to end of file. + size = mContext->mByteRanges.LastInterval().mEnd - aOffset; + mBodyOffset = headerRange.mEnd; + } else { + mBodyOffset = headerRange.mEnd; + } + + if (size > INT64_MAX) { + return; + } + int64_t end = static_cast<int64_t>(aOffset) + static_cast<int64_t>(size); + if (end < static_cast<int64_t>(aOffset)) { + // Overflowed. 
+ return; + } + + mType = BigEndian::readUint32(&header[4]); + mChildOffset = mBodyOffset + BoxOffset(mType); + + MediaByteRange boxRange(aOffset, end); + if (mChildOffset > boxRange.mEnd || + (mParent && !mParent->mRange.Contains(boxRange)) || + !byteRange->Contains(boxRange)) { + return; + } + + mRange = boxRange; +} + +Box::Box() + : mContext(nullptr), mBodyOffset(0), mChildOffset(0), mParent(nullptr) {} + +Box Box::Next() const { + MOZ_ASSERT(IsAvailable()); + return Box(mContext, mRange.mEnd, mParent); +} + +Box Box::FirstChild() const { + MOZ_ASSERT(IsAvailable()); + if (mChildOffset == mRange.mEnd) { + return Box(); + } + return Box(mContext, mChildOffset, this); +} + +nsTArray<uint8_t> Box::ReadCompleteBox() const { + const size_t length = mRange.mEnd - mRange.mStart; + nsTArray<uint8_t> out(length); + out.SetLength(length); + size_t bytesRead = 0; + if (!mContext->mSource->CachedReadAt(mRange.mStart, out.Elements(), length, + &bytesRead) || + bytesRead != length) { + // Byte ranges are being reported incorrectly + NS_WARNING("Read failed in mozilla::Box::ReadCompleteBox()"); + return nsTArray<uint8_t>(0); + } + return out; +} + +nsTArray<uint8_t> Box::Read() const { + nsTArray<uint8_t> out; + Unused << Read(&out, mRange); + return out; +} + +bool Box::Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const { + int64_t length; + if (!mContext->mSource->Length(&length)) { + // The HTTP server didn't give us a length to work with. + // Limit the read to kMAX_BOX_READ max. 
+ length = std::min(aRange.mEnd - mChildOffset, kMAX_BOX_READ); + } else { + length = aRange.mEnd - mChildOffset; + } + aDest->SetLength(length); + size_t bytes; + if (!mContext->mSource->CachedReadAt(mChildOffset, aDest->Elements(), + aDest->Length(), &bytes) || + bytes != aDest->Length()) { + // Byte ranges are being reported incorrectly + NS_WARNING("Read failed in mozilla::Box::Read()"); + aDest->Clear(); + return false; + } + return true; +} + +ByteSlice Box::ReadAsSlice() { + if (!mContext || mRange.IsEmpty()) { + return ByteSlice{nullptr, 0}; + } + + int64_t length; + if (!mContext->mSource->Length(&length)) { + // The HTTP server didn't give us a length to work with. + // Limit the read to kMAX_BOX_READ max. + length = std::min(mRange.mEnd - mChildOffset, kMAX_BOX_READ); + } else { + length = mRange.mEnd - mChildOffset; + } + + const uint8_t* data = + mContext->mSource->GetContiguousAccess(mChildOffset, length); + if (data) { + // We can direct access the underlying storage of the ByteStream. 
+ return ByteSlice{data, size_t(length)}; + } + + uint8_t* p = mContext->mAllocator.Allocate(size_t(length)); + size_t bytes; + if (!mContext->mSource->CachedReadAt(mChildOffset, p, length, &bytes) || + bytes != length) { + // Byte ranges are being reported incorrectly + NS_WARNING("Read failed in mozilla::Box::ReadAsSlice()"); + return ByteSlice{nullptr, 0}; + } + return ByteSlice{p, size_t(length)}; +} + +const size_t BLOCK_CAPACITY = 16 * 1024; + +uint8_t* BumpAllocator::Allocate(size_t aNumBytes) { + if (aNumBytes > BLOCK_CAPACITY) { + mBuffers.AppendElement(nsTArray<uint8_t>(aNumBytes)); + mBuffers.LastElement().SetLength(aNumBytes); + return mBuffers.LastElement().Elements(); + } + for (nsTArray<uint8_t>& buffer : mBuffers) { + if (buffer.Length() + aNumBytes < BLOCK_CAPACITY) { + size_t offset = buffer.Length(); + buffer.SetLength(buffer.Length() + aNumBytes); + return buffer.Elements() + offset; + } + } + mBuffers.AppendElement(nsTArray<uint8_t>(BLOCK_CAPACITY)); + mBuffers.LastElement().SetLength(aNumBytes); + return mBuffers.LastElement().Elements(); +} + +} // namespace mozilla diff --git a/dom/media/mp4/Box.h b/dom/media/mp4/Box.h new file mode 100644 index 0000000000..e63bfbcc90 --- /dev/null +++ b/dom/media/mp4/Box.h @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef BOX_H_ +#define BOX_H_ + +#include <stdint.h> +#include "nsTArray.h" +#include "MediaResource.h" +#include "mozilla/EndianUtils.h" +#include "AtomType.h" +#include "BufferReader.h" + +namespace mozilla { +class ByteStream; + +class BumpAllocator { + public: + uint8_t* Allocate(size_t aNumBytes); + + private: + nsTArray<nsTArray<uint8_t>> mBuffers; +}; + +class BoxContext { + public: + BoxContext(ByteStream* aSource, const MediaByteRangeSet& aByteRanges) + : mSource(aSource), mByteRanges(aByteRanges) {} + + RefPtr<ByteStream> mSource; + const MediaByteRangeSet& mByteRanges; + BumpAllocator mAllocator; +}; + +struct ByteSlice { + const uint8_t* mBytes; + size_t mSize; +}; + +class Box { + public: + Box(BoxContext* aContext, uint64_t aOffset, const Box* aParent = nullptr); + Box(); + + bool IsAvailable() const { return !mRange.IsEmpty(); } + uint64_t Offset() const { return mRange.mStart; } + uint64_t Length() const { return mRange.mEnd - mRange.mStart; } + uint64_t NextOffset() const { return mRange.mEnd; } + const MediaByteRange& Range() const { return mRange; } + const Box* Parent() const { return mParent; } + bool IsType(const char* aType) const { return mType == AtomType(aType); } + + Box Next() const; + Box FirstChild() const; + // Reads the box contents, excluding the header. + nsTArray<uint8_t> Read() const; + + // Reads the complete box; its header and body. + nsTArray<uint8_t> ReadCompleteBox() const; + + // Reads from the content of the box, excluding header. + bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const; + + static const uint64_t kMAX_BOX_READ; + + // Returns a slice, pointing to the data of this box. The lifetime of + // the memory this slice points to matches the box's context's lifetime. 
+ ByteSlice ReadAsSlice(); + + private: + bool Contains(MediaByteRange aRange) const; + BoxContext* mContext; + mozilla::MediaByteRange mRange; + uint64_t mBodyOffset; + uint64_t mChildOffset; + AtomType mType; + const Box* mParent; +}; + +// BoxReader serves box data through an AutoByteReader. The box data is +// stored either in the box's context's bump allocator, or in the ByteStream +// itself if the ByteStream implements the Access() method. +// NOTE: The data the BoxReader reads may be stored in the Box's BoxContext. +// Ensure that the BoxReader doesn't outlive the BoxContext! +class MOZ_RAII BoxReader { + public: + explicit BoxReader(Box& aBox) + : mData(aBox.ReadAsSlice()), mReader(mData.mBytes, mData.mSize) {} + BufferReader* operator->() { return &mReader; } + + private: + ByteSlice mData; + BufferReader mReader; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/BufferStream.cpp b/dom/media/mp4/BufferStream.cpp new file mode 100644 index 0000000000..c2fa40cb8a --- /dev/null +++ b/dom/media/mp4/BufferStream.cpp @@ -0,0 +1,59 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "BufferStream.h" +#include "MediaData.h" +#include "MediaResource.h" +#include <algorithm> + +namespace mozilla { + +BufferStream::BufferStream() + : mStartOffset(0), mData(new mozilla::MediaByteBuffer) {} + +BufferStream::BufferStream(mozilla::MediaByteBuffer* aBuffer) + : mStartOffset(0), mData(aBuffer) {} + +BufferStream::~BufferStream() = default; + +/*virtual*/ +bool BufferStream::ReadAt(int64_t aOffset, void* aData, size_t aLength, + size_t* aBytesRead) { + if (aOffset < mStartOffset || aOffset > mStartOffset + mData->Length()) { + return false; + } + *aBytesRead = + std::min(aLength, size_t(mStartOffset + mData->Length() - aOffset)); + memcpy(aData, mData->Elements() + aOffset - mStartOffset, *aBytesRead); + return true; +} + +/*virtual*/ +bool BufferStream::CachedReadAt(int64_t aOffset, void* aData, size_t aLength, + size_t* aBytesRead) { + return ReadAt(aOffset, aData, aLength, aBytesRead); +} + +/*virtual*/ +bool BufferStream::Length(int64_t* aLength) { + *aLength = mStartOffset + mData->Length(); + return true; +} + +/* virtual */ +void BufferStream::DiscardBefore(int64_t aOffset) { + if (aOffset > mStartOffset) { + mData->RemoveElementsAt(0, aOffset - mStartOffset); + mStartOffset = aOffset; + } +} + +bool BufferStream::AppendBytes(const uint8_t* aData, size_t aLength) { + return mData->AppendElements(aData, aLength, fallible); +} + +MediaByteRange BufferStream::GetByteRange() { + return MediaByteRange(mStartOffset, mStartOffset + mData->Length()); +} +} // namespace mozilla diff --git a/dom/media/mp4/BufferStream.h b/dom/media/mp4/BufferStream.h new file mode 100644 index 0000000000..fb817b5916 --- /dev/null +++ b/dom/media/mp4/BufferStream.h @@ -0,0 +1,45 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef BUFFER_STREAM_H_ +#define BUFFER_STREAM_H_ + +#include "ByteStream.h" +#include "nsTArray.h" +#include "MediaResource.h" + +namespace mozilla { +class MediaByteBuffer; + +DDLoggedTypeDeclNameAndBase(BufferStream, ByteStream); + +class BufferStream : public ByteStream, + public mozilla::DecoderDoctorLifeLogger<BufferStream> { + public: + /* BufferStream does not take ownership of aData nor does it make a copy. + * Therefore BufferStream shouldn't get used after aData is destroyed. + */ + BufferStream(); + explicit BufferStream(mozilla::MediaByteBuffer* aBuffer); + + virtual bool ReadAt(int64_t aOffset, void* aData, size_t aLength, + size_t* aBytesRead) override; + virtual bool CachedReadAt(int64_t aOffset, void* aData, size_t aLength, + size_t* aBytesRead) override; + virtual bool Length(int64_t* aLength) override; + + virtual void DiscardBefore(int64_t aOffset) override; + + bool AppendBytes(const uint8_t* aData, size_t aLength); + + mozilla::MediaByteRange GetByteRange(); + + private: + ~BufferStream(); + int64_t mStartOffset; + RefPtr<mozilla::MediaByteBuffer> mData; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/ByteStream.h b/dom/media/mp4/ByteStream.h new file mode 100644 index 0000000000..0f733dfb97 --- /dev/null +++ b/dom/media/mp4/ByteStream.h @@ -0,0 +1,41 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef STREAM_H_ +#define STREAM_H_ + +#include "DecoderDoctorLogger.h" +#include "nsISupportsImpl.h" + +namespace mozilla { + +DDLoggedTypeDeclName(ByteStream); + +class ByteStream : public DecoderDoctorLifeLogger<ByteStream> { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ByteStream); + + virtual bool ReadAt(int64_t offset, void* data, size_t size, + size_t* bytes_read) = 0; + virtual bool CachedReadAt(int64_t offset, void* data, size_t size, + size_t* bytes_read) = 0; + virtual bool Length(int64_t* size) = 0; + + virtual void DiscardBefore(int64_t offset) {} + + // If this ByteStream's underlying storage of media is in-memory, this + // function returns a pointer to the in-memory storage of data at offset. + // Note that even if a ByteStream stores data in memory, it may not be + // stored contiguously, in which case this returns nullptr. + virtual const uint8_t* GetContiguousAccess(int64_t aOffset, size_t aSize) { + return nullptr; + } + + protected: + virtual ~ByteStream() = default; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/DecoderData.cpp b/dom/media/mp4/DecoderData.cpp new file mode 100644 index 0000000000..185bfb87e4 --- /dev/null +++ b/dom/media/mp4/DecoderData.cpp @@ -0,0 +1,356 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Adts.h" +#include "AnnexB.h" +#include "BufferReader.h" +#include "DecoderData.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/Telemetry.h" +#include "VideoUtils.h" +#include "MP4Metadata.h" +#include "mozilla/Logging.h" + +#include "mp4parse.h" + +#define LOG(...) 
\ + MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) + +using mozilla::media::TimeUnit; + +namespace mozilla { + +mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate( + const uint8_t* aData, size_t aLength) { + BufferReader reader(aData, aLength); + while (reader.Remaining()) { + PsshInfo psshInfo; + if (!reader.ReadArray(psshInfo.uuid, 16)) { + return mozilla::Err(NS_ERROR_FAILURE); + } + + if (!reader.CanReadType<uint32_t>()) { + return mozilla::Err(NS_ERROR_FAILURE); + } + auto length = reader.ReadType<uint32_t>(); + + if (!reader.ReadArray(psshInfo.data, length)) { + return mozilla::Err(NS_ERROR_FAILURE); + } + pssh.AppendElement(std::move(psshInfo)); + } + return mozilla::Ok(); +} + +static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig, + const Mp4parseSinfInfo& aSinf) { + if (aSinf.is_encrypted != 0) { + if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC) { + aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cenc; + } else if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS) { + aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cbcs; + } else { + // Unsupported encryption type; + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Unsupported encryption scheme encountered aSinf.scheme_type=%d", + static_cast<int>(aSinf.scheme_type))); + } + aConfig.mCrypto.mIVSize = aSinf.iv_size; + aConfig.mCrypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length); + aConfig.mCrypto.mCryptByteBlock = aSinf.crypt_byte_block; + aConfig.mCrypto.mSkipByteBlock = aSinf.skip_byte_block; + aConfig.mCrypto.mConstantIV.AppendElements(aSinf.constant_iv.data, + aSinf.constant_iv.length); + } + return NS_OK; +} + +// Verify various information shared by Mp4ParseTrackAudioInfo and +// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an +// appropriate MediaResult indicating if the info is valid or not. 
+// This verifies: +// - That we have a sample_info_count > 0 (valid tracks should have at least one +// sample description entry) +// - That only a single codec is used across all sample infos, as we don't +// handle multiple. +// - If more than one sample information structures contain crypto info. This +// case is not fatal (we don't return an error), but does record telemetry +// to help judge if we need more handling in gecko for multiple crypto. +// +// Telemetry is also recorded on the above. As of writing, the +// telemetry is recorded to give us early warning if MP4s exist that we're not +// handling. Note, if adding new checks and telemetry to this function, +// telemetry should be recorded before returning to ensure it is gathered. +template <typename Mp4ParseTrackAudioOrVideoInfo> +static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry( + Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) { + Telemetry::Accumulate( + Telemetry::MEDIA_MP4_PARSE_NUM_SAMPLE_DESCRIPTION_ENTRIES, + audioOrVideoInfo->sample_info_count); + + bool hasMultipleCodecs = false; + uint32_t cryptoCount = 0; + Mp4parseCodec codecType = audioOrVideoInfo->sample_info[0].codec_type; + for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) { + if (audioOrVideoInfo->sample_info[0].codec_type != codecType) { + hasMultipleCodecs = true; + } + + // Update our encryption info if any is present on the sample info. + if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) { + cryptoCount += 1; + } + } + + Telemetry::Accumulate( + Telemetry:: + MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CODECS, + hasMultipleCodecs); + + // Accumulate if we have multiple (2 or more) crypto entries. + // TODO(1715283): rework this to count number of crypto entries + gather + // richer data. 
+ Telemetry::Accumulate( + Telemetry:: + MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CRYPTO, + cryptoCount >= 2); + + if (audioOrVideoInfo->sample_info_count == 0) { + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Got 0 sample info while verifying track.")); + } + + if (hasMultipleCodecs) { + // Different codecs in a single track. We don't handle this. + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Multiple codecs encountered while verifying track.")); + } + + return NS_OK; +} + +MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack, + const Mp4parseTrackAudioInfo* aAudio, + const IndiceWrapper* aIndices) { + auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio); + NS_ENSURE_SUCCESS(rv, rv); + + Mp4parseCodec codecType = aAudio->sample_info[0].codec_type; + for (uint32_t i = 0; i < aAudio->sample_info_count; i++) { + if (aAudio->sample_info[i].protected_data.is_encrypted) { + auto rv = UpdateTrackProtectedInfo(*this, + aAudio->sample_info[i].protected_data); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + } + + // We assume that the members of the first sample info are representative of + // the entire track. This code will need to be updated should this assumption + // ever not hold. E.g. if we need to handle different codecs in a single + // track, or if we have different numbers or channels in a single track. + Mp4parseByteData mp4ParseSampleCodecSpecific = + aAudio->sample_info[0].codec_specific_config; + Mp4parseByteData extraData = aAudio->sample_info[0].extra_data; + MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(), + "Should have no codec specific data yet"); + if (codecType == MP4PARSE_CODEC_OPUS) { + mMimeType = "audio/opus"_ns; + OpusCodecSpecificData opusCodecSpecificData{}; + // The Opus decoder expects the container's codec delay or + // pre-skip value, in microseconds, as a 64-bit int at the + // start of the codec-specific config blob. 
+ if (mp4ParseSampleCodecSpecific.data && + mp4ParseSampleCodecSpecific.length >= 12) { + uint16_t preskip = mozilla::LittleEndian::readUint16( + mp4ParseSampleCodecSpecific.data + 10); + opusCodecSpecificData.mContainerCodecDelayFrames = preskip; + LOG("Opus stream in MP4 container, %" PRId64 + " microseconds of encoder delay (%" PRIu16 ").", + opusCodecSpecificData.mContainerCodecDelayFrames, preskip); + } else { + // This file will error later as it will be rejected by the opus decoder. + opusCodecSpecificData.mContainerCodecDelayFrames = 0; + } + opusCodecSpecificData.mHeadersBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(opusCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_AAC) { + mMimeType = "audio/mp4a-latm"_ns; + int64_t codecDelayUS = aTrack->media_time; + double USECS_PER_S = 1e6; + // We can't use mozilla::UsecsToFrames here because we need to round, and it + // floors. + uint32_t encoderDelayFrameCount = 0; + if (codecDelayUS > 0) { + encoderDelayFrameCount = static_cast<uint32_t>( + std::lround(static_cast<double>(codecDelayUS) * + aAudio->sample_info->sample_rate / USECS_PER_S)); + LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.", + encoderDelayFrameCount); + } + + uint64_t mediaFrameCount = 0; + // Pass the padding number, in frames, to the AAC decoder as well. 
+ if (aIndices) { + MP4SampleIndex::Indice firstIndice = {0}; + MP4SampleIndex::Indice lastIndice = {0}; + bool rv = aIndices->GetIndice(0, firstIndice); + rv |= aIndices->GetIndice(aIndices->Length() - 1, lastIndice); + if (rv) { + if (firstIndice.start_composition > lastIndice.end_composition) { + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Inconsistent start and end time in index")); + } + // The `end_composition` member of the very last index member is the + // duration of the media in microseconds, excluding decoder delay and + // padding. Convert to frames and give to the decoder so that trimming + // can be done properly. + mediaFrameCount = + lastIndice.end_composition - firstIndice.start_composition; + LOG("AAC stream in MP4 container, total media duration is %" PRIu64 + " frames", + mediaFrameCount); + } else { + LOG("AAC stream in MP4 container, couldn't determine total media time"); + } + } + + AacCodecSpecificData aacCodecSpecificData{}; + + aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount; + aacCodecSpecificData.mMediaFrameCount = mediaFrameCount; + + // codec specific data is used to store the DecoderConfigDescriptor. + aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + // extra data stores the ES_Descriptor. 
+ aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements( + extraData.data, extraData.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(aacCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_FLAC) { + MOZ_ASSERT(extraData.length == 0, + "FLAC doesn't expect extra data so doesn't handle it!"); + mMimeType = "audio/flac"_ns; + FlacCodecSpecificData flacCodecSpecificData{}; + flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(flacCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_MP3) { + // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4 + // specific box, which the rust parser recognizes). However, we don't + // handle any such data here. + mMimeType = "audio/mpeg"_ns; + // TODO(bug 1705812): parse the encoder delay values from the mp4. + mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}}; + } + + mRate = aAudio->sample_info[0].sample_rate; + mChannels = aAudio->sample_info[0].channels; + mBitDepth = aAudio->sample_info[0].bit_depth; + mExtendedProfile = + AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile); + if (aTrack->duration > TimeUnit::MaxTicks()) { + mDuration = TimeUnit::FromInfinity(); + } else { + mDuration = + TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale); + } + mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale); + mTrackId = aTrack->track_id; + + // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT. + if (aAudio->sample_info[0].profile <= 4) { + mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile); + } + + if (mCodecSpecificConfig.is<NoCodecSpecificData>()) { + // Handle codecs that are not explicitly handled above. 
+ MOZ_ASSERT( + extraData.length == 0, + "Codecs that use extra data should be explicitly handled already"); + AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob; + // No codec specific metadata set, use the generic form. + codecSpecificBinaryBlob.mBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)}; + } + + return NS_OK; +} + +bool MP4AudioInfo::IsValid() const { + return mChannels > 0 && mRate > 0 && + // Accept any mime type here, but if it's aac, validate the profile. + (!mMimeType.EqualsLiteral("audio/mp4a-latm") || mProfile > 0 || + mExtendedProfile > 0); +} + +MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track, + const Mp4parseTrackVideoInfo* video) { + auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video); + NS_ENSURE_SUCCESS(rv, rv); + + Mp4parseCodec codecType = video->sample_info[0].codec_type; + for (uint32_t i = 0; i < video->sample_info_count; i++) { + if (video->sample_info[i].protected_data.is_encrypted) { + auto rv = + UpdateTrackProtectedInfo(*this, video->sample_info[i].protected_data); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + } + + // We assume that the members of the first sample info are representative of + // the entire track. This code will need to be updated should this assumption + // ever not hold. E.g. if we need to handle different codecs in a single + // track, or if we have different numbers or channels in a single track. 
+ if (codecType == MP4PARSE_CODEC_AVC) { + mMimeType = "video/avc"_ns; + } else if (codecType == MP4PARSE_CODEC_VP9) { + mMimeType = "video/vp9"_ns; + } else if (codecType == MP4PARSE_CODEC_AV1) { + mMimeType = "video/av1"_ns; + } else if (codecType == MP4PARSE_CODEC_MP4V) { + mMimeType = "video/mp4v-es"_ns; + } else if (codecType == MP4PARSE_CODEC_HEVC) { + mMimeType = "video/hevc"_ns; + } + mTrackId = track->track_id; + if (track->duration > TimeUnit::MaxTicks()) { + mDuration = TimeUnit::FromInfinity(); + } else { + mDuration = + TimeUnit(AssertedCast<int64_t>(track->duration), track->time_scale); + } + mMediaTime = TimeUnit(track->media_time, track->time_scale); + mDisplay.width = AssertedCast<int32_t>(video->display_width); + mDisplay.height = AssertedCast<int32_t>(video->display_height); + mImage.width = video->sample_info[0].image_width; + mImage.height = video->sample_info[0].image_height; + mRotation = ToSupportedRotation(video->rotation); + Mp4parseByteData extraData = video->sample_info[0].extra_data; + // If length is 0 we append nothing + mExtraData->AppendElements(extraData.data, extraData.length); + return NS_OK; +} + +bool MP4VideoInfo::IsValid() const { + return (mDisplay.width > 0 && mDisplay.height > 0) || + (mImage.width > 0 && mImage.height > 0); +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/mp4/DecoderData.h b/dom/media/mp4/DecoderData.h new file mode 100644 index 0000000000..a8d38d0abc --- /dev/null +++ b/dom/media/mp4/DecoderData.h @@ -0,0 +1,76 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef DECODER_DATA_H_ +#define DECODER_DATA_H_ + +#include "MediaInfo.h" +#include "MediaResult.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Result.h" +#include "mozilla/Types.h" +#include "mozilla/Vector.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mp4parse.h" + +namespace mozilla { + +class IndiceWrapper; +class MP4Demuxer; + +struct PsshInfo { + PsshInfo() = default; + PsshInfo(const PsshInfo& aOther) = delete; + PsshInfo(PsshInfo&& aOther) = default; + + nsTArray<uint8_t> uuid; + nsTArray<uint8_t> data; + + bool operator==(const PsshInfo& aOther) const { + return uuid == aOther.uuid && data == aOther.data; + } +}; + +class CryptoFile { + public: + CryptoFile() : valid(false) {} + CryptoFile(const CryptoFile& aCryptoFile) = delete; + + void Update(const uint8_t* aData, size_t aLength) { + valid = DoUpdate(aData, aLength).isOk(); + } + + bool valid; + nsTArray<PsshInfo> pssh; + + private: + mozilla::Result<mozilla::Ok, nsresult> DoUpdate(const uint8_t* aData, + size_t aLength); +}; + +class MP4AudioInfo : public mozilla::AudioInfo { + public: + MP4AudioInfo() = default; + + MediaResult Update(const Mp4parseTrackInfo* aTrack, + const Mp4parseTrackAudioInfo* aAudio, + const IndiceWrapper* aIndices); + + virtual bool IsValid() const override; +}; + +class MP4VideoInfo : public mozilla::VideoInfo { + public: + MP4VideoInfo() = default; + + MediaResult Update(const Mp4parseTrackInfo* track, + const Mp4parseTrackVideoInfo* video); + + virtual bool IsValid() const override; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/MP4Decoder.cpp b/dom/media/mp4/MP4Decoder.cpp new file mode 100644 index 0000000000..90feec4fd3 --- /dev/null +++ b/dom/media/mp4/MP4Decoder.cpp @@ -0,0 +1,242 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MP4Decoder.h"
+#include "H264.h"
+#include "VPXDecoder.h"
+#ifdef MOZ_AV1
+#  include "AOMDecoder.h"
+#endif
+#include "MP4Demuxer.h"
+#include "MediaContainerType.h"
+#include "PDMFactory.h"
+#include "PlatformDecoderModule.h"
+#include "VideoUtils.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/gfx/Tools.h"
+#include "nsMimeTypes.h"
+#include "nsReadableUtils.h"
+
+namespace mozilla {
+
+// Returns true when aCodec parses as an H.264 codec string whose level lies
+// in [H264_LEVEL_1, H264_LEVEL_5_2] and whose profile is Baseline, Main,
+// Extended or High. Returns false when the string cannot be parsed.
+static bool IsWhitelistedH264Codec(const nsAString& aCodec) {
+  uint8_t profile = 0, constraint = 0, level = 0;
+
+  if (!ExtractH264CodecDetails(aCodec, profile, constraint, level)) {
+    return false;
+  }
+
+  // Just assume what we can play on all platforms the codecs/formats that
+  // WMF can play, since we don't have documentation about what other
+  // platforms can play... According to the WMF documentation:
+  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd797815%28v=vs.85%29.aspx
+  // "The Media Foundation H.264 video decoder is a Media Foundation Transform
+  // that supports decoding of Baseline, Main, and High profiles, up to level
+  // 5.1.". We extend the limit to level 5.2, relying on the decoder to handle
+  // any potential errors, the level limit being rather arbitrary.
+  // We also report that we can play Extended profile, as there are
+  // bitstreams that are Extended compliant that are also Baseline compliant.
+  return level >= H264_LEVEL_1 && level <= H264_LEVEL_5_2 &&
+         (profile == H264_PROFILE_BASE || profile == H264_PROFILE_MAIN ||
+          profile == H264_PROFILE_EXTENDED || profile == H264_PROFILE_HIGH);
+}
+
+// Returns true when the container MIME type is one of the five MP4-family
+// types accepted by this decoder.
+static bool IsTypeValid(const MediaContainerType& aType) {
+  // Whitelist MP4 types, so they explicitly match what we encounter on
+  // the web, as opposed to what we use internally (i.e. what our demuxers
+  // etc output).
+  return aType.Type() == MEDIAMIMETYPE("audio/mp4") ||
+         aType.Type() == MEDIAMIMETYPE("audio/x-m4a") ||
+         aType.Type() == MEDIAMIMETYPE("video/mp4") ||
+         aType.Type() == MEDIAMIMETYPE("video/quicktime") ||
+         aType.Type() == MEDIAMIMETYPE("video/x-m4v");
+}
+
+// Builds one TrackInfo per entry of the "codecs" parameter of aType.
+// - Invalid container type: aError is set to a fatal error and an empty
+//   array is returned.
+// - No codecs parameter: aError is NS_OK and an empty array is returned.
+// - Unrecognised codec: aError is set to a fatal error, but the loop keeps
+//   going, so the returned array may still hold the tracks built for the
+//   other codecs.
+/* static */
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+    const MediaContainerType& aType, MediaResult& aError) {
+  nsTArray<UniquePtr<TrackInfo>> tracks;
+
+  if (!IsTypeValid(aType)) {
+    aError = MediaResult(
+        NS_ERROR_DOM_MEDIA_FATAL_ERR,
+        RESULT_DETAIL("Invalid type:%s", aType.Type().AsString().get()));
+    return tracks;
+  }
+
+  aError = NS_OK;
+
+  const MediaCodecs& codecs = aType.ExtendedType().Codecs();
+  if (codecs.IsEmpty()) {
+    return tracks;
+  }
+
+  // H.264 codec strings are only accepted for the video container types.
+  const bool isVideo = aType.Type() == MEDIAMIMETYPE("video/mp4") ||
+                       aType.Type() == MEDIAMIMETYPE("video/quicktime") ||
+                       aType.Type() == MEDIAMIMETYPE("video/x-m4v");
+
+  for (const auto& codec : codecs.Range()) {
+    if (IsAACCodecString(codec)) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/mp4a-latm"_ns, aType));
+      continue;
+    }
+    if (codec.EqualsLiteral("mp3")) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/mpeg"_ns, aType));
+      continue;
+    }
+    // The valid codecs parameter value with mp4 MIME types should be "Opus" and
+    // "fLaC", but "opus" and "flac" are acceptable due to historical reasons.
+    if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("Opus") ||
+        codec.EqualsLiteral("flac") || codec.EqualsLiteral("fLaC")) {
+      // The track MIME type is "audio/" followed by the lower-cased codec.
+      NS_ConvertUTF16toUTF8 c(codec);
+      ToLowerCase(c);
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "audio/"_ns + c, aType));
+      continue;
+    }
+    if (IsVP9CodecString(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/vp9"_ns, aType);
+      VPXDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+#ifdef MOZ_AV1
+    if (StaticPrefs::media_av1_enabled() && IsAV1CodecString(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/av1"_ns, aType);
+      AOMDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec);
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+#endif
+#ifdef MOZ_WMF
+    if (StaticPrefs::media_wmf_hevc_enabled() && IsH265CodecString(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/hevc"_ns, aType);
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+#endif
+    if (isVideo && IsWhitelistedH264Codec(codec)) {
+      auto trackInfo =
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/avc"_ns, aType);
+      uint8_t profile = 0, constraint = 0, level = 0;
+      // IsWhitelistedH264Codec() already parsed this string successfully.
+      MOZ_ALWAYS_TRUE(
+          ExtractH264CodecDetails(codec, profile, constraint, level));
+      // Fall back to 1280x720 when the type carries no width/height.
+      uint32_t width = aType.ExtendedType().GetWidth().refOr(1280);
+      uint32_t height = aType.ExtendedType().GetHeight().refOr(720);
+      trackInfo->GetAsVideoInfo()->mExtraData =
+          H264::CreateExtraData(profile, constraint, level, {width, height});
+      tracks.AppendElement(std::move(trackInfo));
+      continue;
+    }
+    // Unknown codec
+    aError = MediaResult(
+        NS_ERROR_DOM_MEDIA_FATAL_ERR,
+        RESULT_DETAIL("Unknown codec:%s", NS_ConvertUTF16toUTF8(codec).get()));
+  }
+  return tracks;
+}
+
+// Returns false when MP4 is preffed off or GetTracksInfo() reports an error.
+// With explicit codecs, every track must be supported by the PDMFactory.
+// Without codecs, a default set of tracks is guessed from the container type
+// and a single supported one is enough.
+/* static */
+bool MP4Decoder::IsSupportedType(const MediaContainerType& aType,
+                                 DecoderDoctorDiagnostics* aDiagnostics) {
+  if (!IsEnabled()) {
+    return false;
+  }
+
+  MediaResult rv = NS_OK;
+  auto tracks = GetTracksInfo(aType, rv);
+  if (NS_FAILED(rv)) {
+    return false;
+  }
+
+  if (!tracks.IsEmpty()) {
+    // Look for exact match as we know used codecs.
+    RefPtr<PDMFactory> platform = new PDMFactory();
+    for (const auto& track : tracks) {
+      if (!track ||
+          platform->Supports(SupportDecoderParams(*track), aDiagnostics)
+              .isEmpty()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // We have only container info so try to guess the content type.
+  // Assume H.264/AV1 or AAC
+  if (aType.Type() == MEDIAMIMETYPE("audio/mp4") ||
+      aType.Type() == MEDIAMIMETYPE("audio/x-m4a")) {
+    tracks.AppendElement(
+        CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+            "audio/mp4a-latm"_ns, aType));
+  } else {
+    tracks.AppendElement(
+        CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+            "video/avc"_ns, aType));
+    if (StaticPrefs::media_av1_enabled()) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/av1"_ns, aType));
+    }
+#ifdef MOZ_WMF
+    if (StaticPrefs::media_wmf_hevc_enabled()) {
+      tracks.AppendElement(
+          CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters(
+              "video/hevc"_ns, aType));
+    }
+#endif
+  }
+
+  // Check that something is supported at least.
+  RefPtr<PDMFactory> platform = new PDMFactory();
+  for (const auto& track : tracks) {
+    if (track && !platform->Supports(SupportDecoderParams(*track), aDiagnostics)
+                      .isEmpty()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// True for the exact strings "video/mp4" and "video/avc".
+/* static */
+bool MP4Decoder::IsH264(const nsACString& aMimeType) {
+  return aMimeType.EqualsLiteral("video/mp4") ||
+         aMimeType.EqualsLiteral("video/avc");
+}
+
+// True for the exact string "audio/mp4a-latm".
+/* static */
+bool MP4Decoder::IsAAC(const nsACString& aMimeType) {
+  return aMimeType.EqualsLiteral("audio/mp4a-latm");
+}
+
+// True for the exact string "video/hevc".
+/* static */
+bool MP4Decoder::IsHEVC(const nsACString& aMimeType) {
+  return aMimeType.EqualsLiteral("video/hevc");
+}
+
+// Reflects the media.mp4.enabled static pref.
+/* static */
+bool MP4Decoder::IsEnabled() { return StaticPrefs::media_mp4_enabled(); }
+
+// Convenience overload: same as the two-argument form, with the error
+// discarded.
+/* static */
+nsTArray<UniquePtr<TrackInfo>> MP4Decoder::GetTracksInfo(
+    const MediaContainerType& aType) {
+  MediaResult rv = NS_OK;
+  return GetTracksInfo(aType, rv);
+}
+
+}  // namespace mozilla
diff --git a/dom/media/mp4/MP4Decoder.h b/dom/media/mp4/MP4Decoder.h
new file mode 100644
index 0000000000..c8a3304a49
--- /dev/null
+++ b/dom/media/mp4/MP4Decoder.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#if !defined(MP4Decoder_h_)
+#  define MP4Decoder_h_
+
+#  include "mozilla/UniquePtr.h"
+#  include "nsStringFwd.h"
+#  include "nsTArray.h"
+
+namespace mozilla {
+
+class MediaContainerType;
+class MediaResult;
+class DecoderDoctorDiagnostics;
+class TrackInfo;
+
+// Decoder that uses a bundled MP4 demuxer and platform decoders to play MP4.
+class MP4Decoder {
+ public:
+  // Returns true if aContainerType is an MP4 type that we think we can render
+  // with a platform decoder backend.
+  // If provided, codecs are checked for support.
+  static bool IsSupportedType(const MediaContainerType& aContainerType,
+                              DecoderDoctorDiagnostics* aDiagnostics);
+
+  // Return true if aMimeType is one of the strings used by our demuxers to
+  // identify H264. Does not parse general content type strings, i.e. white
+  // space matters.
+  static bool IsH264(const nsACString& aMimeType);
+
+  // Return true if aMimeType is one of the strings used by our demuxers to
+  // identify AAC. Does not parse general content type strings, i.e. white
+  // space matters.
+  static bool IsAAC(const nsACString& aMimeType);
+
+  // Return true if aMimeType is one of the strings used by our demuxers to
+  // identify HEVC. Does not parse general content type strings, i.e. white
+  // space matters.
+  static bool IsHEVC(const nsACString& aMimeType);
+
+  // Returns true if the MP4 backend is preffed on.
+  static bool IsEnabled();
+
+  // Returns the tracks described by the codecs parameter of aType; any error
+  // raised while building them is dropped.
+  static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+      const MediaContainerType& aType);
+
+ private:
+  // Same as the public overload, but reports the failure through aError.
+  static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
+      const MediaContainerType& aType, MediaResult& aError);
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Demuxer.cpp b/dom/media/mp4/MP4Demuxer.cpp
new file mode 100644
index 0000000000..2690df2316
--- /dev/null
+++ b/dom/media/mp4/MP4Demuxer.cpp
@@ -0,0 +1,646 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include <algorithm> +#include <limits> +#include <stdint.h> + +#include "MP4Demuxer.h" + +#include "AnnexB.h" +#include "BufferStream.h" +#include "H264.h" +#include "H265.h" +#include "MP4Decoder.h" +#include "MP4Metadata.h" +#include "MoofParser.h" +#include "ResourceStream.h" +#include "TimeUnits.h" +#include "VPXDecoder.h" +#include "mozilla/Span.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/Telemetry.h" +#include "nsPrintfCString.h" +#include "SampleIterator.h" + +extern mozilla::LazyLogModule gMediaDemuxerLog; +mozilla::LogModule* GetDemuxerLog() { return gMediaDemuxerLog; } + +#define LOG(arg, ...) \ + DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \ + __func__, ##__VA_ARGS__) + +namespace mozilla { + +using TimeUnit = media::TimeUnit; +using TimeInterval = media::TimeInterval; +using TimeIntervals = media::TimeIntervals; + +DDLoggedTypeDeclNameAndBase(MP4TrackDemuxer, MediaTrackDemuxer); + +class MP4TrackDemuxer : public MediaTrackDemuxer, + public DecoderDoctorLifeLogger<MP4TrackDemuxer> { + public: + MP4TrackDemuxer(MediaResource* aResource, UniquePtr<TrackInfo>&& aInfo, + const IndiceWrapper& aIndices, uint32_t aTimeScale); + + UniquePtr<TrackInfo> GetInfo() const override; + + RefPtr<SeekPromise> Seek(const TimeUnit& aTime) override; + + RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override; + + void Reset() override; + + nsresult GetNextRandomAccessPoint(TimeUnit* aTime) override; + + RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint( + const TimeUnit& aTimeThreshold) override; + + TimeIntervals GetBuffered() override; + + void NotifyDataRemoved(); + void NotifyDataArrived(); + + private: + already_AddRefed<MediaRawData> GetNextSample(); + void EnsureUpToDateIndex(); + void SetNextKeyFrameTime(); + RefPtr<MediaResource> mResource; + RefPtr<ResourceStream> mStream; + UniquePtr<TrackInfo> mInfo; + RefPtr<MP4SampleIndex> mIndex; + UniquePtr<SampleIterator> mIterator; + Maybe<TimeUnit> 
mNextKeyframeTime; + // Queued samples extracted by the demuxer, but not yet returned. + RefPtr<MediaRawData> mQueuedSample; + bool mNeedReIndex; + enum CodecType { kH264, kVP9, kAAC, kHEVC, kOther } mType = kOther; +}; + +MP4Demuxer::MP4Demuxer(MediaResource* aResource) + : mResource(aResource), + mStream(new ResourceStream(aResource)), + mIsSeekable(false) { + DDLINKCHILD("resource", aResource); + DDLINKCHILD("stream", mStream.get()); +} + +RefPtr<MP4Demuxer::InitPromise> MP4Demuxer::Init() { + AutoPinned<ResourceStream> stream(mStream); + + // 'result' will capture the first warning, if any. + MediaResult result{NS_OK}; + + MP4Metadata::ResultAndByteBuffer initData = MP4Metadata::Metadata(stream); + if (!initData.Ref()) { + return InitPromise::CreateAndReject( + NS_FAILED(initData.Result()) + ? std::move(initData.Result()) + : MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid MP4 metadata or OOM")), + __func__); + } else if (NS_FAILED(initData.Result()) && result == NS_OK) { + result = std::move(initData.Result()); + } + + RefPtr<BufferStream> bufferstream = new BufferStream(initData.Ref()); + + MP4Metadata metadata{bufferstream}; + DDLINKCHILD("metadata", &metadata); + nsresult rv = metadata.Parse(); + if (NS_FAILED(rv)) { + return InitPromise::CreateAndReject( + MediaResult(rv, RESULT_DETAIL("Parse MP4 metadata failed")), __func__); + } + + auto audioTrackCount = metadata.GetNumberTracks(TrackInfo::kAudioTrack); + if (audioTrackCount.Ref() == MP4Metadata::NumberTracksError()) { + if (StaticPrefs::media_playback_warnings_as_errors()) { + return InitPromise::CreateAndReject( + MediaResult( + NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid audio track (%s)", + audioTrackCount.Result().Description().get())), + __func__); + } + audioTrackCount.Ref() = 0; + } + + auto videoTrackCount = metadata.GetNumberTracks(TrackInfo::kVideoTrack); + if (videoTrackCount.Ref() == MP4Metadata::NumberTracksError()) { + if 
(StaticPrefs::media_playback_warnings_as_errors()) { + return InitPromise::CreateAndReject( + MediaResult( + NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid video track (%s)", + videoTrackCount.Result().Description().get())), + __func__); + } + videoTrackCount.Ref() = 0; + } + + if (audioTrackCount.Ref() == 0 && videoTrackCount.Ref() == 0) { + return InitPromise::CreateAndReject( + MediaResult( + NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("No MP4 audio (%s) or video (%s) tracks", + audioTrackCount.Result().Description().get(), + videoTrackCount.Result().Description().get())), + __func__); + } + + if (NS_FAILED(audioTrackCount.Result()) && result == NS_OK) { + result = std::move(audioTrackCount.Result()); + } + if (NS_FAILED(videoTrackCount.Result()) && result == NS_OK) { + result = std::move(videoTrackCount.Result()); + } + + if (audioTrackCount.Ref() != 0) { + for (size_t i = 0; i < audioTrackCount.Ref(); i++) { + MP4Metadata::ResultAndTrackInfo info = + metadata.GetTrackInfo(TrackInfo::kAudioTrack, i); + if (!info.Ref()) { + if (StaticPrefs::media_playback_warnings_as_errors()) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid MP4 audio track (%s)", + info.Result().Description().get())), + __func__); + } + if (result == NS_OK) { + result = + MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid MP4 audio track (%s)", + info.Result().Description().get())); + } + continue; + } else if (NS_FAILED(info.Result()) && result == NS_OK) { + result = std::move(info.Result()); + } + MP4Metadata::ResultAndIndice indices = + metadata.GetTrackIndice(info.Ref()->mTrackId); + if (!indices.Ref()) { + if (NS_FAILED(info.Result()) && result == NS_OK) { + result = std::move(indices.Result()); + } + continue; + } + RefPtr<MP4TrackDemuxer> demuxer = + new MP4TrackDemuxer(mResource, std::move(info.Ref()), + *indices.Ref().get(), info.Ref()->mTimeScale); + DDLINKCHILD("audio demuxer", 
demuxer.get()); + mAudioDemuxers.AppendElement(std::move(demuxer)); + } + } + + if (videoTrackCount.Ref() != 0) { + for (size_t i = 0; i < videoTrackCount.Ref(); i++) { + MP4Metadata::ResultAndTrackInfo info = + metadata.GetTrackInfo(TrackInfo::kVideoTrack, i); + if (!info.Ref()) { + if (StaticPrefs::media_playback_warnings_as_errors()) { + return InitPromise::CreateAndReject( + MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid MP4 video track (%s)", + info.Result().Description().get())), + __func__); + } + if (result == NS_OK) { + result = + MediaResult(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + RESULT_DETAIL("Invalid MP4 video track (%s)", + info.Result().Description().get())); + } + continue; + } else if (NS_FAILED(info.Result()) && result == NS_OK) { + result = std::move(info.Result()); + } + MP4Metadata::ResultAndIndice indices = + metadata.GetTrackIndice(info.Ref()->mTrackId); + if (!indices.Ref()) { + if (NS_FAILED(info.Result()) && result == NS_OK) { + result = std::move(indices.Result()); + } + continue; + } + RefPtr<MP4TrackDemuxer> demuxer = + new MP4TrackDemuxer(mResource, std::move(info.Ref()), + *indices.Ref().get(), info.Ref()->mTimeScale); + DDLINKCHILD("video demuxer", demuxer.get()); + mVideoDemuxers.AppendElement(std::move(demuxer)); + } + } + + MP4Metadata::ResultAndCryptoFile cryptoFile = metadata.Crypto(); + if (NS_FAILED(cryptoFile.Result()) && result == NS_OK) { + result = std::move(cryptoFile.Result()); + } + MOZ_ASSERT(cryptoFile.Ref()); + if (cryptoFile.Ref()->valid) { + const nsTArray<PsshInfo>& psshs = cryptoFile.Ref()->pssh; + for (uint32_t i = 0; i < psshs.Length(); i++) { + mCryptoInitData.AppendElements(psshs[i].data); + } + } + + mIsSeekable = metadata.CanSeek(); + + return InitPromise::CreateAndResolve(result, __func__); +} + +uint32_t MP4Demuxer::GetNumberTracks(TrackInfo::TrackType aType) const { + switch (aType) { + case TrackInfo::kAudioTrack: + return uint32_t(mAudioDemuxers.Length()); + case TrackInfo::kVideoTrack: 
+ return uint32_t(mVideoDemuxers.Length()); + default: + return 0; + } +} + +already_AddRefed<MediaTrackDemuxer> MP4Demuxer::GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) { + switch (aType) { + case TrackInfo::kAudioTrack: + if (aTrackNumber >= uint32_t(mAudioDemuxers.Length())) { + return nullptr; + } + return RefPtr<MediaTrackDemuxer>(mAudioDemuxers[aTrackNumber]).forget(); + case TrackInfo::kVideoTrack: + if (aTrackNumber >= uint32_t(mVideoDemuxers.Length())) { + return nullptr; + } + return RefPtr<MediaTrackDemuxer>(mVideoDemuxers[aTrackNumber]).forget(); + default: + return nullptr; + } +} + +bool MP4Demuxer::IsSeekable() const { return mIsSeekable; } + +void MP4Demuxer::NotifyDataArrived() { + for (auto& dmx : mAudioDemuxers) { + dmx->NotifyDataArrived(); + } + for (auto& dmx : mVideoDemuxers) { + dmx->NotifyDataArrived(); + } +} + +void MP4Demuxer::NotifyDataRemoved() { + for (auto& dmx : mAudioDemuxers) { + dmx->NotifyDataRemoved(); + } + for (auto& dmx : mVideoDemuxers) { + dmx->NotifyDataRemoved(); + } +} + +UniquePtr<EncryptionInfo> MP4Demuxer::GetCrypto() { + UniquePtr<EncryptionInfo> crypto; + if (!mCryptoInitData.IsEmpty()) { + crypto.reset(new EncryptionInfo{}); + crypto->AddInitData(u"cenc"_ns, mCryptoInitData); + } + return crypto; +} + +MP4TrackDemuxer::MP4TrackDemuxer(MediaResource* aResource, + UniquePtr<TrackInfo>&& aInfo, + const IndiceWrapper& aIndices, + uint32_t aTimeScale) + : mResource(aResource), + mStream(new ResourceStream(aResource)), + mInfo(std::move(aInfo)), + mIndex(new MP4SampleIndex(aIndices, mStream, mInfo->mTrackId, + mInfo->IsAudio(), aTimeScale)), + mIterator(MakeUnique<SampleIterator>(mIndex)), + mNeedReIndex(true) { + EnsureUpToDateIndex(); // Force update of index + + VideoInfo* videoInfo = mInfo->GetAsVideoInfo(); + AudioInfo* audioInfo = mInfo->GetAsAudioInfo(); + if (videoInfo && MP4Decoder::IsH264(mInfo->mMimeType)) { + mType = kH264; + RefPtr<MediaByteBuffer> extraData = videoInfo->mExtraData; 
+ SPSData spsdata; + if (H264::DecodeSPSFromExtraData(extraData, spsdata) && + spsdata.pic_width > 0 && spsdata.pic_height > 0 && + H264::EnsureSPSIsSane(spsdata)) { + videoInfo->mImage.width = spsdata.pic_width; + videoInfo->mImage.height = spsdata.pic_height; + videoInfo->mDisplay.width = spsdata.display_width; + videoInfo->mDisplay.height = spsdata.display_height; + } + } else if (videoInfo && VPXDecoder::IsVP9(mInfo->mMimeType)) { + mType = kVP9; + } else if (audioInfo && MP4Decoder::IsAAC(mInfo->mMimeType)) { + mType = kAAC; + } else if (videoInfo && MP4Decoder::IsHEVC(mInfo->mMimeType)) { + mType = kHEVC; + if (auto rv = H265::DecodeSPSFromHVCCExtraData(videoInfo->mExtraData); + rv.isOk()) { + const auto sps = rv.unwrap(); + videoInfo->mImage.width = sps.GetImageSize().Width(); + videoInfo->mImage.height = sps.GetImageSize().Height(); + videoInfo->mDisplay.width = sps.GetDisplaySize().Width(); + videoInfo->mDisplay.height = sps.GetDisplaySize().Height(); + } + } +} + +UniquePtr<TrackInfo> MP4TrackDemuxer::GetInfo() const { return mInfo->Clone(); } + +void MP4TrackDemuxer::EnsureUpToDateIndex() { + if (!mNeedReIndex) { + return; + } + AutoPinned<MediaResource> resource(mResource); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + if (NS_FAILED(rv)) { + return; + } + mIndex->UpdateMoofIndex(byteRanges); + mNeedReIndex = false; +} + +RefPtr<MP4TrackDemuxer::SeekPromise> MP4TrackDemuxer::Seek( + const TimeUnit& aTime) { + auto seekTime = aTime; + mQueuedSample = nullptr; + + mIterator->Seek(seekTime); + + // Check what time we actually seeked to. + do { + RefPtr<MediaRawData> sample = GetNextSample(); + if (!sample) { + return SeekPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, + __func__); + } + if (!sample->Size()) { + // This sample can't be decoded, continue searching. 
+ continue; + } + if (sample->mKeyframe) { + MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime()); + mQueuedSample = sample; + seekTime = mQueuedSample->mTime; + } + } while (!mQueuedSample); + + SetNextKeyFrameTime(); + + return SeekPromise::CreateAndResolve(seekTime, __func__); +} + +already_AddRefed<MediaRawData> MP4TrackDemuxer::GetNextSample() { + RefPtr<MediaRawData> sample = mIterator->GetNext(); + if (!sample) { + return nullptr; + } + if (mInfo->GetAsVideoInfo()) { + sample->mExtraData = mInfo->GetAsVideoInfo()->mExtraData; + if (mType == kH264 && !sample->mCrypto.IsEncrypted()) { + H264::FrameType type = H264::GetFrameType(sample); + switch (type) { + case H264::FrameType::I_FRAME: + [[fallthrough]]; + case H264::FrameType::OTHER: { + bool keyframe = type == H264::FrameType::I_FRAME; + if (sample->mKeyframe != keyframe) { + NS_WARNING(nsPrintfCString("Frame incorrectly marked as %skeyframe " + "@ pts:%" PRId64 " dur:%" PRId64 + " dts:%" PRId64, + keyframe ? "" : "non-", + sample->mTime.ToMicroseconds(), + sample->mDuration.ToMicroseconds(), + sample->mTimecode.ToMicroseconds()) + .get()); + sample->mKeyframe = keyframe; + } + break; + } + case H264::FrameType::INVALID: + NS_WARNING(nsPrintfCString("Invalid H264 frame @ pts:%" PRId64 + " dur:%" PRId64 " dts:%" PRId64, + sample->mTime.ToMicroseconds(), + sample->mDuration.ToMicroseconds(), + sample->mTimecode.ToMicroseconds()) + .get()); + // We could reject the sample now, however demuxer errors are fatal. + // So we keep the invalid frame, relying on the H264 decoder to + // handle the error later. + // TODO: make demuxer errors non-fatal. + break; + } + } else if (mType == kVP9 && !sample->mCrypto.IsEncrypted()) { + bool keyframe = VPXDecoder::IsKeyframe( + Span<const uint8_t>(sample->Data(), sample->Size()), + VPXDecoder::Codec::VP9); + if (sample->mKeyframe != keyframe) { + NS_WARNING(nsPrintfCString( + "Frame incorrectly marked as %skeyframe " + "@ pts:%" PRId64 " dur:%" PRId64 " dts:%" PRId64, + keyframe ? 
"" : "non-", sample->mTime.ToMicroseconds(), + sample->mDuration.ToMicroseconds(), + sample->mTimecode.ToMicroseconds()) + .get()); + sample->mKeyframe = keyframe; + } + } + } + + // Adjust trimming information if needed. + if (mInfo->GetAsAudioInfo()) { + AudioInfo* info = mInfo->GetAsAudioInfo(); + TimeUnit originalPts = sample->mTime; + TimeUnit originalEnd = sample->GetEndTime(); + if (sample->mTime.IsNegative()) { + sample->mTime = TimeUnit::Zero(originalPts); + sample->mDuration = std::max(TimeUnit::Zero(sample->mTime), + originalPts + sample->mDuration); + sample->mOriginalPresentationWindow = + Some(TimeInterval{originalPts, originalEnd}); + } + // The demuxer only knows the presentation time of the packet, not the + // actual number of samples that will be decoded from this packet. + // However we need to trim the last packet to the correct duration. + // Find the actual size of the decoded packet to know how many samples to + // trim. This only works because the packet size are constant. + TimeUnit totalMediaDurationIncludingTrimming = + info->mDuration - info->mMediaTime; + if (mType == kAAC && + sample->GetEndTime() >= totalMediaDurationIncludingTrimming && + totalMediaDurationIncludingTrimming.IsPositive()) { + // Seek backward a bit. + mIterator->Seek(sample->mTime - sample->mDuration); + RefPtr<MediaRawData> previousSample = mIterator->GetNext(); + if (previousSample) { + TimeInterval fullPacketDuration{previousSample->mTime, + previousSample->GetEndTime()}; + sample->mOriginalPresentationWindow = Some(TimeInterval{ + originalPts, originalPts + fullPacketDuration.Length()}); + } + // Seek back so we're back at the original location -- there's no packet + // left anyway. 
+ mIterator->Seek(sample->mTime); + RefPtr<MediaRawData> dummy = mIterator->GetNext(); + } + } + + if (MOZ_LOG_TEST(GetDemuxerLog(), LogLevel::Verbose)) { + bool isAudio = mInfo->GetAsAudioInfo(); + TimeUnit originalStart = TimeUnit::Invalid(); + TimeUnit originalEnd = TimeUnit::Invalid(); + if (sample->mOriginalPresentationWindow) { + originalStart = sample->mOriginalPresentationWindow->mStart; + originalEnd = sample->mOriginalPresentationWindow->mEnd; + } + LOG("%s packet demuxed (track id: %d): [%s,%s], duration: %s (original " + "time: [%s,%s])", + isAudio ? "Audio" : "Video", mInfo->mTrackId, + sample->mTime.ToString().get(), sample->GetEndTime().ToString().get(), + sample->mDuration.ToString().get(), originalStart.ToString().get(), + originalEnd.ToString().get()); + } + + return sample.forget(); +} + +RefPtr<MP4TrackDemuxer::SamplesPromise> MP4TrackDemuxer::GetSamples( + int32_t aNumSamples) { + EnsureUpToDateIndex(); + RefPtr<SamplesHolder> samples = new SamplesHolder; + if (!aNumSamples) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + + if (mQueuedSample) { + NS_ASSERTION(mQueuedSample->mKeyframe, "mQueuedSample must be a keyframe"); + samples->AppendSample(mQueuedSample); + mQueuedSample = nullptr; + aNumSamples--; + } + RefPtr<MediaRawData> sample; + while (aNumSamples && (sample = GetNextSample())) { + if (!sample->Size()) { + continue; + } + MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime()); + samples->AppendSample(sample); + aNumSamples--; + } + + if (samples->GetSamples().IsEmpty()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, + __func__); + } + + if (mNextKeyframeTime.isNothing() || + samples->GetSamples().LastElement()->mTime >= mNextKeyframeTime.value()) { + SetNextKeyFrameTime(); + } + return SamplesPromise::CreateAndResolve(samples, __func__); +} + +void MP4TrackDemuxer::SetNextKeyFrameTime() { + mNextKeyframeTime.reset(); + TimeUnit frameTime = 
mIterator->GetNextKeyframeTime(); + if (frameTime.IsValid()) { + mNextKeyframeTime.emplace(frameTime); + } +} + +void MP4TrackDemuxer::Reset() { + mQueuedSample = nullptr; + // TODO: verify this + mIterator->Seek(TimeUnit::FromNegativeInfinity()); + SetNextKeyFrameTime(); +} + +nsresult MP4TrackDemuxer::GetNextRandomAccessPoint(TimeUnit* aTime) { + if (mNextKeyframeTime.isNothing()) { + // There's no next key frame. + *aTime = TimeUnit::FromInfinity(); + } else { + *aTime = mNextKeyframeTime.value(); + } + return NS_OK; +} + +RefPtr<MP4TrackDemuxer::SkipAccessPointPromise> +MP4TrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) { + mQueuedSample = nullptr; + // Loop until we reach the next keyframe after the threshold. + uint32_t parsed = 0; + bool found = false; + RefPtr<MediaRawData> sample; + while (!found && (sample = GetNextSample())) { + parsed++; + MOZ_DIAGNOSTIC_ASSERT(sample->HasValidTime()); + if (sample->mKeyframe && sample->mTime >= aTimeThreshold) { + found = true; + mQueuedSample = sample; + } + } + SetNextKeyFrameTime(); + if (found) { + return SkipAccessPointPromise::CreateAndResolve(parsed, __func__); + } + SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed); + return SkipAccessPointPromise::CreateAndReject(std::move(failure), __func__); +} + +TimeIntervals MP4TrackDemuxer::GetBuffered() { + EnsureUpToDateIndex(); + AutoPinned<MediaResource> resource(mResource); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + + if (NS_FAILED(rv)) { + return TimeIntervals(); + } + + TimeIntervals timeRanges = mIndex->ConvertByteRangesToTimeRanges(byteRanges); + if (AudioInfo* info = mInfo->GetAsAudioInfo(); info) { + // Trim as in GetNextSample(). 
+ TimeUnit totalMediaDurationIncludingTrimming = + info->mDuration - info->mMediaTime; + auto end = TimeUnit::FromInfinity(); + if (mType == kAAC && totalMediaDurationIncludingTrimming.IsPositive()) { + end = info->mDuration; + } + if (timeRanges.GetStart().IsNegative() || timeRanges.GetEnd() > end) { + TimeInterval trimming(TimeUnit::Zero(timeRanges.GetStart()), end); + timeRanges = timeRanges.Intersection(trimming); + } + } + + return timeRanges; +} + +void MP4TrackDemuxer::NotifyDataArrived() { mNeedReIndex = true; } + +void MP4TrackDemuxer::NotifyDataRemoved() { + AutoPinned<MediaResource> resource(mResource); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + if (NS_FAILED(rv)) { + return; + } + mIndex->UpdateMoofIndex(byteRanges, true /* can evict */); + mNeedReIndex = false; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/mp4/MP4Demuxer.h b/dom/media/mp4/MP4Demuxer.h new file mode 100644 index 0000000000..22fa5b137f --- /dev/null +++ b/dom/media/mp4/MP4Demuxer.h @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#if !defined(MP4Demuxer_h_)
+#  define MP4Demuxer_h_
+
+#  include "mozilla/Maybe.h"
+#  include "mozilla/Monitor.h"
+#  include "MediaDataDemuxer.h"
+#  include "MediaResource.h"
+
+namespace mozilla {
+class MP4TrackDemuxer;
+class ResourceStream;
+
+DDLoggedTypeDeclNameAndBase(MP4Demuxer, MediaDataDemuxer);
+
+// MediaDataDemuxer implementation for MP4 resources. It keeps one
+// MP4TrackDemuxer per audio track and per video track, plus the crypto
+// init data and the seekable flag.
+class MP4Demuxer : public MediaDataDemuxer,
+                   public DecoderDoctorLifeLogger<MP4Demuxer> {
+ public:
+  explicit MP4Demuxer(MediaResource* aResource);
+
+  RefPtr<InitPromise> Init() override;
+
+  // Number of track demuxers held for aType.
+  uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override;
+
+  already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer(
+      TrackInfo::TrackType aType, uint32_t aTrackNumber) override;
+
+  bool IsSeekable() const override;
+
+  UniquePtr<EncryptionInfo> GetCrypto() override;
+
+  void NotifyDataArrived() override;
+
+  void NotifyDataRemoved() override;
+
+ private:
+  RefPtr<MediaResource> mResource;
+  RefPtr<ResourceStream> mStream;
+  // Per-type track demuxers; inline storage is sized for a single track.
+  AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mAudioDemuxers;
+  AutoTArray<RefPtr<MP4TrackDemuxer>, 1> mVideoDemuxers;
+  nsTArray<uint8_t> mCryptoInitData;
+  bool mIsSeekable;
+};
+
+}  // namespace mozilla
+
+#endif
diff --git a/dom/media/mp4/MP4Interval.h b/dom/media/mp4/MP4Interval.h
new file mode 100644
index 0000000000..70e6daeadd
--- /dev/null
+++ b/dom/media/mp4/MP4Interval.h
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef INTERVAL_H_ +#define INTERVAL_H_ + +#include "nsTArray.h" +#include <algorithm> +#include <nsString.h> + +namespace mozilla { + +template <typename T> +struct MP4Interval { + MP4Interval() : start{}, end{} {} + MP4Interval(T aStart, T aEnd) : start(aStart), end(aEnd) { + MOZ_ASSERT(aStart <= aEnd); + } + T Length() { return end - start; } + MP4Interval Intersection(const MP4Interval& aOther) const { + T s = start > aOther.start ? start : aOther.start; + T e = end < aOther.end ? end : aOther.end; + if (s > e) { + return MP4Interval(); + } + return MP4Interval(s, e); + } + bool Contains(const MP4Interval& aOther) const { + return aOther.start >= start && aOther.end <= end; + } + bool operator==(const MP4Interval& aOther) const { + return start == aOther.start && end == aOther.end; + } + bool operator!=(const MP4Interval& aOther) const { + return !(*this == aOther); + } + bool IsNull() const { return end == start; } + MP4Interval Extents(const MP4Interval& aOther) const { + if (IsNull()) { + return aOther; + } + return MP4Interval(std::min(start, aOther.start), + std::max(end, aOther.end)); + } + + T start; + T end; + + nsCString ToString() { + return nsPrintfCString("[%s, %s]", start.ToString().get(), + end.ToString().get()); + } + + static void SemiNormalAppend(nsTArray<MP4Interval<T>>& aIntervals, + MP4Interval<T> aMP4Interval) { + if (!aIntervals.IsEmpty() && + aIntervals.LastElement().end == aMP4Interval.start) { + aIntervals.LastElement().end = aMP4Interval.end; + } else { + aIntervals.AppendElement(aMP4Interval); + } + } + + static void Normalize(const nsTArray<MP4Interval<T>>& aIntervals, + nsTArray<MP4Interval<T>>* aNormalized) { + if (!aNormalized || !aIntervals.Length()) { + MOZ_ASSERT(aNormalized); + return; + } + MOZ_ASSERT(aNormalized->IsEmpty()); + + nsTArray<MP4Interval<T>> sorted = aIntervals.Clone(); + sorted.Sort(Compare()); + + MP4Interval<T> current = sorted[0]; + for (size_t i = 1; i < sorted.Length(); i++) { + 
MOZ_ASSERT(sorted[i].start <= sorted[i].end); + if (current.Contains(sorted[i])) { + continue; + } + if (current.end >= sorted[i].start) { + current.end = sorted[i].end; + } else { + aNormalized->AppendElement(current); + current = sorted[i]; + } + } + aNormalized->AppendElement(current); + } + + static void Intersection(const nsTArray<MP4Interval<T>>& a0, + const nsTArray<MP4Interval<T>>& a1, + nsTArray<MP4Interval<T>>* aIntersection) { + MOZ_ASSERT(IsNormalized(a0)); + MOZ_ASSERT(IsNormalized(a1)); + size_t i0 = 0; + size_t i1 = 0; + while (i0 < a0.Length() && i1 < a1.Length()) { + MP4Interval i = a0[i0].Intersection(a1[i1]); + if (i.Length()) { + aIntersection->AppendElement(i); + } + if (a0[i0].end < a1[i1].end) { + i0++; + // Assert that the array is sorted + MOZ_ASSERT(i0 == a0.Length() || a0[i0 - 1].start < a0[i0].start); + } else { + i1++; + // Assert that the array is sorted + MOZ_ASSERT(i1 == a1.Length() || a1[i1 - 1].start < a1[i1].start); + } + } + } + + static bool IsNormalized(const nsTArray<MP4Interval<T>>& aIntervals) { + for (size_t i = 1; i < aIntervals.Length(); i++) { + if (aIntervals[i - 1].end >= aIntervals[i].start) { + return false; + } + } + return true; + } + + struct Compare { + bool Equals(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const { + return a0.start == a1.start && a0.end == a1.end; + } + + bool LessThan(const MP4Interval<T>& a0, const MP4Interval<T>& a1) const { + return a0.start < a1.start; + } + }; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/MP4Metadata.cpp b/dom/media/mp4/MP4Metadata.cpp new file mode 100644 index 0000000000..b956072ca2 --- /dev/null +++ b/dom/media/mp4/MP4Metadata.cpp @@ -0,0 +1,510 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/Logging.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Telemetry.h" +#include "mozilla/UniquePtr.h" +#include "VideoUtils.h" +#include "MoofParser.h" +#include "MP4Metadata.h" +#include "ByteStream.h" +#include "mp4parse.h" + +#include <limits> +#include <stdint.h> +#include <vector> + +using mozilla::media::TimeUnit; + +namespace mozilla { +LazyLogModule gMP4MetadataLog("MP4Metadata"); + +IndiceWrapper::IndiceWrapper(Mp4parseByteData& aRustIndice) { + mIndice.data = nullptr; + mIndice.length = aRustIndice.length; + mIndice.indices = aRustIndice.indices; +} + +size_t IndiceWrapper::Length() const { return mIndice.length; } + +bool IndiceWrapper::GetIndice(size_t aIndex, + MP4SampleIndex::Indice& aIndice) const { + if (aIndex >= mIndice.length) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Error, ("Index overflow in indice")); + return false; + } + + const Mp4parseIndice* indice = &mIndice.indices[aIndex]; + aIndice.start_offset = indice->start_offset; + aIndice.end_offset = indice->end_offset; + aIndice.start_composition = indice->start_composition; + aIndice.end_composition = indice->end_composition; + aIndice.start_decode = indice->start_decode; + aIndice.sync = indice->sync; + return true; +} + +static const char* TrackTypeToString(mozilla::TrackInfo::TrackType aType) { + switch (aType) { + case mozilla::TrackInfo::kAudioTrack: + return "audio"; + case mozilla::TrackInfo::kVideoTrack: + return "video"; + default: + return "unknown"; + } +} + +bool StreamAdaptor::Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read) { + if (!mOffset.isValid()) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Error, + ("Overflow in source stream offset")); + return false; + } + bool rv = mSource->ReadAt(mOffset.value(), buffer, size, bytes_read); + if (rv) { + mOffset += *bytes_read; + } + return rv; +} + +// Wrapper to allow rust to call our read adaptor. 
+static intptr_t read_source(uint8_t* buffer, uintptr_t size, void* userdata) { + MOZ_ASSERT(buffer); + MOZ_ASSERT(userdata); + + auto source = reinterpret_cast<StreamAdaptor*>(userdata); + size_t bytes_read = 0; + bool rv = source->Read(buffer, size, &bytes_read); + if (!rv) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, ("Error reading source data")); + return -1; + } + return bytes_read; +} + +MP4Metadata::MP4Metadata(ByteStream* aSource) + : mSource(aSource), mSourceAdaptor(aSource) { + DDLINKCHILD("source", aSource); +} + +MP4Metadata::~MP4Metadata() = default; + +nsresult MP4Metadata::Parse() { + Mp4parseIo io = {read_source, &mSourceAdaptor}; + Mp4parseParser* parser = nullptr; + Mp4parseStatus status = mp4parse_new(&io, &parser); + if (status == MP4PARSE_STATUS_OK && parser) { + mParser.reset(parser); + MOZ_ASSERT(mParser); + } else { + MOZ_ASSERT(!mParser); + MOZ_LOG(gMP4MetadataLog, LogLevel::Debug, + ("Parse failed, return code %d\n", status)); + return status == MP4PARSE_STATUS_OOM ? 
NS_ERROR_OUT_OF_MEMORY + : NS_ERROR_DOM_MEDIA_METADATA_ERR; + } + + UpdateCrypto(); + + return NS_OK; +} + +void MP4Metadata::UpdateCrypto() { + Mp4parsePsshInfo info = {}; + if (mp4parse_get_pssh_info(mParser.get(), &info) != MP4PARSE_STATUS_OK) { + return; + } + + if (info.data.length == 0) { + return; + } + + mCrypto.Update(info.data.data, info.data.length); +} + +bool TrackTypeEqual(TrackInfo::TrackType aLHS, Mp4parseTrackType aRHS) { + switch (aLHS) { + case TrackInfo::kAudioTrack: + return aRHS == MP4PARSE_TRACK_TYPE_AUDIO; + case TrackInfo::kVideoTrack: + return aRHS == MP4PARSE_TRACK_TYPE_VIDEO; + default: + return false; + } +} + +MP4Metadata::ResultAndTrackCount MP4Metadata::GetNumberTracks( + mozilla::TrackInfo::TrackType aType) const { + uint32_t tracks; + auto rv = mp4parse_get_track_count(mParser.get(), &tracks); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("rust parser error %d counting tracks", rv)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Rust parser error %d", rv)), + MP4Metadata::NumberTracksError()}; + } + + uint32_t total = 0; + for (uint32_t i = 0; i < tracks; ++i) { + Mp4parseTrackInfo track_info; + rv = mp4parse_get_track_info(mParser.get(), i, &track_info); + if (rv != MP4PARSE_STATUS_OK) { + continue; + } + + if (track_info.track_type == MP4PARSE_TRACK_TYPE_AUDIO) { + Mp4parseTrackAudioInfo audio; + auto rv = mp4parse_get_track_audio_info(mParser.get(), i, &audio); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_audio_info returned error %d", rv)); + continue; + } + MOZ_DIAGNOSTIC_ASSERT(audio.sample_info_count > 0, + "Must have at least one audio sample info"); + if (audio.sample_info_count == 0) { + return { + MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Got 0 audio sample info while checking number tracks")), + MP4Metadata::NumberTracksError()}; + } + // We assume the codec of the first 
sample info is representative of the + // whole track and skip it if we don't recognize the codec. + if (audio.sample_info[0].codec_type == MP4PARSE_CODEC_UNKNOWN) { + continue; + } + } else if (track_info.track_type == MP4PARSE_TRACK_TYPE_VIDEO) { + Mp4parseTrackVideoInfo video; + auto rv = mp4parse_get_track_video_info(mParser.get(), i, &video); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_video_info returned error %d", rv)); + continue; + } + MOZ_DIAGNOSTIC_ASSERT(video.sample_info_count > 0, + "Must have at least one video sample info"); + if (video.sample_info_count == 0) { + return { + MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Got 0 video sample info while checking number tracks")), + MP4Metadata::NumberTracksError()}; + } + // We assume the codec of the first sample info is representative of the + // whole track and skip it if we don't recognize the codec. + if (video.sample_info[0].codec_type == MP4PARSE_CODEC_UNKNOWN) { + continue; + } + } else { + // Only audio and video are supported + continue; + } + if (TrackTypeEqual(aType, track_info.track_type)) { + total += 1; + } + } + + MOZ_LOG(gMP4MetadataLog, LogLevel::Info, + ("%s tracks found: %u", TrackTypeToString(aType), total)); + + return {NS_OK, total}; +} + +Maybe<uint32_t> MP4Metadata::TrackTypeToGlobalTrackIndex( + mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const { + uint32_t tracks; + auto rv = mp4parse_get_track_count(mParser.get(), &tracks); + if (rv != MP4PARSE_STATUS_OK) { + return Nothing(); + } + + /* The MP4Metadata API uses a per-TrackType index of tracks, but mp4parse + (and libstagefright) use a global track index. Convert the index by + counting the tracks of the requested type and returning the global + track index when a match is found. 
*/ + uint32_t perType = 0; + for (uint32_t i = 0; i < tracks; ++i) { + Mp4parseTrackInfo track_info; + rv = mp4parse_get_track_info(mParser.get(), i, &track_info); + if (rv != MP4PARSE_STATUS_OK) { + continue; + } + if (TrackTypeEqual(aType, track_info.track_type)) { + if (perType == aTrackNumber) { + return Some(i); + } + perType += 1; + } + } + + return Nothing(); +} + +MP4Metadata::ResultAndTrackInfo MP4Metadata::GetTrackInfo( + mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const { + Maybe<uint32_t> trackIndex = TrackTypeToGlobalTrackIndex(aType, aTrackNumber); + if (trackIndex.isNothing()) { + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("No %s tracks", TrackTypeToStr(aType))), + nullptr}; + } + + Mp4parseTrackInfo info; + auto rv = mp4parse_get_track_info(mParser.get(), trackIndex.value(), &info); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_info returned %d", rv)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot find %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; + } +#ifdef DEBUG + bool haveSampleInfo = false; + const char* codecString = "unrecognized"; + Mp4parseCodec codecType = MP4PARSE_CODEC_UNKNOWN; + if (info.track_type == MP4PARSE_TRACK_TYPE_AUDIO) { + Mp4parseTrackAudioInfo audio; + auto rv = mp4parse_get_track_audio_info(mParser.get(), trackIndex.value(), + &audio); + if (rv == MP4PARSE_STATUS_OK && audio.sample_info_count > 0) { + codecType = audio.sample_info[0].codec_type; + haveSampleInfo = true; + } + } else if (info.track_type == MP4PARSE_TRACK_TYPE_VIDEO) { + Mp4parseTrackVideoInfo video; + auto rv = mp4parse_get_track_video_info(mParser.get(), trackIndex.value(), + &video); + if (rv == MP4PARSE_STATUS_OK && video.sample_info_count > 0) { + codecType = video.sample_info[0].codec_type; + haveSampleInfo = true; + } + } + if (haveSampleInfo) { + switch (codecType) { + case MP4PARSE_CODEC_UNKNOWN: + 
codecString = "unknown"; + break; + case MP4PARSE_CODEC_AAC: + codecString = "aac"; + break; + case MP4PARSE_CODEC_OPUS: + codecString = "opus"; + break; + case MP4PARSE_CODEC_FLAC: + codecString = "flac"; + break; + case MP4PARSE_CODEC_ALAC: + codecString = "alac"; + break; + case MP4PARSE_CODEC_H263: + codecString = "h.263"; + break; + case MP4PARSE_CODEC_AVC: + codecString = "h.264"; + break; + case MP4PARSE_CODEC_VP9: + codecString = "vp9"; + break; + case MP4PARSE_CODEC_AV1: + codecString = "av1"; + break; + case MP4PARSE_CODEC_MP3: + codecString = "mp3"; + break; + case MP4PARSE_CODEC_MP4V: + codecString = "mp4v"; + break; + case MP4PARSE_CODEC_JPEG: + codecString = "jpeg"; + break; + case MP4PARSE_CODEC_AC3: + codecString = "ac-3"; + break; + case MP4PARSE_CODEC_EC3: + codecString = "ec-3"; + break; + case MP4PARSE_CODEC_HEVC: + codecString = "hevc"; + break; + } + } + MOZ_LOG(gMP4MetadataLog, LogLevel::Debug, + ("track codec %s (%u)\n", codecString, codecType)); +#endif + + Mp4parseTrackInfo track_info; + rv = mp4parse_get_track_info(mParser.get(), trackIndex.value(), &track_info); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_info returned error %d", rv)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; + } + + uint32_t timeScale = info.time_scale; + + // This specialization interface is wild. 
+ UniquePtr<mozilla::TrackInfo> e; + switch (aType) { + case TrackInfo::TrackType::kAudioTrack: { + Mp4parseTrackAudioInfo audio; + auto rv = mp4parse_get_track_audio_info(mParser.get(), trackIndex.value(), + &audio); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_audio_info returned error %d", rv)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; + } + + auto indices = GetTrackIndice(info.track_id); + if (!indices.Ref()) { + // non fatal + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("Can't get index table for audio track, duration might be " + "slightly incorrect")); + } + auto track = mozilla::MakeUnique<MP4AudioInfo>(); + MediaResult updateStatus = + track->Update(&info, &audio, indices.Ref().get()); + if (NS_FAILED(updateStatus)) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("Updating audio track failed with %s", + updateStatus.Message().get())); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Failed to update %s track #%zu with error: %s", + TrackTypeToStr(aType), aTrackNumber, + updateStatus.Message().get())), + nullptr}; + } + e = std::move(track); + } break; + case TrackInfo::TrackType::kVideoTrack: { + Mp4parseTrackVideoInfo video; + auto rv = mp4parse_get_track_video_info(mParser.get(), trackIndex.value(), + &video); + if (rv != MP4PARSE_STATUS_OK) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("mp4parse_get_track_video_info returned error %d", rv)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; + } + auto track = mozilla::MakeUnique<MP4VideoInfo>(); + MediaResult updateStatus = track->Update(&info, &video); + if (NS_FAILED(updateStatus)) { + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("Updating video track failed with %s", + 
updateStatus.Message().get())); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Failed to update %s track #%zu with error: %s", + TrackTypeToStr(aType), aTrackNumber, + updateStatus.Message().get())), + nullptr}; + } + e = std::move(track); + } break; + default: + MOZ_LOG(gMP4MetadataLog, LogLevel::Warning, + ("unhandled track type %d", aType)); + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot handle %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; + } + + e->mTimeScale = timeScale; + + // No duration in track, use fragment_duration. + if (e && !e->mDuration.IsPositive()) { + Mp4parseFragmentInfo fragmentInfo; + auto rv = mp4parse_get_fragment_info(mParser.get(), &fragmentInfo); + if (rv == MP4PARSE_STATUS_OK) { + // This doesn't use the time scale of the track, but the time scale + // indicated in the mvhd box + e->mDuration = TimeUnit(fragmentInfo.fragment_duration, + AssertedCast<int64_t>(fragmentInfo.time_scale)); + } + } + + if (e && e->IsValid()) { + return {NS_OK, std::move(e)}; + } + MOZ_LOG(gMP4MetadataLog, LogLevel::Debug, ("TrackInfo didn't validate")); + + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Invalid %s track #%zu", + TrackTypeToStr(aType), aTrackNumber)), + nullptr}; +} + +bool MP4Metadata::CanSeek() const { return true; } + +MP4Metadata::ResultAndCryptoFile MP4Metadata::Crypto() const { + return {NS_OK, &mCrypto}; +} + +MP4Metadata::ResultAndIndice MP4Metadata::GetTrackIndice( + uint32_t aTrackId) const { + Mp4parseByteData indiceRawData = {}; + + uint8_t fragmented = false; + auto rv = mp4parse_is_fragmented(mParser.get(), aTrackId, &fragmented); + if (rv != MP4PARSE_STATUS_OK) { + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse whether track id %u is " + "fragmented, mp4parse_error=%d", + aTrackId, int(rv))), + nullptr}; + } + + if (!fragmented) { + rv = mp4parse_get_indice_table(mParser.get(), aTrackId, 
&indiceRawData); + if (rv != MP4PARSE_STATUS_OK) { + return { + MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse index table in track id %u, " + "mp4parse_error=%d", + aTrackId, int(rv))), + nullptr}; + } + } + + UniquePtr<IndiceWrapper> indice; + indice = mozilla::MakeUnique<IndiceWrapper>(indiceRawData); + + return {NS_OK, std::move(indice)}; +} + +/*static*/ MP4Metadata::ResultAndByteBuffer MP4Metadata::Metadata( + ByteStream* aSource) { + auto parser = mozilla::MakeUnique<MoofParser>( + aSource, AsVariant(ParseAllTracks{}), false); + RefPtr<mozilla::MediaByteBuffer> buffer = parser->Metadata(); + if (!buffer) { + return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Cannot parse metadata")), + nullptr}; + } + return {NS_OK, std::move(buffer)}; +} + +} // namespace mozilla diff --git a/dom/media/mp4/MP4Metadata.h b/dom/media/mp4/MP4Metadata.h new file mode 100644 index 0000000000..e900fbedc3 --- /dev/null +++ b/dom/media/mp4/MP4Metadata.h @@ -0,0 +1,116 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MP4METADATA_H_ +#define MP4METADATA_H_ + +#include <type_traits> + +#include "mozilla/UniquePtr.h" +#include "DecoderData.h" +#include "MediaData.h" +#include "MediaInfo.h" +#include "MediaResult.h" +#include "ByteStream.h" +#include "mp4parse.h" +#include "SampleIterator.h" + +namespace mozilla { + +DDLoggedTypeDeclName(MP4Metadata); + +// The memory owner in mIndice.indices is rust mp4 parser, so lifetime of this +// class SHOULD NOT longer than rust parser. 
+class IndiceWrapper { + public: + size_t Length() const; + + bool GetIndice(size_t aIndex, MP4SampleIndex::Indice& aIndice) const; + + explicit IndiceWrapper(Mp4parseByteData& aRustIndice); + + protected: + Mp4parseByteData mIndice; +}; + +struct FreeMP4Parser { + void operator()(Mp4parseParser* aPtr) { mp4parse_free(aPtr); } +}; + +// Wrap an Stream to remember the read offset. +class StreamAdaptor { + public: + explicit StreamAdaptor(ByteStream* aSource) : mSource(aSource), mOffset(0) {} + + ~StreamAdaptor() = default; + + bool Read(uint8_t* buffer, uintptr_t size, size_t* bytes_read); + + private: + ByteStream* mSource; + CheckedInt<size_t> mOffset; +}; + +class MP4Metadata : public DecoderDoctorLifeLogger<MP4Metadata> { + public: + explicit MP4Metadata(ByteStream* aSource); + ~MP4Metadata(); + + // Simple template class containing a MediaResult and another type. + template <typename T> + class ResultAndType { + public: + template <typename M2, typename T2> + ResultAndType(M2&& aM, T2&& aT) + : mResult(std::forward<M2>(aM)), mT(std::forward<T2>(aT)) {} + ResultAndType(const ResultAndType&) = default; + ResultAndType& operator=(const ResultAndType&) = default; + ResultAndType(ResultAndType&&) = default; + ResultAndType& operator=(ResultAndType&&) = default; + + mozilla::MediaResult& Result() { return mResult; } + T& Ref() { return mT; } + + private: + mozilla::MediaResult mResult; + std::decay_t<T> mT; + }; + + using ResultAndByteBuffer = ResultAndType<RefPtr<mozilla::MediaByteBuffer>>; + static ResultAndByteBuffer Metadata(ByteStream* aSource); + + static constexpr uint32_t NumberTracksError() { return UINT32_MAX; } + using ResultAndTrackCount = ResultAndType<uint32_t>; + ResultAndTrackCount GetNumberTracks( + mozilla::TrackInfo::TrackType aType) const; + + using ResultAndTrackInfo = + ResultAndType<mozilla::UniquePtr<mozilla::TrackInfo>>; + ResultAndTrackInfo GetTrackInfo(mozilla::TrackInfo::TrackType aType, + size_t aTrackNumber) const; + + bool CanSeek() 
const; + + using ResultAndCryptoFile = ResultAndType<const CryptoFile*>; + ResultAndCryptoFile Crypto() const; + + using ResultAndIndice = ResultAndType<mozilla::UniquePtr<IndiceWrapper>>; + ResultAndIndice GetTrackIndice(uint32_t aTrackId) const; + + nsresult Parse(); + + private: + void UpdateCrypto(); + Maybe<uint32_t> TrackTypeToGlobalTrackIndex( + mozilla::TrackInfo::TrackType aType, size_t aTrackNumber) const; + + CryptoFile mCrypto; + RefPtr<ByteStream> mSource; + StreamAdaptor mSourceAdaptor; + mozilla::UniquePtr<Mp4parseParser, FreeMP4Parser> mParser; +}; + +} // namespace mozilla + +#endif // MP4METADATA_H_ diff --git a/dom/media/mp4/MoofParser.cpp b/dom/media/mp4/MoofParser.cpp new file mode 100644 index 0000000000..c266c56148 --- /dev/null +++ b/dom/media/mp4/MoofParser.cpp @@ -0,0 +1,1288 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MoofParser.h" +#include "Box.h" +#include "SinfParser.h" +#include <limits> +#include "MP4Interval.h" + +#include "mozilla/CheckedInt.h" +#include "mozilla/HelperMacros.h" +#include "mozilla/Logging.h" +#include "mozilla/Try.h" + +extern mozilla::LogModule* GetDemuxerLog(); + +#define LOG_ERROR(name, arg, ...) \ + MOZ_LOG( \ + GetDemuxerLog(), mozilla::LogLevel::Error, \ + (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) +#define LOG_WARN(name, arg, ...) \ + MOZ_LOG( \ + GetDemuxerLog(), mozilla::LogLevel::Warning, \ + (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) +#define LOG_DEBUG(name, arg, ...) 
\ + MOZ_LOG( \ + GetDemuxerLog(), mozilla::LogLevel::Debug, \ + (MOZ_STRINGIFY(name) "(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) + +namespace mozilla { + +using TimeUnit = media::TimeUnit; + +const uint32_t kKeyIdSize = 16; + +bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges) { + BoxContext context(mSource, aByteRanges); + return RebuildFragmentedIndex(context); +} + +bool MoofParser::RebuildFragmentedIndex(const MediaByteRangeSet& aByteRanges, + bool* aCanEvict) { + MOZ_ASSERT(aCanEvict); + if (*aCanEvict && mMoofs.Length() > 1) { + MOZ_ASSERT(mMoofs.Length() == mMediaRanges.Length()); + mMoofs.RemoveElementsAt(0, mMoofs.Length() - 1); + mMediaRanges.RemoveElementsAt(0, mMediaRanges.Length() - 1); + *aCanEvict = true; + } else { + *aCanEvict = false; + } + return RebuildFragmentedIndex(aByteRanges); +} + +bool MoofParser::RebuildFragmentedIndex(BoxContext& aContext) { + LOG_DEBUG( + Moof, + "Starting, mTrackParseMode=%s, track#=%" PRIu32 + " (ignore if multitrack).", + mTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track", + mTrackParseMode.is<ParseAllTracks>() ? 0 + : mTrackParseMode.as<uint32_t>()); + bool foundValidMoof = false; + + for (Box box(&aContext, mOffset); box.IsAvailable(); box = box.Next()) { + if (box.IsType("moov") && mInitRange.IsEmpty()) { + mInitRange = MediaByteRange(0, box.Range().mEnd); + ParseMoov(box); + } else if (box.IsType("moof")) { + Moof moof(box, mTrackParseMode, mTrex, mMvhd, mMdhd, mEdts, mSinf, + &mLastDecodeTime, mIsAudio, mTracksEndCts); + + if (!moof.IsValid()) { + if (!box.Next().IsAvailable()) { + // Abort search for now, without advancing mOffset so that parsing + // can be attempted again when more of the resource is available. + LOG_WARN(Moof, "Invalid moof. moof may not be complete yet."); + break; + } + // moof is complete but invalid. Skip to next box. 
+ continue; + } + + if (!mMoofs.IsEmpty()) { + // Stitch time ranges together in the case of a (hopefully small) time + // range gap between moofs. + mMoofs.LastElement().FixRounding(moof); + } + + mMediaRanges.AppendElement(moof.mRange); + mMoofs.AppendElement(std::move(moof)); + foundValidMoof = true; + } else if (box.IsType("mdat") && !Moofs().IsEmpty()) { + // Check if we have all our data from last moof. + Moof& moof = Moofs().LastElement(); + media::Interval<int64_t> datarange(moof.mMdatRange.mStart, + moof.mMdatRange.mEnd, 0); + media::Interval<int64_t> mdat(box.Range().mStart, box.Range().mEnd, 0); + if (datarange.Intersects(mdat)) { + mMediaRanges.LastElement() = + mMediaRanges.LastElement().Span(box.Range()); + } + } + mOffset = box.NextOffset(); + } + MOZ_ASSERT(mTrackParseMode.is<ParseAllTracks>() || + mTrex.mTrackId == mTrackParseMode.as<uint32_t>(), + "If not parsing all tracks, mTrex should have the same track id " + "as the track being parsed."); + LOG_DEBUG(Moof, "Done, foundValidMoof=%s.", + foundValidMoof ? 
"true" : "false"); + return foundValidMoof; +} + +MediaByteRange MoofParser::FirstCompleteMediaHeader() { + if (Moofs().IsEmpty()) { + return MediaByteRange(); + } + return Moofs()[0].mRange; +} + +MediaByteRange MoofParser::FirstCompleteMediaSegment() { + for (uint32_t i = 0; i < mMediaRanges.Length(); i++) { + if (mMediaRanges[i].Contains(Moofs()[i].mMdatRange)) { + return mMediaRanges[i]; + } + } + return MediaByteRange(); +} + +DDLoggedTypeDeclNameAndBase(BlockingStream, ByteStream); + +class BlockingStream : public ByteStream, + public DecoderDoctorLifeLogger<BlockingStream> { + public: + explicit BlockingStream(ByteStream* aStream) : mStream(aStream) { + DDLINKCHILD("stream", aStream); + } + + bool ReadAt(int64_t offset, void* data, size_t size, + size_t* bytes_read) override { + return mStream->ReadAt(offset, data, size, bytes_read); + } + + bool CachedReadAt(int64_t offset, void* data, size_t size, + size_t* bytes_read) override { + return mStream->ReadAt(offset, data, size, bytes_read); + } + + virtual bool Length(int64_t* size) override { return mStream->Length(size); } + + private: + RefPtr<ByteStream> mStream; +}; + +bool MoofParser::BlockingReadNextMoof() { + LOG_DEBUG(Moof, "Starting."); + int64_t length = std::numeric_limits<int64_t>::max(); + mSource->Length(&length); + RefPtr<BlockingStream> stream = new BlockingStream(mSource); + MediaByteRangeSet byteRanges(MediaByteRange(0, length)); + + BoxContext context(stream, byteRanges); + for (Box box(&context, mOffset); box.IsAvailable(); box = box.Next()) { + if (box.IsType("moof")) { + MediaByteRangeSet parseByteRanges( + MediaByteRange(mOffset, box.Range().mEnd)); + BoxContext parseContext(stream, parseByteRanges); + if (RebuildFragmentedIndex(parseContext)) { + LOG_DEBUG(Moof, "Succeeded on RebuildFragmentedIndex, returning true."); + return true; + } + } + } + LOG_DEBUG(Moof, "Couldn't read next moof, returning false."); + return false; +} + +void MoofParser::ScanForMetadata(mozilla::MediaByteRange& 
aMoov) { + LOG_DEBUG(Moof, "Starting."); + int64_t length = std::numeric_limits<int64_t>::max(); + mSource->Length(&length); + MediaByteRangeSet byteRanges; + byteRanges += MediaByteRange(0, length); + RefPtr<BlockingStream> stream = new BlockingStream(mSource); + + BoxContext context(stream, byteRanges); + for (Box box(&context, mOffset); box.IsAvailable(); box = box.Next()) { + if (box.IsType("moov")) { + aMoov = box.Range(); + break; + } + } + mInitRange = aMoov; + LOG_DEBUG(Moof, + "Done, mInitRange.mStart=%" PRIi64 ", mInitRange.mEnd=%" PRIi64, + mInitRange.mStart, mInitRange.mEnd); +} + +already_AddRefed<mozilla::MediaByteBuffer> MoofParser::Metadata() { + LOG_DEBUG(Moof, "Starting."); + MediaByteRange moov; + ScanForMetadata(moov); + CheckedInt<MediaByteBuffer::size_type> moovLength = moov.Length(); + if (!moovLength.isValid() || !moovLength.value()) { + // No moov, or cannot be used as array size. + LOG_WARN(Moof, + "Did not get usable moov length while trying to parse Metadata."); + return nullptr; + } + + RefPtr<MediaByteBuffer> metadata = new MediaByteBuffer(); + if (!metadata->SetLength(moovLength.value(), fallible)) { + LOG_ERROR(Moof, "OOM"); + return nullptr; + } + + RefPtr<BlockingStream> stream = new BlockingStream(mSource); + size_t read; + bool rv = stream->ReadAt(moov.mStart, metadata->Elements(), + moovLength.value(), &read); + if (!rv || read != moovLength.value()) { + LOG_WARN(Moof, "Failed to read moov while trying to parse Metadata."); + return nullptr; + } + LOG_DEBUG(Moof, "Done, found metadata."); + return metadata.forget(); +} + +MP4Interval<TimeUnit> MoofParser::GetCompositionRange( + const MediaByteRangeSet& aByteRanges) { + LOG_DEBUG(Moof, "Starting."); + MP4Interval<TimeUnit> compositionRange; + BoxContext context(mSource, aByteRanges); + for (size_t i = 0; i < mMoofs.Length(); i++) { + Moof& moof = mMoofs[i]; + Box box(&context, moof.mRange.mStart); + if (box.IsAvailable()) { + compositionRange = 
compositionRange.Extents(moof.mTimeRange); + } + } + LOG_DEBUG(Moof, + "Done, compositionRange.start=%" PRIi64 + ", compositionRange.end=%" PRIi64 ".", + compositionRange.start.ToMicroseconds(), + compositionRange.end.ToMicroseconds()); + return compositionRange; +} + +bool MoofParser::ReachedEnd() { + int64_t length; + return mSource->Length(&length) && mOffset == length; +} + +void MoofParser::ParseMoov(Box& aBox) { + LOG_DEBUG(Moof, "Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("mvhd")) { + mMvhd = Mvhd(box); + } else if (box.IsType("trak")) { + ParseTrak(box); + } else if (box.IsType("mvex")) { + ParseMvex(box); + } + } + LOG_DEBUG(Moof, "Done."); +} + +void MoofParser::ParseTrak(Box& aBox) { + LOG_DEBUG(Trak, "Starting."); + Tkhd tkhd; + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("tkhd")) { + tkhd = Tkhd(box); + } else if (box.IsType("mdia")) { + if (mTrackParseMode.is<ParseAllTracks>() || + tkhd.mTrackId == mTrackParseMode.as<uint32_t>()) { + ParseMdia(box); + } + } else if (box.IsType("edts") && + (mTrackParseMode.is<ParseAllTracks>() || + tkhd.mTrackId == mTrackParseMode.as<uint32_t>())) { + mEdts = Edts(box); + } + } + LOG_DEBUG(Trak, "Done."); +} + +void MoofParser::ParseMdia(Box& aBox) { + LOG_DEBUG(Mdia, "Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("mdhd")) { + mMdhd = Mdhd(box); + } else if (box.IsType("minf")) { + ParseMinf(box); + } + } + LOG_DEBUG(Mdia, "Done."); +} + +void MoofParser::ParseMvex(Box& aBox) { + LOG_DEBUG(Mvex, "Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("trex")) { + Trex trex = Trex(box); + if (mTrackParseMode.is<ParseAllTracks>() || + trex.mTrackId == mTrackParseMode.as<uint32_t>()) { + mTrex = trex; + } + } + } + LOG_DEBUG(Mvex, "Done."); +} + +void MoofParser::ParseMinf(Box& aBox) { + LOG_DEBUG(Minf, 
"Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("stbl")) { + ParseStbl(box); + } + } + LOG_DEBUG(Minf, "Done."); +} + +void MoofParser::ParseStbl(Box& aBox) { + LOG_DEBUG(Stbl, "Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("stsd")) { + ParseStsd(box); + } else if (box.IsType("sgpd")) { + Sgpd sgpd(box); + if (sgpd.IsValid() && sgpd.mGroupingType == "seig") { + mTrackSampleEncryptionInfoEntries.Clear(); + if (!mTrackSampleEncryptionInfoEntries.AppendElements( + sgpd.mEntries, mozilla::fallible)) { + LOG_ERROR(Stbl, "OOM"); + return; + } + } + } else if (box.IsType("sbgp")) { + Sbgp sbgp(box); + if (sbgp.IsValid() && sbgp.mGroupingType == "seig") { + mTrackSampleToGroupEntries.Clear(); + if (!mTrackSampleToGroupEntries.AppendElements(sbgp.mEntries, + mozilla::fallible)) { + LOG_ERROR(Stbl, "OOM"); + return; + } + } + } + } + LOG_DEBUG(Stbl, "Done."); +} + +void MoofParser::ParseStsd(Box& aBox) { + LOG_DEBUG(Stsd, "Starting."); + if (mTrackParseMode.is<ParseAllTracks>()) { + // It is not a sane operation to try and map sample description boxes from + // multiple tracks onto the parser, which is modeled around storing metadata + // for a single track. 
+ LOG_DEBUG(Stsd, "Early return due to multitrack parser."); + return; + } + MOZ_ASSERT( + mSampleDescriptions.IsEmpty(), + "Shouldn't have any sample descriptions yet when starting to parse stsd"); + uint32_t numberEncryptedEntries = 0; + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + SampleDescriptionEntry sampleDescriptionEntry{false}; + if (box.IsType("encv") || box.IsType("enca")) { + ParseEncrypted(box); + sampleDescriptionEntry.mIsEncryptedEntry = true; + numberEncryptedEntries++; + } + if (!mSampleDescriptions.AppendElement(sampleDescriptionEntry, + mozilla::fallible)) { + LOG_ERROR(Stsd, "OOM"); + return; + } + } + if (mSampleDescriptions.IsEmpty()) { + LOG_WARN(Stsd, + "No sample description entries found while parsing Stsd! This " + "shouldn't happen, as the spec requires one for each track!"); + } + if (numberEncryptedEntries > 1) { + LOG_WARN(Stsd, + "More than one encrypted sample description entry found while " + "parsing track! We don't expect this, and it will likely break " + "during fragment look up!"); + } + LOG_DEBUG(Stsd, + "Done, numberEncryptedEntries=%" PRIu32 + ", mSampleDescriptions.Length=%zu", + numberEncryptedEntries, mSampleDescriptions.Length()); +} + +void MoofParser::ParseEncrypted(Box& aBox) { + LOG_DEBUG(Moof, "Starting."); + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + // Some MP4 files have been found to have multiple sinf boxes in the same + // enc* box. This does not match spec anyway, so just choose the first + // one that parses properly. 
+ if (box.IsType("sinf")) { + mSinf = Sinf(box); + + if (mSinf.IsValid()) { + break; + } + } + } + LOG_DEBUG(Moof, "Done."); +} + +class CtsComparator { + public: + bool Equals(Sample* const aA, Sample* const aB) const { + return aA->mCompositionRange.start == aB->mCompositionRange.start; + } + bool LessThan(Sample* const aA, Sample* const aB) const { + return aA->mCompositionRange.start < aB->mCompositionRange.start; + } +}; + +Moof::Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex, + Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, + uint64_t* aDecodeTime, bool aIsAudio, + nsTArray<TrackEndCts>& aTracksEndCts) + : mRange(aBox.Range()), + mTfhd(aTrex), + // Do not reporting discontuities less than 35ms + mMaxRoundingError(TimeUnit::FromSeconds(0.035)) { + LOG_DEBUG( + Moof, + "Starting, aTrackParseMode=%s, track#=%" PRIu32 + " (ignore if multitrack).", + aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track", + aTrackParseMode.is<ParseAllTracks>() ? 0 + : aTrackParseMode.as<uint32_t>()); + MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() || + aTrex.mTrackId == aTrackParseMode.as<uint32_t>(), + "If not parsing all tracks, aTrex should have the same track id " + "as the track being parsed."); + nsTArray<Box> psshBoxes; + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("traf")) { + ParseTraf(box, aTrackParseMode, aTrex, aMvhd, aMdhd, aEdts, aSinf, + aDecodeTime, aIsAudio); + } + if (box.IsType("pssh")) { + psshBoxes.AppendElement(box); + } + } + + // The EME spec requires that PSSH boxes which are contiguous in the + // file are dispatched to the media element in a single "encrypted" event. + // So append contiguous boxes here. 
+ for (size_t i = 0; i < psshBoxes.Length(); ++i) { + Box box = psshBoxes[i]; + if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) { + mPsshes.AppendElement(); + } + nsTArray<uint8_t>& pssh = mPsshes.LastElement(); + pssh.AppendElements(std::move(box.ReadCompleteBox())); + } + + if (IsValid()) { + if (mIndex.Length()) { + // Ensure the samples are contiguous with no gaps. + nsTArray<Sample*> ctsOrder; + for (auto& sample : mIndex) { + ctsOrder.AppendElement(&sample); + } + ctsOrder.Sort(CtsComparator()); + + for (size_t i = 1; i < ctsOrder.Length(); i++) { + ctsOrder[i - 1]->mCompositionRange.end = + ctsOrder[i]->mCompositionRange.start; + } + + // Ensure that there are no gaps between the first sample in this + // Moof and the preceeding Moof. + if (!ctsOrder.IsEmpty()) { + bool found = false; + // Track ID of the track we're parsing. + const uint32_t trackId = aTrex.mTrackId; + // Find the previous CTS end time of Moof preceeding the Moofs we just + // parsed, for the track we're parsing. + for (auto& prevCts : aTracksEndCts) { + if (prevCts.mTrackId == trackId) { + // We ensure there are no gaps in samples' CTS between the last + // sample in a Moof, and the first sample in the next Moof, if + // they're within these many Microseconds of each other. + const TimeUnit CROSS_MOOF_CTS_MERGE_THRESHOLD = + TimeUnit::FromMicroseconds(1); + // We have previously parsed a Moof for this track. Smooth the gap + // between samples for this track across the Moof bounary. + if (ctsOrder[0]->mCompositionRange.start > prevCts.mCtsEndTime && + ctsOrder[0]->mCompositionRange.start - prevCts.mCtsEndTime <= + CROSS_MOOF_CTS_MERGE_THRESHOLD) { + ctsOrder[0]->mCompositionRange.start = prevCts.mCtsEndTime; + } + prevCts.mCtsEndTime = ctsOrder.LastElement()->mCompositionRange.end; + found = true; + break; + } + } + if (!found) { + // We've not parsed a Moof for this track yet. Save its CTS end + // time for the next Moof we parse. 
+ aTracksEndCts.AppendElement(TrackEndCts( + trackId, ctsOrder.LastElement()->mCompositionRange.end)); + } + } + + // In MP4, the duration of a sample is defined as the delta between two + // decode timestamps. The operation above has updated the duration of each + // sample as a Sample's duration is mCompositionRange.end - + // mCompositionRange.start MSE's TrackBuffersManager expects dts that + // increased by the sample's duration, so we rewrite the dts accordingly. + TimeUnit presentationDuration = + ctsOrder.LastElement()->mCompositionRange.end - + ctsOrder[0]->mCompositionRange.start; + auto decodeOffset = + aMdhd.ToTimeUnit((int64_t)*aDecodeTime - aEdts.mMediaStart); + auto offsetOffset = aMvhd.ToTimeUnit(aEdts.mEmptyOffset); + TimeUnit endDecodeTime = + (decodeOffset.isOk() && offsetOffset.isOk()) + ? decodeOffset.unwrap() + offsetOffset.unwrap() + : TimeUnit::Zero(aMvhd.mTimescale); + TimeUnit decodeDuration = endDecodeTime - mIndex[0].mDecodeTime; + double adjust = 0.; + if (!presentationDuration.IsZero()) { + double num = decodeDuration.ToSeconds(); + double denom = presentationDuration.ToSeconds(); + if (denom != 0.) { + adjust = num / denom; + } + } + + TimeUnit dtsOffset = mIndex[0].mDecodeTime; + TimeUnit compositionDuration(0, aMvhd.mTimescale); + // Adjust the dts, ensuring that the new adjusted dts will never be + // greater than decodeTime (the next moof's decode start time). 
+ for (auto& sample : mIndex) { + sample.mDecodeTime = dtsOffset + compositionDuration.MultDouble(adjust); + compositionDuration += sample.mCompositionRange.Length(); + } + mTimeRange = + MP4Interval<TimeUnit>(ctsOrder[0]->mCompositionRange.start, + ctsOrder.LastElement()->mCompositionRange.end); + } + ProcessCencAuxInfo(aSinf.mDefaultEncryptionType); + } + LOG_DEBUG(Moof, "Done."); +} + +bool Moof::GetAuxInfo(AtomType aType, + FallibleTArray<MediaByteRange>* aByteRanges) { + LOG_DEBUG(Moof, "Starting."); + aByteRanges->Clear(); + + Saiz* saiz = nullptr; + for (int i = 0;; i++) { + if (i == mSaizs.Length()) { + LOG_DEBUG(Moof, "Could not find saiz matching aType. Returning false."); + return false; + } + if (mSaizs[i].mAuxInfoType == aType) { + saiz = &mSaizs[i]; + break; + } + } + Saio* saio = nullptr; + for (int i = 0;; i++) { + if (i == mSaios.Length()) { + LOG_DEBUG(Moof, "Could not find saio matching aType. Returning false."); + return false; + } + if (mSaios[i].mAuxInfoType == aType) { + saio = &mSaios[i]; + break; + } + } + + if (saio->mOffsets.Length() == 1) { + if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return false; + } + uint64_t offset = mRange.mStart + saio->mOffsets[0]; + for (size_t i = 0; i < saiz->mSampleInfoSize.Length(); i++) { + if (!aByteRanges->AppendElement( + MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return false; + } + offset += saiz->mSampleInfoSize[i]; + } + LOG_DEBUG( + Moof, + "Saio has 1 entry. aByteRanges populated accordingly. 
Returning true."); + return true; + } + + if (saio->mOffsets.Length() == saiz->mSampleInfoSize.Length()) { + if (!aByteRanges->SetCapacity(saiz->mSampleInfoSize.Length(), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return false; + } + for (size_t i = 0; i < saio->mOffsets.Length(); i++) { + uint64_t offset = mRange.mStart + saio->mOffsets[i]; + if (!aByteRanges->AppendElement( + MediaByteRange(offset, offset + saiz->mSampleInfoSize[i]), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return false; + } + } + LOG_DEBUG( + Moof, + "Saio and saiz have same number of entries. aByteRanges populated " + "accordingly. Returning true."); + return true; + } + + LOG_DEBUG(Moof, + "Moof::GetAuxInfo could not find any Aux info, returning false."); + return false; +} + +bool Moof::ProcessCencAuxInfo(AtomType aScheme) { + LOG_DEBUG(Moof, "Starting."); + FallibleTArray<MediaByteRange> cencRanges; + if (!GetAuxInfo(aScheme, &cencRanges) || + cencRanges.Length() != mIndex.Length()) { + LOG_DEBUG(Moof, "Couldn't find cenc aux info."); + return false; + } + for (int i = 0; i < cencRanges.Length(); i++) { + mIndex[i].mCencRange = cencRanges[i]; + } + LOG_DEBUG(Moof, "Found cenc aux info and stored on index."); + return true; +} + +void Moof::ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, + Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, + Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio) { + LOG_DEBUG( + Traf, + "Starting, aTrackParseMode=%s, track#=%" PRIu32 + " (ignore if multitrack).", + aTrackParseMode.is<ParseAllTracks>() ? "multitrack" : "single track", + aTrackParseMode.is<ParseAllTracks>() ? 
0 + : aTrackParseMode.as<uint32_t>()); + MOZ_ASSERT(aDecodeTime); + MOZ_ASSERT(aTrackParseMode.is<ParseAllTracks>() || + aTrex.mTrackId == aTrackParseMode.as<uint32_t>(), + "If not parsing all tracks, aTrex should have the same track id " + "as the track being parsed."); + Tfdt tfdt; + + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("tfhd")) { + mTfhd = Tfhd(box, aTrex); + } else if (aTrackParseMode.is<ParseAllTracks>() || + mTfhd.mTrackId == aTrackParseMode.as<uint32_t>()) { + if (box.IsType("tfdt")) { + tfdt = Tfdt(box); + } else if (box.IsType("sgpd")) { + Sgpd sgpd(box); + if (sgpd.IsValid() && sgpd.mGroupingType == "seig") { + mFragmentSampleEncryptionInfoEntries.Clear(); + if (!mFragmentSampleEncryptionInfoEntries.AppendElements( + sgpd.mEntries, mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return; + } + } + } else if (box.IsType("sbgp")) { + Sbgp sbgp(box); + if (sbgp.IsValid() && sbgp.mGroupingType == "seig") { + mFragmentSampleToGroupEntries.Clear(); + if (!mFragmentSampleToGroupEntries.AppendElements( + sbgp.mEntries, mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return; + } + } + } else if (box.IsType("saiz")) { + if (!mSaizs.AppendElement(Saiz(box, aSinf.mDefaultEncryptionType), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return; + } + } else if (box.IsType("saio")) { + if (!mSaios.AppendElement(Saio(box, aSinf.mDefaultEncryptionType), + mozilla::fallible)) { + LOG_ERROR(Moof, "OOM"); + return; + } + } + } + } + if (aTrackParseMode.is<uint32_t>() && + mTfhd.mTrackId != aTrackParseMode.as<uint32_t>()) { + LOG_DEBUG(Traf, + "Early return as not multitrack parser and track id didn't match " + "mTfhd.mTrackId=%" PRIu32, + mTfhd.mTrackId); + return; + } + // Now search for TRUN boxes. + uint64_t decodeTime = + tfdt.IsValid() ? 
tfdt.mBaseMediaDecodeTime : *aDecodeTime; + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("trun")) { + if (ParseTrun(box, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio).isOk()) { + mValid = true; + } else { + LOG_WARN(Moof, "ParseTrun failed"); + mValid = false; + return; + } + } + } + *aDecodeTime = decodeTime; + LOG_DEBUG(Traf, "Done, setting aDecodeTime=%." PRIu64 ".", decodeTime); +} + +void Moof::FixRounding(const Moof& aMoof) { + TimeUnit gap = aMoof.mTimeRange.start - mTimeRange.end; + if (gap.IsPositive() && gap <= mMaxRoundingError) { + mTimeRange.end = aMoof.mTimeRange.start; + } +} + +Result<Ok, nsresult> Moof::ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd, + Edts& aEdts, uint64_t* aDecodeTime, + bool aIsAudio) { + LOG_DEBUG(Trun, "Starting."); + if (!mTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() || + !aEdts.IsValid()) { + LOG_WARN( + Moof, "Invalid dependencies: mTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)", + mTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid()); + return Err(NS_ERROR_FAILURE); + } + + BoxReader reader(aBox); + if (!reader->CanReadType<uint32_t>()) { + LOG_WARN(Moof, "Incomplete Box (missing flags)"); + return Err(NS_ERROR_FAILURE); + } + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + + if (!reader->CanReadType<uint32_t>()) { + LOG_WARN(Moof, "Incomplete Box (missing sampleCount)"); + return Err(NS_ERROR_FAILURE); + } + uint32_t sampleCount; + MOZ_TRY_VAR(sampleCount, reader->ReadU32()); + if (sampleCount == 0) { + LOG_DEBUG(Trun, "Trun with no samples, returning."); + return Ok(); + } + + uint64_t offset = mTfhd.mBaseDataOffset; + if (flags & 0x01) { + uint32_t tmp; + MOZ_TRY_VAR(tmp, reader->ReadU32()); + offset += tmp; + } + uint32_t firstSampleFlags = mTfhd.mDefaultSampleFlags; + if (flags & 0x04) { + MOZ_TRY_VAR(firstSampleFlags, reader->ReadU32()); + } + nsTArray<MP4Interval<TimeUnit>> timeRanges; + uint64_t decodeTime = *aDecodeTime; + + if 
(!mIndex.SetCapacity(mIndex.Length() + sampleCount, fallible)) { + LOG_ERROR(Moof, "Out of Memory"); + return Err(NS_ERROR_FAILURE); + } + + for (size_t i = 0; i < sampleCount; i++) { + uint32_t sampleDuration = mTfhd.mDefaultSampleDuration; + if (flags & 0x100) { + MOZ_TRY_VAR(sampleDuration, reader->ReadU32()); + } + uint32_t sampleSize = mTfhd.mDefaultSampleSize; + if (flags & 0x200) { + MOZ_TRY_VAR(sampleSize, reader->ReadU32()); + } + uint32_t sampleFlags = i ? mTfhd.mDefaultSampleFlags : firstSampleFlags; + if (flags & 0x400) { + MOZ_TRY_VAR(sampleFlags, reader->ReadU32()); + } + int32_t ctsOffset = 0; + if (flags & 0x800) { + MOZ_TRY_VAR(ctsOffset, reader->Read32()); + } + + if (sampleSize) { + Sample sample; + sample.mByteRange = MediaByteRange(offset, offset + sampleSize); + offset += sampleSize; + + TimeUnit decodeOffset, emptyOffset, startCts, endCts; + MOZ_TRY_VAR(decodeOffset, + aMdhd.ToTimeUnit((int64_t)decodeTime - aEdts.mMediaStart)); + MOZ_TRY_VAR(emptyOffset, aMvhd.ToTimeUnit(aEdts.mEmptyOffset)); + sample.mDecodeTime = decodeOffset + emptyOffset; + MOZ_TRY_VAR(startCts, aMdhd.ToTimeUnit((int64_t)decodeTime + ctsOffset - + aEdts.mMediaStart)); + MOZ_TRY_VAR(endCts, aMdhd.ToTimeUnit((int64_t)decodeTime + ctsOffset + + sampleDuration - aEdts.mMediaStart)); + sample.mCompositionRange = + MP4Interval<TimeUnit>(startCts + emptyOffset, endCts + emptyOffset); + // Sometimes audio streams don't properly mark their samples as keyframes, + // because every audio sample is a keyframe. 
+ sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio; + + MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible)); + + mMdatRange = mMdatRange.Span(sample.mByteRange); + } + decodeTime += sampleDuration; + } + TimeUnit roundTime; + MOZ_TRY_VAR(roundTime, aMdhd.ToTimeUnit(sampleCount)); + mMaxRoundingError = roundTime + mMaxRoundingError; + + *aDecodeTime = decodeTime; + + LOG_DEBUG(Trun, "Done."); + return Ok(); +} + +Tkhd::Tkhd(Box& aBox) : mTrackId(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Tkhd, "Parse failed"); + } +} + +Result<Ok, nsresult> Tkhd::Parse(Box& aBox) { + BoxReader reader(aBox); + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + if (version == 0) { + uint32_t creationTime, modificationTime, reserved, duration; + MOZ_TRY_VAR(creationTime, reader->ReadU32()); + MOZ_TRY_VAR(modificationTime, reader->ReadU32()); + MOZ_TRY_VAR(mTrackId, reader->ReadU32()); + MOZ_TRY_VAR(reserved, reader->ReadU32()); + MOZ_TRY_VAR(duration, reader->ReadU32()); + + (void)reserved; + NS_ASSERTION(!reserved, "reserved should be 0"); + + mCreationTime = creationTime; + mModificationTime = modificationTime; + mDuration = duration; + } else if (version == 1) { + uint32_t reserved; + MOZ_TRY_VAR(mCreationTime, reader->ReadU64()); + MOZ_TRY_VAR(mModificationTime, reader->ReadU64()); + MOZ_TRY_VAR(mTrackId, reader->ReadU32()); + MOZ_TRY_VAR(reserved, reader->ReadU32()); + (void)reserved; + NS_ASSERTION(!reserved, "reserved should be 0"); + MOZ_TRY_VAR(mDuration, reader->ReadU64()); + } + return Ok(); +} + +Mvhd::Mvhd(Box& aBox) + : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Mvhd, "Parse failed"); + } +} + +Result<Ok, nsresult> Mvhd::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + + if (version == 0) { + uint32_t creationTime, 
modificationTime, duration; + MOZ_TRY_VAR(creationTime, reader->ReadU32()); + MOZ_TRY_VAR(modificationTime, reader->ReadU32()); + MOZ_TRY_VAR(mTimescale, reader->ReadU32()); + MOZ_TRY_VAR(duration, reader->ReadU32()); + mCreationTime = creationTime; + mModificationTime = modificationTime; + mDuration = duration; + } else if (version == 1) { + MOZ_TRY_VAR(mCreationTime, reader->ReadU64()); + MOZ_TRY_VAR(mModificationTime, reader->ReadU64()); + MOZ_TRY_VAR(mTimescale, reader->ReadU32()); + MOZ_TRY_VAR(mDuration, reader->ReadU64()); + } else { + return Err(NS_ERROR_FAILURE); + } + return Ok(); +} + +Mdhd::Mdhd(Box& aBox) : Mvhd(aBox) {} + +Trex::Trex(Box& aBox) + : mFlags(0), + mTrackId(0), + mDefaultSampleDescriptionIndex(0), + mDefaultSampleDuration(0), + mDefaultSampleSize(0), + mDefaultSampleFlags(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Trex, "Parse failed"); + } +} + +Result<Ok, nsresult> Trex::Parse(Box& aBox) { + BoxReader reader(aBox); + + MOZ_TRY_VAR(mFlags, reader->ReadU32()); + MOZ_TRY_VAR(mTrackId, reader->ReadU32()); + MOZ_TRY_VAR(mDefaultSampleDescriptionIndex, reader->ReadU32()); + MOZ_TRY_VAR(mDefaultSampleDuration, reader->ReadU32()); + MOZ_TRY_VAR(mDefaultSampleSize, reader->ReadU32()); + MOZ_TRY_VAR(mDefaultSampleFlags, reader->ReadU32()); + + return Ok(); +} + +Tfhd::Tfhd(Box& aBox, Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Tfhd, "Parse failed"); + } +} + +Result<Ok, nsresult> Tfhd::Parse(Box& aBox) { + MOZ_ASSERT(aBox.IsType("tfhd")); + MOZ_ASSERT(aBox.Parent()->IsType("traf")); + MOZ_ASSERT(aBox.Parent()->Parent()->IsType("moof")); + + BoxReader reader(aBox); + + MOZ_TRY_VAR(mFlags, reader->ReadU32()); + MOZ_TRY_VAR(mTrackId, reader->ReadU32()); + mBaseDataOffset = aBox.Parent()->Parent()->Offset(); + if (mFlags & 0x01) { + MOZ_TRY_VAR(mBaseDataOffset, reader->ReadU64()); + } + if (mFlags & 0x02) { + MOZ_TRY_VAR(mDefaultSampleDescriptionIndex, 
reader->ReadU32()); + } + if (mFlags & 0x08) { + MOZ_TRY_VAR(mDefaultSampleDuration, reader->ReadU32()); + } + if (mFlags & 0x10) { + MOZ_TRY_VAR(mDefaultSampleSize, reader->ReadU32()); + } + if (mFlags & 0x20) { + MOZ_TRY_VAR(mDefaultSampleFlags, reader->ReadU32()); + } + + return Ok(); +} + +Tfdt::Tfdt(Box& aBox) : mBaseMediaDecodeTime(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Tfdt, "Parse failed"); + } +} + +Result<Ok, nsresult> Tfdt::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + if (version == 0) { + uint32_t tmp; + MOZ_TRY_VAR(tmp, reader->ReadU32()); + mBaseMediaDecodeTime = tmp; + } else if (version == 1) { + MOZ_TRY_VAR(mBaseMediaDecodeTime, reader->ReadU64()); + } + return Ok(); +} + +Edts::Edts(Box& aBox) : mMediaStart(0), mEmptyOffset(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Edts, "Parse failed"); + } +} + +Result<Ok, nsresult> Edts::Parse(Box& aBox) { + Box child = aBox.FirstChild(); + if (!child.IsType("elst")) { + return Err(NS_ERROR_FAILURE); + } + + BoxReader reader(child); + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + bool emptyEntry = false; + uint32_t entryCount; + MOZ_TRY_VAR(entryCount, reader->ReadU32()); + for (uint32_t i = 0; i < entryCount; i++) { + uint64_t segment_duration; + int64_t media_time; + if (version == 1) { + MOZ_TRY_VAR(segment_duration, reader->ReadU64()); + MOZ_TRY_VAR(media_time, reader->Read64()); + } else { + uint32_t tmp; + MOZ_TRY_VAR(tmp, reader->ReadU32()); + segment_duration = tmp; + int32_t tmp2; + MOZ_TRY_VAR(tmp2, reader->Read32()); + media_time = tmp2; + } + if (media_time == -1 && i) { + LOG_WARN(Edts, "Multiple empty edit, not handled"); + } else if (media_time == -1) { + if (segment_duration > std::numeric_limits<int64_t>::max()) { + NS_WARNING("Segment duration higher than int64_t max."); + mEmptyOffset = 
std::numeric_limits<int64_t>::max(); + } else { + mEmptyOffset = static_cast<int64_t>(segment_duration); + } + emptyEntry = true; + } else if (i > 1 || (i > 0 && !emptyEntry)) { + LOG_WARN(Edts, + "More than one edit entry, not handled. A/V sync will be wrong"); + break; + } else { + mMediaStart = media_time; + } + MOZ_TRY(reader->ReadU32()); // media_rate_integer and media_rate_fraction + } + + return Ok(); +} + +Saiz::Saiz(Box& aBox, AtomType aDefaultType) + : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Saiz, "Parse failed"); + } +} + +Result<Ok, nsresult> Saiz::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + if (flags & 1) { + MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32()); + MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32()); + } + uint8_t defaultSampleInfoSize; + MOZ_TRY_VAR(defaultSampleInfoSize, reader->ReadU8()); + uint32_t count; + MOZ_TRY_VAR(count, reader->ReadU32()); + if (defaultSampleInfoSize) { + if (!mSampleInfoSize.SetLength(count, fallible)) { + LOG_ERROR(Saiz, "OOM"); + return Err(NS_ERROR_FAILURE); + } + memset(mSampleInfoSize.Elements(), defaultSampleInfoSize, + mSampleInfoSize.Length()); + } else { + if (!reader->ReadArray(mSampleInfoSize, count)) { + LOG_WARN(Saiz, "Incomplete Box (OOM or missing count:%u)", count); + return Err(NS_ERROR_FAILURE); + } + } + return Ok(); +} + +Saio::Saio(Box& aBox, AtomType aDefaultType) + : mAuxInfoType(aDefaultType), mAuxInfoTypeParameter(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Saio, "Parse failed"); + } +} + +Result<Ok, nsresult> Saio::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + if (flags & 1) { + MOZ_TRY_VAR(mAuxInfoType, reader->ReadU32()); + MOZ_TRY_VAR(mAuxInfoTypeParameter, reader->ReadU32()); + } + + size_t count; + MOZ_TRY_VAR(count, reader->ReadU32()); 
+ if (!mOffsets.SetCapacity(count, fallible)) { + LOG_ERROR(Saiz, "OOM"); + return Err(NS_ERROR_FAILURE); + } + if (version == 0) { + uint32_t offset; + for (size_t i = 0; i < count; i++) { + MOZ_TRY_VAR(offset, reader->ReadU32()); + MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible)); + } + } else { + uint64_t offset; + for (size_t i = 0; i < count; i++) { + MOZ_TRY_VAR(offset, reader->ReadU64()); + MOZ_ALWAYS_TRUE(mOffsets.AppendElement(offset, fallible)); + } + } + return Ok(); +} + +Sbgp::Sbgp(Box& aBox) : mGroupingTypeParam(0) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Sbgp, "Parse failed"); + } +} + +Result<Ok, nsresult> Sbgp::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + const uint8_t version = flags >> 24; + + uint32_t type; + MOZ_TRY_VAR(type, reader->ReadU32()); + mGroupingType = type; + + if (version == 1) { + MOZ_TRY_VAR(mGroupingTypeParam, reader->ReadU32()); + } + + uint32_t count; + MOZ_TRY_VAR(count, reader->ReadU32()); + + for (uint32_t i = 0; i < count; i++) { + uint32_t sampleCount; + MOZ_TRY_VAR(sampleCount, reader->ReadU32()); + uint32_t groupDescriptionIndex; + MOZ_TRY_VAR(groupDescriptionIndex, reader->ReadU32()); + + SampleToGroupEntry entry(sampleCount, groupDescriptionIndex); + if (!mEntries.AppendElement(entry, mozilla::fallible)) { + LOG_ERROR(Sbgp, "OOM"); + return Err(NS_ERROR_FAILURE); + } + } + return Ok(); +} + +Sgpd::Sgpd(Box& aBox) { + mValid = Parse(aBox).isOk(); + if (!mValid) { + LOG_WARN(Sgpd, "Parse failed"); + } +} + +Result<Ok, nsresult> Sgpd::Parse(Box& aBox) { + BoxReader reader(aBox); + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + const uint8_t version = flags >> 24; + + uint32_t type; + MOZ_TRY_VAR(type, reader->ReadU32()); + mGroupingType = type; + + const uint32_t entrySize = sizeof(uint32_t) + kKeyIdSize; + uint32_t defaultLength = 0; + + if (version == 1) { + MOZ_TRY_VAR(defaultLength, reader->ReadU32()); + if 
(defaultLength < entrySize && defaultLength != 0) { + return Err(NS_ERROR_FAILURE); + } + } + + uint32_t count; + MOZ_TRY_VAR(count, reader->ReadU32()); + + for (uint32_t i = 0; i < count; ++i) { + if (version == 1 && defaultLength == 0) { + uint32_t descriptionLength; + MOZ_TRY_VAR(descriptionLength, reader->ReadU32()); + if (descriptionLength < entrySize) { + return Err(NS_ERROR_FAILURE); + } + } + + CencSampleEncryptionInfoEntry entry; + bool valid = entry.Init(reader).isOk(); + if (!valid) { + return Err(NS_ERROR_FAILURE); + } + if (!mEntries.AppendElement(entry, mozilla::fallible)) { + LOG_ERROR(Sgpd, "OOM"); + return Err(NS_ERROR_FAILURE); + } + } + return Ok(); +} + +Result<Ok, nsresult> CencSampleEncryptionInfoEntry::Init(BoxReader& aReader) { + // Skip a reserved byte. + MOZ_TRY(aReader->ReadU8()); + + uint8_t pattern; + MOZ_TRY_VAR(pattern, aReader->ReadU8()); + mCryptByteBlock = pattern >> 4; + mSkipByteBlock = pattern & 0x0f; + + uint8_t isEncrypted; + MOZ_TRY_VAR(isEncrypted, aReader->ReadU8()); + mIsEncrypted = isEncrypted != 0; + + MOZ_TRY_VAR(mIVSize, aReader->ReadU8()); + + // Read the key id. + if (!mKeyId.SetLength(kKeyIdSize, fallible)) { + LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM"); + return Err(NS_ERROR_FAILURE); + } + for (uint32_t i = 0; i < kKeyIdSize; ++i) { + MOZ_TRY_VAR(mKeyId.ElementAt(i), aReader->ReadU8()); + } + + if (mIsEncrypted) { + if (mIVSize != 8 && mIVSize != 16) { + return Err(NS_ERROR_FAILURE); + } + } else if (mIVSize != 0) { + // Protected content with 0 sized IV indicates a constant IV is present. + // This is used for the cbcs scheme. 
+ uint8_t constantIVSize; + MOZ_TRY_VAR(constantIVSize, aReader->ReadU8()); + if (constantIVSize != 8 && constantIVSize != 16) { + LOG_WARN(CencSampleEncryptionInfoEntry, + "Unexpected constantIVSize: %" PRIu8, constantIVSize); + return Err(NS_ERROR_FAILURE); + } + if (!mConsantIV.SetLength(constantIVSize, mozilla::fallible)) { + LOG_ERROR(CencSampleEncryptionInfoEntry, "OOM"); + return Err(NS_ERROR_FAILURE); + } + for (uint32_t i = 0; i < constantIVSize; ++i) { + MOZ_TRY_VAR(mConsantIV.ElementAt(i), aReader->ReadU8()); + } + } + + return Ok(); +} +} // namespace mozilla + +#undef LOG_DEBUG +#undef LOG_WARN +#undef LOG_ERROR diff --git a/dom/media/mp4/MoofParser.h b/dom/media/mp4/MoofParser.h new file mode 100644 index 0000000000..1c3a1a5ce6 --- /dev/null +++ b/dom/media/mp4/MoofParser.h @@ -0,0 +1,361 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOOF_PARSER_H_ +#define MOOF_PARSER_H_ + +#include "mozilla/ResultExtensions.h" +#include "TimeUnits.h" +#include "mozilla/Variant.h" +#include "Atom.h" +#include "AtomType.h" +#include "SinfParser.h" +#include "ByteStream.h" +#include "MP4Interval.h" +#include "MediaResource.h" + +namespace mozilla { + +class Box; +class BoxContext; +class BoxReader; +class Moof; + +// Used to track the CTS end time of the last sample of a track +// in the preceeding Moof, so that we can smooth tracks' timestamps +// across Moofs. 
+struct TrackEndCts { + TrackEndCts(uint32_t aTrackId, const media::TimeUnit& aCtsEndTime) + : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {} + uint32_t mTrackId; + media::TimeUnit mCtsEndTime; +}; + +class Mvhd : public Atom { + public: + Mvhd() + : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {} + explicit Mvhd(Box& aBox); + + Result<media::TimeUnit, nsresult> ToTimeUnit(int64_t aTimescaleUnits) { + if (!mTimescale) { + NS_WARNING("invalid mTimescale"); + return Err(NS_ERROR_FAILURE); + } + return media::TimeUnit(aTimescaleUnits, mTimescale); + } + + uint64_t mCreationTime; + uint64_t mModificationTime; + uint32_t mTimescale; + uint64_t mDuration; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Tkhd : public Mvhd { + public: + Tkhd() : mTrackId(0) {} + explicit Tkhd(Box& aBox); + + uint32_t mTrackId; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Mdhd : public Mvhd { + public: + Mdhd() = default; + explicit Mdhd(Box& aBox); +}; + +class Trex : public Atom { + public: + explicit Trex(uint32_t aTrackId) + : mFlags(0), + mTrackId(aTrackId), + mDefaultSampleDescriptionIndex(0), + mDefaultSampleDuration(0), + mDefaultSampleSize(0), + mDefaultSampleFlags(0) {} + + explicit Trex(Box& aBox); + + uint32_t mFlags; + uint32_t mTrackId; + uint32_t mDefaultSampleDescriptionIndex; + uint32_t mDefaultSampleDuration; + uint32_t mDefaultSampleSize; + uint32_t mDefaultSampleFlags; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Tfhd : public Trex { + public: + explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) { + mValid = aTrex.IsValid(); + } + Tfhd(Box& aBox, Trex& aTrex); + + uint64_t mBaseDataOffset; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Tfdt : public Atom { + public: + Tfdt() : mBaseMediaDecodeTime(0) {} + explicit Tfdt(Box& aBox); + + uint64_t mBaseMediaDecodeTime; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Edts : public 
Atom { + public: + Edts() : mMediaStart(0), mEmptyOffset(0) {} + explicit Edts(Box& aBox); + virtual bool IsValid() override { + // edts is optional + return true; + } + + int64_t mMediaStart; + int64_t mEmptyOffset; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +struct Sample { + mozilla::MediaByteRange mByteRange; + mozilla::MediaByteRange mCencRange; + media::TimeUnit mDecodeTime; + MP4Interval<media::TimeUnit> mCompositionRange; + bool mSync; +}; + +class Saiz final : public Atom { + public: + Saiz(Box& aBox, AtomType aDefaultType); + + AtomType mAuxInfoType; + uint32_t mAuxInfoTypeParameter; + FallibleTArray<uint8_t> mSampleInfoSize; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +class Saio final : public Atom { + public: + Saio(Box& aBox, AtomType aDefaultType); + + AtomType mAuxInfoType; + uint32_t mAuxInfoTypeParameter; + FallibleTArray<uint64_t> mOffsets; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +struct SampleToGroupEntry { + public: + static const uint32_t kTrackGroupDescriptionIndexBase = 0; + static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000; + + SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex) + : mSampleCount(aSampleCount), + mGroupDescriptionIndex(aGroupDescriptionIndex) {} + + uint32_t mSampleCount; + uint32_t mGroupDescriptionIndex; +}; + +class Sbgp final : public Atom // SampleToGroup box. +{ + public: + explicit Sbgp(Box& aBox); + + AtomType mGroupingType; + uint32_t mGroupingTypeParam; + FallibleTArray<SampleToGroupEntry> mEntries; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +// Stores information form CencSampleEncryptionInformationGroupEntry (seig). +// Cenc here refers to the common encryption standard, rather than the specific +// cenc scheme from that standard. This structure is used for all encryption +// schemes. I.e. it is used for both cenc and cbcs, not just cenc. 
+struct CencSampleEncryptionInfoEntry final { + public: + CencSampleEncryptionInfoEntry() = default; + + Result<Ok, nsresult> Init(BoxReader& aReader); + + bool mIsEncrypted = false; + uint8_t mIVSize = 0; + CopyableTArray<uint8_t> mKeyId; + uint8_t mCryptByteBlock = 0; + uint8_t mSkipByteBlock = 0; + CopyableTArray<uint8_t> mConsantIV; +}; + +class Sgpd final : public Atom // SampleGroupDescription box. +{ + public: + explicit Sgpd(Box& aBox); + + AtomType mGroupingType; + FallibleTArray<CencSampleEncryptionInfoEntry> mEntries; + + protected: + Result<Ok, nsresult> Parse(Box& aBox); +}; + +// Audio/video entries from the sample description box (stsd). We only need to +// store if these are encrypted, so do not need a specialized class for +// different audio and video data. Currently most of the parsing of these +// entries is done by mp4parse-rust, but the moof parser needs to know which of +// these are encrypted when parsing the track fragment header (tfhd). +struct SampleDescriptionEntry { + bool mIsEncryptedEntry = false; +}; + +// Used to indicate in variants if all tracks should be parsed.
+struct ParseAllTracks {}; + +using TrackParseMode = Variant<ParseAllTracks, uint32_t>; + +class Moof final : public Atom { + public: + Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex, + Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, + uint64_t* aDecodeTime, bool aIsAudio, + nsTArray<TrackEndCts>& aTracksEndCts); + bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges); + void FixRounding(const Moof& aMoof); + + mozilla::MediaByteRange mRange; + mozilla::MediaByteRange mMdatRange; + MP4Interval<media::TimeUnit> mTimeRange; + FallibleTArray<Sample> mIndex; + + FallibleTArray<CencSampleEncryptionInfoEntry> + mFragmentSampleEncryptionInfoEntries; + FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries; + + Tfhd mTfhd; + FallibleTArray<Saiz> mSaizs; + FallibleTArray<Saio> mSaios; + nsTArray<nsTArray<uint8_t>> mPsshes; + + private: + // aDecodeTime is updated to the end of the parsed TRAF on return. + void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex, + Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, + uint64_t* aDecodeTime, bool aIsAudio); + // aDecodeTime is updated to the end of the parsed TRUN on return. + Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd, + Edts& aEdts, uint64_t* aDecodeTime, + bool aIsAudio); + // Process the sample auxiliary information used by common encryption. + // aScheme is used to select the appropriate auxiliary information and should + // be set based on the encryption scheme used by the track being processed. + // Note, the term cenc here refers to the standard, not the specific scheme + // from that standard. I.e. this function is used to handle up auxiliary + // information from the cenc and cbcs schemes. 
+ bool ProcessCencAuxInfo(AtomType aScheme); + media::TimeUnit mMaxRoundingError; +}; + +DDLoggedTypeDeclName(MoofParser); + +class MoofParser : public DecoderDoctorLifeLogger<MoofParser> { + public: + MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode, + bool aIsAudio) + : mSource(aSource), + mOffset(0), + mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>() + : 0), + mIsAudio(aIsAudio), + mLastDecodeTime(0), + mTrackParseMode(aTrackParseMode) { + // Setting mIsMultitrackParser is a nasty work around for calculating + // the composition range for MSE that causes the parser to parse multiple + // tracks. Ideally we'd store an array of tracks with different metadata + // for each. + DDLINKCHILD("source", aSource); + } + bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges); + // If *aCanEvict is set to true, then this will remove all moofs already parsed + // from index then rebuild the index. *aCanEvict is set to true upon return if + // some moofs were removed.
+ bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges, + bool* aCanEvict); + bool RebuildFragmentedIndex(BoxContext& aContext); + MP4Interval<media::TimeUnit> GetCompositionRange( + const mozilla::MediaByteRangeSet& aByteRanges); + bool ReachedEnd(); + void ParseMoov(Box& aBox); + void ParseTrak(Box& aBox); + void ParseMdia(Box& aBox); + void ParseMvex(Box& aBox); + + void ParseMinf(Box& aBox); + void ParseStbl(Box& aBox); + void ParseStsd(Box& aBox); + void ParseEncrypted(Box& aBox); + + bool BlockingReadNextMoof(); + + already_AddRefed<mozilla::MediaByteBuffer> Metadata(); + MediaByteRange FirstCompleteMediaSegment(); + MediaByteRange FirstCompleteMediaHeader(); + + mozilla::MediaByteRange mInitRange; + RefPtr<ByteStream> mSource; + uint64_t mOffset; + Mvhd mMvhd; + Mdhd mMdhd; + Trex mTrex; + Tfdt mTfdt; + Edts mEdts; + Sinf mSinf; + + FallibleTArray<CencSampleEncryptionInfoEntry> + mTrackSampleEncryptionInfoEntries; + FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries; + FallibleTArray<SampleDescriptionEntry> mSampleDescriptions; + + nsTArray<Moof>& Moofs() { return mMoofs; } + + private: + void ScanForMetadata(mozilla::MediaByteRange& aMoov); + nsTArray<Moof> mMoofs; + nsTArray<MediaByteRange> mMediaRanges; + nsTArray<TrackEndCts> mTracksEndCts; + bool mIsAudio; + uint64_t mLastDecodeTime; + // Either a ParseAllTracks if in multitrack mode, or an integer representing + // the track_id for the track being parsed. If parsing a specific track, mTrex + // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0 + // is a valid track id -- this is not allowed in the spec, but such mp4s + // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary + // id based on the tracks being parsed. 
+ const TrackParseMode mTrackParseMode; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/ResourceStream.cpp b/dom/media/mp4/ResourceStream.cpp new file mode 100644 index 0000000000..ce2fb6f2f6 --- /dev/null +++ b/dom/media/mp4/ResourceStream.cpp @@ -0,0 +1,56 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ResourceStream.h" + +namespace mozilla { + +ResourceStream::ResourceStream(mozilla::MediaResource* aResource) + : mResource(aResource), mPinCount(0) { + MOZ_ASSERT(aResource); + DDLINKCHILD("resource", &mResource); +} + +ResourceStream::~ResourceStream() { MOZ_ASSERT(mPinCount == 0); } + +bool ResourceStream::ReadAt(int64_t aOffset, void* aBuffer, size_t aCount, + size_t* aBytesRead) { + uint32_t sum = 0; + uint32_t bytesRead = 0; + do { + uint64_t offset = aOffset + sum; + char* buffer = reinterpret_cast<char*>(aBuffer) + sum; + uint32_t toRead = aCount - sum; + nsresult rv = mResource.ReadAt(offset, buffer, toRead, &bytesRead); + if (NS_FAILED(rv)) { + return false; + } + sum += bytesRead; + } while (sum < aCount && bytesRead > 0); + + *aBytesRead = sum; + return true; +} + +bool ResourceStream::CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount, + size_t* aBytesRead) { + nsresult rv = mResource.GetResource()->ReadFromCache( + reinterpret_cast<char*>(aBuffer), aOffset, aCount); + if (NS_FAILED(rv)) { + *aBytesRead = 0; + return false; + } + *aBytesRead = aCount; + return true; +} + +bool ResourceStream::Length(int64_t* aSize) { + if (mResource.GetLength() < 0) return false; + *aSize = mResource.GetLength(); + return true; +} + +} // namespace mozilla diff --git a/dom/media/mp4/ResourceStream.h b/dom/media/mp4/ResourceStream.h new file mode 
100644 index 0000000000..1aa59fdaed --- /dev/null +++ b/dom/media/mp4/ResourceStream.h @@ -0,0 +1,48 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef RESOURCESTREAM_H_ +#define RESOURCESTREAM_H_ + +#include "MediaResource.h" +#include "ByteStream.h" +#include "mozilla/RefPtr.h" + +namespace mozilla { + +DDLoggedTypeDeclNameAndBase(ResourceStream, ByteStream); + +class ResourceStream : public ByteStream, + public DecoderDoctorLifeLogger<ResourceStream> { + public: + explicit ResourceStream(mozilla::MediaResource* aResource); + + virtual bool ReadAt(int64_t offset, void* aBuffer, size_t aCount, + size_t* aBytesRead) override; + virtual bool CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount, + size_t* aBytesRead) override; + virtual bool Length(int64_t* size) override; + + void Pin() { + mResource.GetResource()->Pin(); + ++mPinCount; + } + + void Unpin() { + mResource.GetResource()->Unpin(); + MOZ_ASSERT(mPinCount); + --mPinCount; + } + + protected: + virtual ~ResourceStream(); + + private: + mozilla::MediaResourceIndex mResource; + uint32_t mPinCount; +}; + +} // namespace mozilla + +#endif // RESOURCESTREAM_H_ diff --git a/dom/media/mp4/SampleIterator.cpp b/dom/media/mp4/SampleIterator.cpp new file mode 100644 index 0000000000..95fc8af457 --- /dev/null +++ b/dom/media/mp4/SampleIterator.cpp @@ -0,0 +1,712 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "SampleIterator.h" + +#include <algorithm> +#include <limits> + +#include "BufferReader.h" +#include "mozilla/RefPtr.h" +#include "MP4Interval.h" +#include "MP4Metadata.h" +#include "SinfParser.h" + +using namespace mozilla::media; + +namespace mozilla { + +class MOZ_STACK_CLASS RangeFinder { + public: + // Given that we're processing this in order we don't use a binary search + // to find the appropriate time range. Instead we search linearly from the + // last used point. + explicit RangeFinder(const MediaByteRangeSet& ranges) + : mRanges(ranges), mIndex(0) { + // Ranges must be normalised for this to work + } + + bool Contains(const MediaByteRange& aByteRange); + + private: + const MediaByteRangeSet& mRanges; + size_t mIndex; +}; + +bool RangeFinder::Contains(const MediaByteRange& aByteRange) { + if (mRanges.IsEmpty()) { + return false; + } + + if (mRanges[mIndex].ContainsStrict(aByteRange)) { + return true; + } + + if (aByteRange.mStart < mRanges[mIndex].mStart) { + // Search backwards + do { + if (!mIndex) { + return false; + } + --mIndex; + if (mRanges[mIndex].ContainsStrict(aByteRange)) { + return true; + } + } while (aByteRange.mStart < mRanges[mIndex].mStart); + + return false; + } + + while (aByteRange.mEnd > mRanges[mIndex].mEnd) { + if (mIndex == mRanges.Length() - 1) { + return false; + } + ++mIndex; + if (mRanges[mIndex].ContainsStrict(aByteRange)) { + return true; + } + } + + return false; +} + +SampleIterator::SampleIterator(MP4SampleIndex* aIndex) + : mIndex(aIndex), mCurrentMoof(0), mCurrentSample(0) { + mIndex->RegisterIterator(this); +} + +SampleIterator::~SampleIterator() { mIndex->UnregisterIterator(this); } + +bool SampleIterator::HasNext() { return !!Get(); } + +already_AddRefed<MediaRawData> SampleIterator::GetNext() { + Sample* s(Get()); + if (!s) { + return nullptr; + } + + int64_t length = std::numeric_limits<int64_t>::max(); + mIndex->mSource->Length(&length); + if (s->mByteRange.mEnd > length) { + // We don't have this
complete sample. + return nullptr; + } + + RefPtr<MediaRawData> sample = new MediaRawData(); + sample->mTimecode = s->mDecodeTime; + sample->mTime = s->mCompositionRange.start; + sample->mDuration = s->mCompositionRange.Length(); + sample->mOffset = s->mByteRange.mStart; + sample->mKeyframe = s->mSync; + + UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter()); + // Do the blocking read + if (!writer->SetSize(s->mByteRange.Length())) { + return nullptr; + } + + size_t bytesRead; + if (!mIndex->mSource->ReadAt(sample->mOffset, writer->Data(), sample->Size(), + &bytesRead) || + bytesRead != sample->Size()) { + return nullptr; + } + + MoofParser* moofParser = mIndex->mMoofParser.get(); + if (!moofParser) { + // File is not fragmented, we can't have crypto, just early return. + Next(); + return sample.forget(); + } + + // We need to check if this moof has init data the CDM expects us to surface. + // This should happen when handling the first sample, even if that sample + // isn't encrypted (samples later in the moof may be). + if (mCurrentSample == 0) { + const nsTArray<Moof>& moofs = moofParser->Moofs(); + const Moof* currentMoof = &moofs[mCurrentMoof]; + if (!currentMoof->mPsshes.IsEmpty()) { + // This Moof contained crypto init data. Report that. We only report + // the init data on the Moof's first sample, to avoid reporting it more + // than once per Moof. + writer->mCrypto.mInitDatas.AppendElements(currentMoof->mPsshes); + writer->mCrypto.mInitDataType = u"cenc"_ns; + } + } + + auto cryptoSchemeResult = GetEncryptionScheme(); + if (cryptoSchemeResult.isErr()) { + // Log the error here in future. + return nullptr; + } + CryptoScheme cryptoScheme = cryptoSchemeResult.unwrap(); + if (cryptoScheme == CryptoScheme::None) { + // No crypto to handle, early return. 
+ Next(); + return sample.forget(); + } + + writer->mCrypto.mCryptoScheme = cryptoScheme; + MOZ_ASSERT(writer->mCrypto.mCryptoScheme != CryptoScheme::None, + "Should have early returned if we don't have a crypto scheme!"); + MOZ_ASSERT(writer->mCrypto.mKeyId.IsEmpty(), + "Sample should not already have a key ID"); + MOZ_ASSERT(writer->mCrypto.mConstantIV.IsEmpty(), + "Sample should not already have a constant IV"); + CencSampleEncryptionInfoEntry* sampleInfo = GetSampleEncryptionEntry(); + if (sampleInfo) { + // Use sample group information if present, this supersedes track level + // information. + writer->mCrypto.mKeyId.AppendElements(sampleInfo->mKeyId); + writer->mCrypto.mIVSize = sampleInfo->mIVSize; + writer->mCrypto.mCryptByteBlock = sampleInfo->mCryptByteBlock; + writer->mCrypto.mSkipByteBlock = sampleInfo->mSkipByteBlock; + writer->mCrypto.mConstantIV.AppendElements(sampleInfo->mConsantIV); + } else { + // Use the crypto info from track metadata + writer->mCrypto.mKeyId.AppendElements(moofParser->mSinf.mDefaultKeyID, 16); + writer->mCrypto.mIVSize = moofParser->mSinf.mDefaultIVSize; + writer->mCrypto.mCryptByteBlock = moofParser->mSinf.mDefaultCryptByteBlock; + writer->mCrypto.mSkipByteBlock = moofParser->mSinf.mDefaultSkipByteBlock; + writer->mCrypto.mConstantIV.AppendElements( + moofParser->mSinf.mDefaultConstantIV); + } + + if ((writer->mCrypto.mIVSize == 0 && writer->mCrypto.mConstantIV.IsEmpty()) || + (writer->mCrypto.mIVSize != 0 && s->mCencRange.IsEmpty())) { + // If mIVSize == 0, this indicates that a constant IV is in use, thus we + // should have a non empty constant IV. Alternatively if IV size is non + // zero, we should have an IV for this sample, which we need to look up + // in mCencRange (which must then be non empty). If neither of these are + // true we have bad crypto data, so bail. 
+ return nullptr; + } + // Parse auxiliary information if present + if (!s->mCencRange.IsEmpty()) { + // The size comes from an 8 bit field + AutoTArray<uint8_t, 256> cencAuxInfo; + cencAuxInfo.SetLength(s->mCencRange.Length()); + if (!mIndex->mSource->ReadAt(s->mCencRange.mStart, cencAuxInfo.Elements(), + cencAuxInfo.Length(), &bytesRead) || + bytesRead != cencAuxInfo.Length()) { + return nullptr; + } + BufferReader reader(cencAuxInfo); + if (!reader.ReadArray(writer->mCrypto.mIV, writer->mCrypto.mIVSize)) { + return nullptr; + } + + // Parse the auxiliary information for subsample information + auto res = reader.ReadU16(); + if (res.isOk() && res.unwrap() > 0) { + uint16_t count = res.unwrap(); + + if (reader.Remaining() < count * 6) { + return nullptr; + } + + for (size_t i = 0; i < count; i++) { + auto res_16 = reader.ReadU16(); + auto res_32 = reader.ReadU32(); + if (res_16.isErr() || res_32.isErr()) { + return nullptr; + } + writer->mCrypto.mPlainSizes.AppendElement(res_16.unwrap()); + writer->mCrypto.mEncryptedSizes.AppendElement(res_32.unwrap()); + } + } else { + // No subsample information means the entire sample is encrypted. + writer->mCrypto.mPlainSizes.AppendElement(0); + writer->mCrypto.mEncryptedSizes.AppendElement(sample->Size()); + } + } + + Next(); + + return sample.forget(); +} + +SampleDescriptionEntry* SampleIterator::GetSampleDescriptionEntry() { + nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs(); + Moof& currentMoof = moofs[mCurrentMoof]; + uint32_t sampleDescriptionIndex = + currentMoof.mTfhd.mDefaultSampleDescriptionIndex; + // Mp4 indices start at 1, shift down 1 so we index our array correctly. + sampleDescriptionIndex--; + FallibleTArray<SampleDescriptionEntry>& sampleDescriptions = + mIndex->mMoofParser->mSampleDescriptions; + if (sampleDescriptionIndex >= sampleDescriptions.Length()) { + // The sample description index is invalid, the mp4 is malformed. Bail out. 
+ return nullptr; + } + return &sampleDescriptions[sampleDescriptionIndex]; +} + +// Looks up the CencSampleEncryptionInfoEntry that applies to the current +// sample, using the sample-to-group entries of the current moof (or those of +// the track when the moof has none). Returns nullptr when the sample belongs +// to no group or when the group description index is out of range. +CencSampleEncryptionInfoEntry* SampleIterator::GetSampleEncryptionEntry() { + nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs(); + Moof* currentMoof = &moofs[mCurrentMoof]; + SampleToGroupEntry* sampleToGroupEntry = nullptr; + + // Default to using the sample to group entries for the fragment, otherwise + // fall back to the sample to group entries for the track. + FallibleTArray<SampleToGroupEntry>* sampleToGroupEntries = + currentMoof->mFragmentSampleToGroupEntries.Length() != 0 + ? &currentMoof->mFragmentSampleToGroupEntries + : &mIndex->mMoofParser->mTrackSampleToGroupEntries; + + // Accumulate in 64 bits so the running total of the 32-bit sample counts + // read from the file cannot wrap around. + uint64_t seen = 0; + + for (SampleToGroupEntry& entry : *sampleToGroupEntries) { + if (seen + entry.mSampleCount > mCurrentSample) { + sampleToGroupEntry = &entry; + break; + } + seen += entry.mSampleCount; + } + + // ISO-14496-12 Section 8.9.2.3 and 8.9.4 : group description index + // (1) ranges from 1 to the number of sample group entries in the track + // level SampleGroupDescription Box, or (2) takes the value 0 to + // indicate that this sample is a member of no group, in this case, the + // sample is associated with the default values specified in + // TrackEncryption Box, or (3) starts at 0x10001, i.e. the index value + // 1, with the value 1 in the top 16 bits, to reference fragment-local + // SampleGroupDescription Box. + + // According to the spec, ISO-14496-12, the sum of the sample counts in this + // box should be equal to the total number of samples, and, if less, the + // reader should behave as if an extra SampleToGroupEntry existed, with + // groupDescriptionIndex 0.
+ + if (!sampleToGroupEntry || sampleToGroupEntry->mGroupDescriptionIndex == 0) { + return nullptr; + } + + FallibleTArray<CencSampleEncryptionInfoEntry>* entries = + &mIndex->mMoofParser->mTrackSampleEncryptionInfoEntries; + + uint32_t groupIndex = sampleToGroupEntry->mGroupDescriptionIndex; + + // If the first bit is set to a one, then we should use the sample group + // descriptions from the fragment. + if (groupIndex > SampleToGroupEntry::kFragmentGroupDescriptionIndexBase) { + groupIndex -= SampleToGroupEntry::kFragmentGroupDescriptionIndexBase; + entries = &currentMoof->mFragmentSampleEncryptionInfoEntries; + } + + // The group_index is one based. + return groupIndex > entries->Length() ? nullptr + : &entries->ElementAt(groupIndex - 1); +} + +Result<CryptoScheme, nsCString> SampleIterator::GetEncryptionScheme() { + // See ISO/IEC 23001-7 for information on the metadata being checked. + MoofParser* moofParser = mIndex->mMoofParser.get(); + if (!moofParser) { + // This mp4 isn't fragmented so it can't be encrypted. + return CryptoScheme::None; + } + + SampleDescriptionEntry* sampleDescriptionEntry = GetSampleDescriptionEntry(); + if (!sampleDescriptionEntry) { + // For the file to be valid the tfhd must reference a sample description + // entry. + // If we encounter this error often, we may consider using the first + // sample description entry if the index is out of bounds. + return mozilla::Err(nsLiteralCString( + "Could not determine encryption scheme due to bad index for sample " + "description entry.")); + } + + if (!sampleDescriptionEntry->mIsEncryptedEntry) { + return CryptoScheme::None; + } + + if (!moofParser->mSinf.IsValid()) { + // The sample description entry says this sample is encrypted, but we + // don't have a valid sinf box. This shouldn't happen as the sinf box is + // part of the sample description entry. Suggests a malformed file, bail. + return mozilla::Err(nsLiteralCString( + "Could not determine encryption scheme.
Sample description entry " + "indicates encryption, but could not find associated sinf box.")); + } + + CencSampleEncryptionInfoEntry* sampleInfo = GetSampleEncryptionEntry(); + if (sampleInfo && !sampleInfo->mIsEncrypted) { + // May not have sample encryption info, but if we do, it should match other + // metadata. + return mozilla::Err(nsLiteralCString( + "Could not determine encryption scheme. Sample description entry " + "indicates encryption, but sample encryption entry indicates sample is " + "not encrypted. These should be consistent.")); + } + + if (moofParser->mSinf.mDefaultEncryptionType == AtomType("cenc")) { + return CryptoScheme::Cenc; + } else if (moofParser->mSinf.mDefaultEncryptionType == AtomType("cbcs")) { + return CryptoScheme::Cbcs; + } + return mozilla::Err(nsLiteralCString( + "Could not determine encryption scheme. Sample description entry " + "reports sample is encrypted, but no scheme, or an unsupported scheme " + "is in use.")); +} + +Sample* SampleIterator::Get() { + if (!mIndex->mMoofParser) { + MOZ_ASSERT(!mCurrentMoof); + return mCurrentSample < mIndex->mIndex.Length() + ? 
&mIndex->mIndex[mCurrentSample] + : nullptr; + } + + nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs(); + while (true) { + if (mCurrentMoof == moofs.Length()) { + if (!mIndex->mMoofParser->BlockingReadNextMoof()) { + return nullptr; + } + MOZ_ASSERT(mCurrentMoof < moofs.Length()); + } + if (mCurrentSample < moofs[mCurrentMoof].mIndex.Length()) { + break; + } + mCurrentSample = 0; + ++mCurrentMoof; + } + return &moofs[mCurrentMoof].mIndex[mCurrentSample]; +} + +void SampleIterator::Next() { ++mCurrentSample; } + +void SampleIterator::Seek(const TimeUnit& aTime) { + size_t syncMoof = 0; + size_t syncSample = 0; + mCurrentMoof = 0; + mCurrentSample = 0; + Sample* sample; + while (!!(sample = Get())) { + if (sample->mCompositionRange.start > aTime) { + break; + } + if (sample->mSync) { + syncMoof = mCurrentMoof; + syncSample = mCurrentSample; + } + if (sample->mCompositionRange.start == aTime) { + break; + } + Next(); + } + mCurrentMoof = syncMoof; + mCurrentSample = syncSample; +} + +TimeUnit SampleIterator::GetNextKeyframeTime() { + SampleIterator itr(*this); + Sample* sample; + while (!!(sample = itr.Get())) { + if (sample->mSync) { + return sample->mCompositionRange.start; + } + itr.Next(); + } + return TimeUnit::Invalid(); +} + +MP4SampleIndex::MP4SampleIndex(const IndiceWrapper& aIndices, + ByteStream* aSource, uint32_t aTrackId, + bool aIsAudio, uint32_t aTimeScale) + : mSource(aSource), mIsAudio(aIsAudio) { + if (!aIndices.Length()) { + mMoofParser = + MakeUnique<MoofParser>(aSource, AsVariant(aTrackId), aIsAudio); + } else { + if (!mIndex.SetCapacity(aIndices.Length(), fallible)) { + // OOM. + return; + } + media::IntervalSet<TimeUnit> intervalTime; + MediaByteRange intervalRange; + bool haveSync = false; + bool progressive = true; + int64_t lastOffset = 0; + for (size_t i = 0; i < aIndices.Length(); i++) { + Indice indice{}; + int64_t timescale = + mMoofParser ? 
AssertedCast<int64_t>(mMoofParser->mMvhd.mTimescale) + : aTimeScale; + if (!aIndices.GetIndice(i, indice)) { + // Out of index? + return; + } + if (indice.sync || mIsAudio) { + haveSync = true; + } + if (!haveSync) { + continue; + } + Sample sample; + sample.mByteRange = + MediaByteRange(indice.start_offset, indice.end_offset); + sample.mCompositionRange = MP4Interval<media::TimeUnit>( + TimeUnit(indice.start_composition, timescale), + TimeUnit(indice.end_composition, timescale)); + sample.mDecodeTime = TimeUnit(indice.start_decode, timescale); + sample.mSync = indice.sync || mIsAudio; + // FIXME: Make this infallible after bug 968520 is done. + MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible)); + if (indice.start_offset < lastOffset) { + NS_WARNING("Chunks in MP4 out of order, expect slow down"); + progressive = false; + } + lastOffset = indice.end_offset; + + // Pack audio samples in group of 128. + if (sample.mSync && progressive && (!mIsAudio || !(i % 128))) { + if (mDataOffset.Length()) { + auto& last = mDataOffset.LastElement(); + last.mEndOffset = intervalRange.mEnd; + NS_ASSERTION(intervalTime.Length() == 1, + "Discontinuous samples between keyframes"); + last.mTime.start = intervalTime.GetStart(); + last.mTime.end = intervalTime.GetEnd(); + } + if (!mDataOffset.AppendElement( + MP4DataOffset(mIndex.Length() - 1, indice.start_offset), + fallible)) { + // OOM. 
+ return; + } + intervalTime = media::IntervalSet<TimeUnit>(); + intervalRange = MediaByteRange(); + } + intervalTime += media::Interval<TimeUnit>(sample.mCompositionRange.start, + sample.mCompositionRange.end); + intervalRange = intervalRange.Span(sample.mByteRange); + } + + if (mDataOffset.Length() && progressive) { + Indice indice; + if (!aIndices.GetIndice(aIndices.Length() - 1, indice)) { + return; + } + auto& last = mDataOffset.LastElement(); + last.mEndOffset = indice.end_offset; + last.mTime = + MP4Interval<TimeUnit>(intervalTime.GetStart(), intervalTime.GetEnd()); + } else { + mDataOffset.Clear(); + } + } +} + +MP4SampleIndex::~MP4SampleIndex() = default; + +void MP4SampleIndex::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges) { + UpdateMoofIndex(aByteRanges, false); +} + +void MP4SampleIndex::UpdateMoofIndex(const MediaByteRangeSet& aByteRanges, + bool aCanEvict) { + if (!mMoofParser) { + return; + } + size_t moofs = mMoofParser->Moofs().Length(); + bool canEvict = aCanEvict && moofs > 1; + if (canEvict) { + // Check that we can trim the mMoofParser. We can only do so if all + // iterators have demuxed all possible samples. + for (const SampleIterator* iterator : mIterators) { + if ((iterator->mCurrentSample == 0 && iterator->mCurrentMoof == moofs) || + iterator->mCurrentMoof == moofs - 1) { + continue; + } + canEvict = false; + break; + } + } + mMoofParser->RebuildFragmentedIndex(aByteRanges, &canEvict); + if (canEvict) { + // The moofparser got trimmed. Adjust all registered iterators. 
+ for (SampleIterator* iterator : mIterators) { + iterator->mCurrentMoof -= moofs - 1; + } + } +} + +// Walks the relevant sample index backwards from its end and returns the +// largest composition end time seen once a sync sample is reached, provided +// every sample visited is contained in aByteRanges. Returns zero otherwise +// (including when a fragmented stream has not reached its end or has no moof). +TimeUnit MP4SampleIndex::GetEndCompositionIfBuffered( + const MediaByteRangeSet& aByteRanges) { + // Timebase for the zero values returned below. mMoofParser is null for + // non-fragmented files, so it must not be dereferenced unconditionally. + // NOTE(review): the non-fragmented fallback assumes microseconds; confirm + // whether the constructor's aTimeScale should be retained and used instead. + const int64_t base = + mMoofParser ? static_cast<int64_t>(mMoofParser->mMdhd.mTimescale) + : static_cast<int64_t>(1000000); + FallibleTArray<Sample>* index; + if (mMoofParser) { + if (!mMoofParser->ReachedEnd() || mMoofParser->Moofs().IsEmpty()) { + return TimeUnit::Zero(base); + } + index = &mMoofParser->Moofs().LastElement().mIndex; + } else { + index = &mIndex; + } + + media::TimeUnit lastComposition = TimeUnit::Zero(base); + RangeFinder rangeFinder(aByteRanges); + for (size_t i = index->Length(); i--;) { + const Sample& sample = (*index)[i]; + if (!rangeFinder.Contains(sample.mByteRange)) { + return TimeUnit::Zero(base); + } + lastComposition = std::max(lastComposition, sample.mCompositionRange.end); + if (sample.mSync) { + return lastComposition; + } + } + return TimeUnit::Zero(base); +} + +TimeIntervals MP4SampleIndex::ConvertByteRangesToTimeRanges( + const MediaByteRangeSet& aByteRanges) { + if (aByteRanges == mLastCachedRanges) { + return mLastBufferedRanges; + } + mLastCachedRanges = aByteRanges; + + if (mDataOffset.Length()) { + TimeIntervals timeRanges; + for (const auto& range : aByteRanges) { + uint32_t start = mDataOffset.IndexOfFirstElementGt(range.mStart - 1); + if (!mIsAudio && start == mDataOffset.Length()) { + continue; + } + uint32_t end = mDataOffset.IndexOfFirstElementGt( + range.mEnd, MP4DataOffset::EndOffsetComparator()); + if (!mIsAudio && end < start) { + continue; + } + if (mIsAudio && start && + range.Intersects(MediaByteRange(mDataOffset[start - 1].mStartOffset, + mDataOffset[start - 1].mEndOffset))) { + // Check if previous audio data block contains some available samples.
+ for (size_t i = mDataOffset[start - 1].mIndex; i < mIndex.Length(); + i++) { + if (range.ContainsStrict(mIndex[i].mByteRange)) { + timeRanges += TimeInterval(mIndex[i].mCompositionRange.start, + mIndex[i].mCompositionRange.end); + } + } + } + if (end > start) { + for (uint32_t i = start; i < end; i++) { + timeRanges += TimeInterval(mDataOffset[i].mTime.start, + mDataOffset[i].mTime.end); + } + } + if (end < mDataOffset.Length()) { + // Find samples in partial block contained in the byte range. + for (size_t i = mDataOffset[end].mIndex; + i < mIndex.Length() && range.ContainsStrict(mIndex[i].mByteRange); + i++) { + timeRanges += TimeInterval(mIndex[i].mCompositionRange.start, + mIndex[i].mCompositionRange.end); + } + } + } + mLastBufferedRanges = timeRanges; + return timeRanges; + } + + RangeFinder rangeFinder(aByteRanges); + nsTArray<MP4Interval<media::TimeUnit>> timeRanges; + nsTArray<FallibleTArray<Sample>*> indexes; + if (mMoofParser) { + // We take the index out of the moof parser and move it into a local + // variable so we don't get concurrency issues. It gets freed when we + // exit this function. + for (int i = 0; i < mMoofParser->Moofs().Length(); i++) { + Moof& moof = mMoofParser->Moofs()[i]; + + // We need the entire moof in order to play anything + if (rangeFinder.Contains(moof.mRange)) { + if (rangeFinder.Contains(moof.mMdatRange)) { + MP4Interval<media::TimeUnit>::SemiNormalAppend(timeRanges, + moof.mTimeRange); + } else { + indexes.AppendElement(&moof.mIndex); + } + } + } + } else { + indexes.AppendElement(&mIndex); + } + + bool hasSync = false; + for (size_t i = 0; i < indexes.Length(); i++) { + FallibleTArray<Sample>* index = indexes[i]; + for (size_t j = 0; j < index->Length(); j++) { + const Sample& sample = (*index)[j]; + if (!rangeFinder.Contains(sample.mByteRange)) { + // We process the index in decode order so we clear hasSync when we hit + // a range that isn't buffered. 
+ hasSync = false; + continue; + } + + hasSync |= sample.mSync; + if (!hasSync) { + continue; + } + + MP4Interval<media::TimeUnit>::SemiNormalAppend(timeRanges, + sample.mCompositionRange); + } + } + + // This fixes up when the composition order differs from the byte range order + nsTArray<MP4Interval<TimeUnit>> timeRangesNormalized; + MP4Interval<media::TimeUnit>::Normalize(timeRanges, &timeRangesNormalized); + // convert timeRanges. + media::TimeIntervals ranges; + for (size_t i = 0; i < timeRangesNormalized.Length(); i++) { + ranges += media::TimeInterval(timeRangesNormalized[i].start, + timeRangesNormalized[i].end); + } + mLastBufferedRanges = ranges; + return ranges; +} + +uint64_t MP4SampleIndex::GetEvictionOffset(const TimeUnit& aTime) { + uint64_t offset = std::numeric_limits<uint64_t>::max(); + if (mMoofParser) { + // We need to keep the whole moof if we're keeping any of it because the + // parser doesn't keep parsed moofs. + for (int i = 0; i < mMoofParser->Moofs().Length(); i++) { + Moof& moof = mMoofParser->Moofs()[i]; + + if (!moof.mTimeRange.Length().IsZero() && moof.mTimeRange.end > aTime) { + offset = std::min(offset, uint64_t(std::min(moof.mRange.mStart, + moof.mMdatRange.mStart))); + } + } + } else { + // We've already parsed and stored the moov so we don't need to keep it. + // All we need to keep is the sample data itself.
+ for (size_t i = 0; i < mIndex.Length(); i++) { + const Sample& sample = mIndex[i]; + if (aTime >= sample.mCompositionRange.end) { + offset = std::min(offset, uint64_t(sample.mByteRange.mEnd)); + } + } + } + return offset; +} + +void MP4SampleIndex::RegisterIterator(SampleIterator* aIterator) { + mIterators.AppendElement(aIterator); +} + +void MP4SampleIndex::UnregisterIterator(SampleIterator* aIterator) { + mIterators.RemoveElement(aIterator); +} + +} // namespace mozilla diff --git a/dom/media/mp4/SampleIterator.h b/dom/media/mp4/SampleIterator.h new file mode 100644 index 0000000000..61b60df6af --- /dev/null +++ b/dom/media/mp4/SampleIterator.h @@ -0,0 +1,134 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_MP4_SAMPLE_ITERATOR_H_ +#define DOM_MEDIA_MP4_SAMPLE_ITERATOR_H_ + +#include "ByteStream.h" +#include "MediaData.h" +#include "MediaResource.h" +#include "MoofParser.h" +#include "mozilla/ResultVariant.h" +#include "MP4Interval.h" +#include "nsISupportsImpl.h" +#include "TimeUnits.h" + +namespace mozilla { + +struct CencSampleEncryptionInfoEntry; +class IndiceWrapper; +class MP4SampleIndex; +struct Sample; + +class SampleIterator { + public: + explicit SampleIterator(MP4SampleIndex* aIndex); + ~SampleIterator(); + bool HasNext(); + already_AddRefed<mozilla::MediaRawData> GetNext(); + void Seek(const media::TimeUnit& aTime); + media::TimeUnit GetNextKeyframeTime(); + + private: + Sample* Get(); + + // Gets the sample description entry for the current moof, or nullptr if + // called without a valid current moof. + SampleDescriptionEntry* GetSampleDescriptionEntry(); + CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry(); + + // Determines the encryption scheme in use for the current sample. 
If the + // the scheme cannot be unambiguously determined, will return an error with + // the reason. + // + // Returns: Ok(CryptoScheme) if a crypto scheme, including None, can be + // determined, or Err(nsCString) if there is an issue determining the scheme. + Result<CryptoScheme, nsCString> GetEncryptionScheme(); + + void Next(); + RefPtr<MP4SampleIndex> mIndex; + friend class MP4SampleIndex; + size_t mCurrentMoof; + size_t mCurrentSample; +}; + +class MP4SampleIndex { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MP4SampleIndex) + + struct Indice { + uint64_t start_offset; + uint64_t end_offset; + int64_t start_composition; + int64_t end_composition; + int64_t start_decode; + bool sync; + }; + + struct MP4DataOffset { + MP4DataOffset(uint32_t aIndex, int64_t aStartOffset) + : mIndex(aIndex), mStartOffset(aStartOffset), mEndOffset(0) {} + + bool operator==(int64_t aStartOffset) const { + return mStartOffset == aStartOffset; + } + + bool operator!=(int64_t aStartOffset) const { + return mStartOffset != aStartOffset; + } + + bool operator<(int64_t aStartOffset) const { + return mStartOffset < aStartOffset; + } + + struct EndOffsetComparator { + bool Equals(const MP4DataOffset& a, const int64_t& b) const { + return a.mEndOffset == b; + } + + bool LessThan(const MP4DataOffset& a, const int64_t& b) const { + return a.mEndOffset < b; + } + }; + + uint32_t mIndex; + int64_t mStartOffset; + int64_t mEndOffset; + MP4Interval<media::TimeUnit> mTime; + }; + + MP4SampleIndex(const mozilla::IndiceWrapper& aIndices, ByteStream* aSource, + uint32_t aTrackId, bool aIsAudio, uint32_t aTimeScale); + + void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges, + bool aCanEvict); + void UpdateMoofIndex(const mozilla::MediaByteRangeSet& aByteRanges); + media::TimeUnit GetEndCompositionIfBuffered( + const mozilla::MediaByteRangeSet& aByteRanges); + mozilla::media::TimeIntervals ConvertByteRangesToTimeRanges( + const mozilla::MediaByteRangeSet& aByteRanges); + uint64_t 
GetEvictionOffset(const media::TimeUnit& aTime); + bool IsFragmented() { return !!mMoofParser; } + + friend class SampleIterator; + + private: + ~MP4SampleIndex(); + void RegisterIterator(SampleIterator* aIterator); + void UnregisterIterator(SampleIterator* aIterator); + + ByteStream* mSource; + FallibleTArray<Sample> mIndex; + FallibleTArray<MP4DataOffset> mDataOffset; + UniquePtr<MoofParser> mMoofParser; + nsTArray<SampleIterator*> mIterators; + + // ConvertByteRangesToTimeRanges cache + mozilla::MediaByteRangeSet mLastCachedRanges; + mozilla::media::TimeIntervals mLastBufferedRanges; + bool mIsAudio; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/mp4/SinfParser.cpp b/dom/media/mp4/SinfParser.cpp new file mode 100644 index 0000000000..660bf30246 --- /dev/null +++ b/dom/media/mp4/SinfParser.cpp @@ -0,0 +1,96 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/Try.h" +#include "mozilla/Unused.h" +#include "SinfParser.h" +#include "AtomType.h" +#include "Box.h" +#include "ByteStream.h" + +namespace mozilla { + +Sinf::Sinf(Box& aBox) : mDefaultIVSize(0) { + SinfParser parser(aBox); + if (parser.GetSinf().IsValid()) { + *this = parser.GetSinf(); + } +} + +SinfParser::SinfParser(Box& aBox) { + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("schm")) { + mozilla::Unused << ParseSchm(box); + } else if (box.IsType("schi")) { + mozilla::Unused << ParseSchi(box); + } + } +} + +Result<Ok, nsresult> SinfParser::ParseSchm(Box& aBox) { + BoxReader reader(aBox); + + if (reader->Remaining() < 8) { + return Err(NS_ERROR_FAILURE); + } + + MOZ_TRY(reader->ReadU32()); // flags -- ignore + MOZ_TRY_VAR(mSinf.mDefaultEncryptionType, reader->ReadU32()); + return Ok(); +} + +Result<Ok, nsresult> SinfParser::ParseSchi(Box& aBox) { + for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { + if (box.IsType("tenc") && ParseTenc(box).isErr()) { + return Err(NS_ERROR_FAILURE); + } + } + return Ok(); +} + +Result<Ok, nsresult> SinfParser::ParseTenc(Box& aBox) { + BoxReader reader(aBox); + + if (reader->Remaining() < 24) { + return Err(NS_ERROR_FAILURE); + } + + uint32_t flags; + MOZ_TRY_VAR(flags, reader->ReadU32()); + uint8_t version = flags >> 24; + + // Skip reserved byte + MOZ_TRY(reader->ReadU8()); + if (version >= 1) { + uint8_t pattern; + MOZ_TRY_VAR(pattern, reader->ReadU8()); + mSinf.mDefaultCryptByteBlock = pattern >> 4; + mSinf.mDefaultSkipByteBlock = pattern & 0x0f; + } else { + // Reserved if version is less than 1 + MOZ_TRY(reader->ReadU8()); + mSinf.mDefaultCryptByteBlock = 0; + mSinf.mDefaultSkipByteBlock = 0; + } + + uint8_t isEncrypted; + MOZ_TRY_VAR(isEncrypted, reader->ReadU8()); + MOZ_TRY_VAR(mSinf.mDefaultIVSize, reader->ReadU8()); + memcpy(mSinf.mDefaultKeyID, reader->Read(16), 16); + + if (isEncrypted && mSinf.mDefaultIVSize == 0) { + uint8_t 
defaultConstantIVSize; + MOZ_TRY_VAR(defaultConstantIVSize, reader->ReadU8()); + if (!mSinf.mDefaultConstantIV.SetLength(defaultConstantIVSize, + mozilla::fallible)) { + return Err(NS_ERROR_FAILURE); + } + for (uint8_t i = 0; i < defaultConstantIVSize; i++) { + MOZ_TRY_VAR(mSinf.mDefaultConstantIV.ElementAt(i), reader->ReadU8()); + } + } + return Ok(); +} + +} // namespace mozilla diff --git a/dom/media/mp4/SinfParser.h b/dom/media/mp4/SinfParser.h new file mode 100644 index 0000000000..d0a09a1d98 --- /dev/null +++ b/dom/media/mp4/SinfParser.h @@ -0,0 +1,56 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef SINF_PARSER_H_ +#define SINF_PARSER_H_ + +#include "mozilla/ResultExtensions.h" +#include "Atom.h" +#include "AtomType.h" +#include "nsTArray.h" + +namespace mozilla { + +class Box; + +class Sinf : public Atom { + public: + Sinf() + : mDefaultIVSize(0), + + mDefaultCryptByteBlock(0), + mDefaultSkipByteBlock(0) {} + explicit Sinf(Box& aBox); + + bool IsValid() override { + return !!mDefaultEncryptionType && // Should have an encryption scheme + (mDefaultIVSize > 0 || // and either a default IV size + mDefaultConstantIV.Length() > 0); // or a constant IV. 
+ } + + uint8_t mDefaultIVSize; + AtomType mDefaultEncryptionType; + uint8_t mDefaultKeyID[16]; + uint8_t mDefaultCryptByteBlock; + uint8_t mDefaultSkipByteBlock; + CopyableTArray<uint8_t> mDefaultConstantIV; +}; + +class SinfParser { + public: + explicit SinfParser(Box& aBox); + + Sinf& GetSinf() { return mSinf; } + + private: + Result<Ok, nsresult> ParseSchm(Box& aBox); + Result<Ok, nsresult> ParseSchi(Box& aBox); + Result<Ok, nsresult> ParseTenc(Box& aBox); + + Sinf mSinf; +}; + +} // namespace mozilla + +#endif // SINF_PARSER_H_ diff --git a/dom/media/mp4/moz.build b/dom/media/mp4/moz.build new file mode 100644 index 0000000000..48fce2a040 --- /dev/null +++ b/dom/media/mp4/moz.build @@ -0,0 +1,45 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "Atom.h", + "AtomType.h", + "Box.h", + "BufferStream.h", + "ByteStream.h", + "DecoderData.h", + "MoofParser.h", + "MP4Decoder.h", + "MP4Demuxer.h", + "MP4Interval.h", + "MP4Metadata.h", + "ResourceStream.h", + "SampleIterator.h", + "SinfParser.h", +] + +UNIFIED_SOURCES += [ + "Box.cpp", + "BufferStream.cpp", + "DecoderData.cpp", + "MoofParser.cpp", + "MP4Decoder.cpp", + "MP4Demuxer.cpp", + "MP4Metadata.cpp", + "ResourceStream.cpp", + "SampleIterator.cpp", + "SinfParser.cpp", +] + +FINAL_LIBRARY = "xul" + +# Suppress warnings for now. +CXXFLAGS += [ + "-Wno-sign-compare", +] + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") |