summaryrefslogtreecommitdiffstats
path: root/dom/media/mp4/MoofParser.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/media/mp4/MoofParser.h')
-rw-r--r--dom/media/mp4/MoofParser.h364
1 files changed, 364 insertions, 0 deletions
diff --git a/dom/media/mp4/MoofParser.h b/dom/media/mp4/MoofParser.h
new file mode 100644
index 0000000000..9099df7d14
--- /dev/null
+++ b/dom/media/mp4/MoofParser.h
@@ -0,0 +1,364 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOOF_PARSER_H_
+#define MOOF_PARSER_H_
+
+#include "mozilla/ResultExtensions.h"
+#include "mozilla/Variant.h"
+#include "Atom.h"
+#include "AtomType.h"
+#include "SinfParser.h"
+#include "ByteStream.h"
+#include "MP4Interval.h"
+#include "MediaResource.h"
+
+namespace mozilla {
+
+typedef int64_t Microseconds;
+
+class Box;
+class BoxContext;
+class BoxReader;
+class Moof;
+
+// Used to track the CTS end time of the last sample of a track
+// in the preceeding Moof, so that we can smooth tracks' timestamps
+// across Moofs.
+struct TrackEndCts {
+ TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime)
+ : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
+ uint32_t mTrackId;
+ Microseconds mCtsEndTime;
+};
+
+class Mvhd : public Atom {
+ public:
+ Mvhd()
+ : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
+ explicit Mvhd(Box& aBox);
+
+ Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) {
+ if (!mTimescale) {
+ NS_WARNING("invalid mTimescale");
+ return Err(NS_ERROR_FAILURE);
+ }
+ int64_t major = aTimescaleUnits / mTimescale;
+ int64_t remainder = aTimescaleUnits % mTimescale;
+ return major * 1000000ll + remainder * 1000000ll / mTimescale;
+ }
+
+ uint64_t mCreationTime;
+ uint64_t mModificationTime;
+ uint32_t mTimescale;
+ uint64_t mDuration;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Tkhd : public Mvhd {
+ public:
+ Tkhd() : mTrackId(0) {}
+ explicit Tkhd(Box& aBox);
+
+ uint32_t mTrackId;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Mdhd : public Mvhd {
+ public:
+ Mdhd() = default;
+ explicit Mdhd(Box& aBox);
+};
+
+class Trex : public Atom {
+ public:
+ explicit Trex(uint32_t aTrackId)
+ : mFlags(0),
+ mTrackId(aTrackId),
+ mDefaultSampleDescriptionIndex(0),
+ mDefaultSampleDuration(0),
+ mDefaultSampleSize(0),
+ mDefaultSampleFlags(0) {}
+
+ explicit Trex(Box& aBox);
+
+ uint32_t mFlags;
+ uint32_t mTrackId;
+ uint32_t mDefaultSampleDescriptionIndex;
+ uint32_t mDefaultSampleDuration;
+ uint32_t mDefaultSampleSize;
+ uint32_t mDefaultSampleFlags;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Tfhd : public Trex {
+ public:
+ explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
+ mValid = aTrex.IsValid();
+ }
+ Tfhd(Box& aBox, Trex& aTrex);
+
+ uint64_t mBaseDataOffset;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Tfdt : public Atom {
+ public:
+ Tfdt() : mBaseMediaDecodeTime(0) {}
+ explicit Tfdt(Box& aBox);
+
+ uint64_t mBaseMediaDecodeTime;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Edts : public Atom {
+ public:
+ Edts() : mMediaStart(0), mEmptyOffset(0) {}
+ explicit Edts(Box& aBox);
+ virtual bool IsValid() override {
+ // edts is optional
+ return true;
+ }
+
+ int64_t mMediaStart;
+ int64_t mEmptyOffset;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+struct Sample {
+ mozilla::MediaByteRange mByteRange;
+ mozilla::MediaByteRange mCencRange;
+ Microseconds mDecodeTime;
+ MP4Interval<Microseconds> mCompositionRange;
+ bool mSync;
+};
+
+class Saiz final : public Atom {
+ public:
+ Saiz(Box& aBox, AtomType aDefaultType);
+
+ AtomType mAuxInfoType;
+ uint32_t mAuxInfoTypeParameter;
+ FallibleTArray<uint8_t> mSampleInfoSize;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+class Saio final : public Atom {
+ public:
+ Saio(Box& aBox, AtomType aDefaultType);
+
+ AtomType mAuxInfoType;
+ uint32_t mAuxInfoTypeParameter;
+ FallibleTArray<uint64_t> mOffsets;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+struct SampleToGroupEntry {
+ public:
+ static const uint32_t kTrackGroupDescriptionIndexBase = 0;
+ static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;
+
+ SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
+ : mSampleCount(aSampleCount),
+ mGroupDescriptionIndex(aGroupDescriptionIndex) {}
+
+ uint32_t mSampleCount;
+ uint32_t mGroupDescriptionIndex;
+};
+
+class Sbgp final : public Atom // SampleToGroup box.
+{
+ public:
+ explicit Sbgp(Box& aBox);
+
+ AtomType mGroupingType;
+ uint32_t mGroupingTypeParam;
+ FallibleTArray<SampleToGroupEntry> mEntries;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Stores information form CencSampleEncryptionInformationGroupEntry (seig).
+// Cenc here refers to the common encryption standard, rather than the specific
+// cenc scheme from that standard. This structure is used for all encryption
+// schemes. I.e. it is used for both cenc and cbcs, not just cenc.
+struct CencSampleEncryptionInfoEntry final {
+ public:
+ CencSampleEncryptionInfoEntry() = default;
+
+ Result<Ok, nsresult> Init(BoxReader& aReader);
+
+ bool mIsEncrypted = false;
+ uint8_t mIVSize = 0;
+ CopyableTArray<uint8_t> mKeyId;
+ uint8_t mCryptByteBlock = 0;
+ uint8_t mSkipByteBlock = 0;
+ CopyableTArray<uint8_t> mConsantIV;
+};
+
+class Sgpd final : public Atom // SampleGroupDescription box.
+{
+ public:
+ explicit Sgpd(Box& aBox);
+
+ AtomType mGroupingType;
+ FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
+
+ protected:
+ Result<Ok, nsresult> Parse(Box& aBox);
+};
+
+// Audio/video entries from the sample description box (stsd). We only need to
+// store if these are encrypted, so do not need a specialized class for
+// different audio and video data. Currently most of the parsing of these
+// entries is by the mp4parse-rust, but moof pasrser needs to know which of
+// these are encrypted when parsing the track fragment header (tfhd).
+struct SampleDescriptionEntry {
+ bool mIsEncryptedEntry = false;
+};
+
+// Used to indicate in variants if all tracks should be parsed.
+struct ParseAllTracks {};
+
+typedef Variant<ParseAllTracks, uint32_t> TrackParseMode;
+
+class Moof final : public Atom {
+ public:
+ Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+ Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+ uint64_t* aDecodeTime, bool aIsAudio,
+ nsTArray<TrackEndCts>& aTracksEndCts);
+ bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
+ void FixRounding(const Moof& aMoof);
+
+ mozilla::MediaByteRange mRange;
+ mozilla::MediaByteRange mMdatRange;
+ MP4Interval<Microseconds> mTimeRange;
+ FallibleTArray<Sample> mIndex;
+
+ FallibleTArray<CencSampleEncryptionInfoEntry>
+ mFragmentSampleEncryptionInfoEntries;
+ FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
+
+ Tfhd mTfhd;
+ FallibleTArray<Saiz> mSaizs;
+ FallibleTArray<Saio> mSaios;
+ nsTArray<nsTArray<uint8_t>> mPsshes;
+
+ private:
+ // aDecodeTime is updated to the end of the parsed TRAF on return.
+ void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
+ Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
+ uint64_t* aDecodeTime, bool aIsAudio);
+ // aDecodeTime is updated to the end of the parsed TRUN on return.
+ Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
+ Edts& aEdts, uint64_t* aDecodeTime,
+ bool aIsAudio);
+ // Process the sample auxiliary information used by common encryption.
+ // aScheme is used to select the appropriate auxiliary information and should
+ // be set based on the encryption scheme used by the track being processed.
+ // Note, the term cenc here refers to the standard, not the specific scheme
+ // from that standard. I.e. this function is used to handle up auxiliary
+ // information from the cenc and cbcs schemes.
+ bool ProcessCencAuxInfo(AtomType aScheme);
+ uint64_t mMaxRoundingError;
+};
+
+DDLoggedTypeDeclName(MoofParser);
+
+class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
+ public:
+ MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
+ bool aIsAudio)
+ : mSource(aSource),
+ mOffset(0),
+ mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
+ : 0),
+ mIsAudio(aIsAudio),
+ mLastDecodeTime(0),
+ mTrackParseMode(aTrackParseMode) {
+ // Setting mIsMultitrackParser is a nasty work around for calculating
+ // the composition range for MSE that causes the parser to parse multiple
+ // tracks. Ideally we'd store an array of tracks with different metadata
+ // for each.
+ DDLINKCHILD("source", aSource);
+ }
+ bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
+ // If *aCanEvict is set to true. then will remove all moofs already parsed
+ // from index then rebuild the index. *aCanEvict is set to true upon return if
+ // some moofs were removed.
+ bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
+ bool* aCanEvict);
+ bool RebuildFragmentedIndex(BoxContext& aContext);
+ MP4Interval<Microseconds> GetCompositionRange(
+ const mozilla::MediaByteRangeSet& aByteRanges);
+ bool ReachedEnd();
+ void ParseMoov(Box& aBox);
+ void ParseTrak(Box& aBox);
+ void ParseMdia(Box& aBox);
+ void ParseMvex(Box& aBox);
+
+ void ParseMinf(Box& aBox);
+ void ParseStbl(Box& aBox);
+ void ParseStsd(Box& aBox);
+ void ParseEncrypted(Box& aBox);
+
+ bool BlockingReadNextMoof();
+
+ already_AddRefed<mozilla::MediaByteBuffer> Metadata();
+ MediaByteRange FirstCompleteMediaSegment();
+ MediaByteRange FirstCompleteMediaHeader();
+
+ mozilla::MediaByteRange mInitRange;
+ RefPtr<ByteStream> mSource;
+ uint64_t mOffset;
+ Mvhd mMvhd;
+ Mdhd mMdhd;
+ Trex mTrex;
+ Tfdt mTfdt;
+ Edts mEdts;
+ Sinf mSinf;
+
+ FallibleTArray<CencSampleEncryptionInfoEntry>
+ mTrackSampleEncryptionInfoEntries;
+ FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
+ FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
+
+ nsTArray<Moof>& Moofs() { return mMoofs; }
+
+ private:
+ void ScanForMetadata(mozilla::MediaByteRange& aMoov);
+ nsTArray<Moof> mMoofs;
+ nsTArray<MediaByteRange> mMediaRanges;
+ nsTArray<TrackEndCts> mTracksEndCts;
+ bool mIsAudio;
+ uint64_t mLastDecodeTime;
+ // Either a ParseAllTracks if in multitrack mode, or an integer representing
+ // the track_id for the track being parsed. If parsing a specific track, mTrex
+ // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
+ // is a valid track id -- this is not allowed in the spec, but such mp4s
+ // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
+ // id based on the tracks being parsed.
+ const TrackParseMode mTrackParseMode;
+};
+} // namespace mozilla
+
+#endif