From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- dom/media/mp4/DecoderData.cpp | 357 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 dom/media/mp4/DecoderData.cpp (limited to 'dom/media/mp4/DecoderData.cpp') diff --git a/dom/media/mp4/DecoderData.cpp b/dom/media/mp4/DecoderData.cpp new file mode 100644 index 0000000000..b7c9c86954 --- /dev/null +++ b/dom/media/mp4/DecoderData.cpp @@ -0,0 +1,357 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Adts.h" +#include "AnnexB.h" +#include "BufferReader.h" +#include "DecoderData.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/Telemetry.h" +#include "VideoUtils.h" +#include "MP4Metadata.h" +#include "mozilla/Logging.h" + +// OpusDecoder header is really needed only by MP4 in rust +#include "OpusDecoder.h" +#include "mp4parse.h" + +#define LOG(...) \ + MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) + +using mozilla::media::TimeUnit; + +namespace mozilla { + +mozilla::Result CryptoFile::DoUpdate( + const uint8_t* aData, size_t aLength) { + BufferReader reader(aData, aLength); + while (reader.Remaining()) { + PsshInfo psshInfo; + if (!reader.ReadArray(psshInfo.uuid, 16)) { + return mozilla::Err(NS_ERROR_FAILURE); + } + + if (!reader.CanReadType()) { + return mozilla::Err(NS_ERROR_FAILURE); + } + auto length = reader.ReadType(); + + if (!reader.ReadArray(psshInfo.data, length)) { + return mozilla::Err(NS_ERROR_FAILURE); + } + pssh.AppendElement(std::move(psshInfo)); + } + return mozilla::Ok(); +} + +static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig, + const Mp4parseSinfInfo& aSinf) { + if (aSinf.is_encrypted != 0) { + if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC) { + aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cenc; + } else if (aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS) { + aConfig.mCrypto.mCryptoScheme = CryptoScheme::Cbcs; + } else { + // Unsupported encryption type; + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL( + "Unsupported encryption scheme encountered aSinf.scheme_type=%d", + static_cast(aSinf.scheme_type))); + } + aConfig.mCrypto.mIVSize = aSinf.iv_size; + aConfig.mCrypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length); + aConfig.mCrypto.mCryptByteBlock = aSinf.crypt_byte_block; + aConfig.mCrypto.mSkipByteBlock = aSinf.skip_byte_block; + aConfig.mCrypto.mConstantIV.AppendElements(aSinf.constant_iv.data, + aSinf.constant_iv.length); + } + return NS_OK; +} + +// Verify various information shared by Mp4ParseTrackAudioInfo and +// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an +// appropriate MediaResult indicating if the info is valid or not. +// This verifies: +// - That we have a sample_info_count > 0 (valid tracks should have at least one +// sample description entry) +// - That only a single codec is used across all sample infos, as we don't +// handle multiple. +// - If more than one sample information structures contain crypto info. This +// case is not fatal (we don't return an error), but does record telemetry +// to help judge if we need more handling in gecko for multiple crypto. +// +// Telemetry is also recorded on the above. As of writing, the +// telemetry is recorded to give us early warning if MP4s exist that we're not +// handling. Note, if adding new checks and telemetry to this function, +// telemetry should be recorded before returning to ensure it is gathered. +template +static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry( + Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) { + Telemetry::Accumulate( + Telemetry::MEDIA_MP4_PARSE_NUM_SAMPLE_DESCRIPTION_ENTRIES, + audioOrVideoInfo->sample_info_count); + + bool hasMultipleCodecs = false; + uint32_t cryptoCount = 0; + Mp4parseCodec codecType = audioOrVideoInfo->sample_info[0].codec_type; + for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) { + if (audioOrVideoInfo->sample_info[0].codec_type != codecType) { + hasMultipleCodecs = true; + } + + // Update our encryption info if any is present on the sample info. + if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) { + cryptoCount += 1; + } + } + + Telemetry::Accumulate( + Telemetry:: + MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CODECS, + hasMultipleCodecs); + + // Accumulate if we have multiple (2 or more) crypto entries. + // TODO(1715283): rework this to count number of crypto entries + gather + // richer data. + Telemetry::Accumulate( + Telemetry:: + MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CRYPTO, + cryptoCount >= 2); + + if (audioOrVideoInfo->sample_info_count == 0) { + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Got 0 sample info while verifying track.")); + } + + if (hasMultipleCodecs) { + // Different codecs in a single track. We don't handle this. + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Multiple codecs encountered while verifying track.")); + } + + return NS_OK; +} + +MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack, + const Mp4parseTrackAudioInfo* aAudio, + const IndiceWrapper* aIndices) { + auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio); + NS_ENSURE_SUCCESS(rv, rv); + + Mp4parseCodec codecType = aAudio->sample_info[0].codec_type; + for (uint32_t i = 0; i < aAudio->sample_info_count; i++) { + if (aAudio->sample_info[i].protected_data.is_encrypted) { + auto rv = UpdateTrackProtectedInfo(*this, + aAudio->sample_info[i].protected_data); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + } + + // We assume that the members of the first sample info are representative of + // the entire track. This code will need to be updated should this assumption + // ever not hold. E.g. if we need to handle different codecs in a single + // track, or if we have different numbers or channels in a single track. + Mp4parseByteData mp4ParseSampleCodecSpecific = + aAudio->sample_info[0].codec_specific_config; + Mp4parseByteData extraData = aAudio->sample_info[0].extra_data; + MOZ_ASSERT(mCodecSpecificConfig.is(), + "Should have no codec specific data yet"); + if (codecType == MP4PARSE_CODEC_OPUS) { + mMimeType = "audio/opus"_ns; + OpusCodecSpecificData opusCodecSpecificData{}; + // The Opus decoder expects the container's codec delay or + // pre-skip value, in microseconds, as a 64-bit int at the + // start of the codec-specific config blob. + if (mp4ParseSampleCodecSpecific.data && + mp4ParseSampleCodecSpecific.length >= 12) { + uint16_t preskip = mozilla::LittleEndian::readUint16( + mp4ParseSampleCodecSpecific.data + 10); + opusCodecSpecificData.mContainerCodecDelayMicroSeconds = + mozilla::FramesToUsecs(preskip, 48000).value(); + LOG("Opus stream in MP4 container, %" PRId64 + " microseconds of encoder delay (%" PRIu16 ").", + opusCodecSpecificData.mContainerCodecDelayMicroSeconds, preskip); + } else { + // This file will error later as it will be rejected by the opus decoder. + opusCodecSpecificData.mContainerCodecDelayMicroSeconds = 0; + } + opusCodecSpecificData.mHeadersBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(opusCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_AAC) { + mMimeType = "audio/mp4a-latm"_ns; + int64_t codecDelayUS = aTrack->media_time; + double USECS_PER_S = 1e6; + // We can't use mozilla::UsecsToFrames here because we need to round, and it + // floors. + uint32_t encoderDelayFrameCount = 0; + if (codecDelayUS > 0) { + encoderDelayFrameCount = static_cast( + std::lround(static_cast(codecDelayUS) * + aAudio->sample_info->sample_rate / USECS_PER_S)); + LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.", + encoderDelayFrameCount); + } + + uint64_t mediaFrameCount = 0; + // Pass the padding number, in frames, to the AAC decoder as well. + if (aIndices) { + MP4SampleIndex::Indice firstIndice = {0}; + MP4SampleIndex::Indice lastIndice = {0}; + bool rv = aIndices->GetIndice(0, firstIndice); + rv |= aIndices->GetIndice(aIndices->Length() - 1, lastIndice); + if (rv) { + if (firstIndice.start_composition > lastIndice.end_composition) { + return MediaResult( + NS_ERROR_DOM_MEDIA_METADATA_ERR, + RESULT_DETAIL("Inconsistent start and end time in index")); + } + // The `end_composition` member of the very last index member is the + // duration of the media in microseconds, excluding decoder delay and + // padding. Convert to frames and give to the decoder so that trimming + // can be done properly. + mediaFrameCount = + lastIndice.end_composition - firstIndice.start_composition; + LOG("AAC stream in MP4 container, total media duration is %" PRIu64 + " frames", + mediaFrameCount); + } else { + LOG("AAC stream in MP4 container, couldn't determine total media time"); + } + } + + AacCodecSpecificData aacCodecSpecificData{}; + + aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount; + aacCodecSpecificData.mMediaFrameCount = mediaFrameCount; + + // codec specific data is used to store the DecoderConfigDescriptor. + aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + // extra data stores the ES_Descriptor. + aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements( + extraData.data, extraData.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(aacCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_FLAC) { + MOZ_ASSERT(extraData.length == 0, + "FLAC doesn't expect extra data so doesn't handle it!"); + mMimeType = "audio/flac"_ns; + FlacCodecSpecificData flacCodecSpecificData{}; + flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(flacCodecSpecificData)}; + } else if (codecType == MP4PARSE_CODEC_MP3) { + // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4 + // specific box, which the rust parser recognizes). However, we don't + // handle any such data here. + mMimeType = "audio/mpeg"_ns; + // TODO(bug 1705812): parse the encoder delay values from the mp4. + mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}}; + } + + mRate = aAudio->sample_info[0].sample_rate; + mChannels = aAudio->sample_info[0].channels; + mBitDepth = aAudio->sample_info[0].bit_depth; + mExtendedProfile = + AssertedCast(aAudio->sample_info[0].extended_profile); + if (aTrack->duration > TimeUnit::MaxTicks()) { + mDuration = TimeUnit::FromInfinity(); + } else { + mDuration = + TimeUnit(AssertedCast(aTrack->duration), aTrack->time_scale); + } + mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale); + mTrackId = aTrack->track_id; + + // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT. + if (aAudio->sample_info[0].profile <= 4) { + mProfile = AssertedCast(aAudio->sample_info[0].profile); + } + + if (mCodecSpecificConfig.is()) { + // Handle codecs that are not explicitly handled above. + MOZ_ASSERT( + extraData.length == 0, + "Codecs that use extra data should be explicitly handled already"); + AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob; + // No codec specific metadata set, use the generic form. + codecSpecificBinaryBlob.mBinaryBlob->AppendElements( + mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length); + mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)}; + } + + return NS_OK; +} + +bool MP4AudioInfo::IsValid() const { + return mChannels > 0 && mRate > 0 && + // Accept any mime type here, but if it's aac, validate the profile. + (!mMimeType.EqualsLiteral("audio/mp4a-latm") || mProfile > 0 || + mExtendedProfile > 0); +} + +MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track, + const Mp4parseTrackVideoInfo* video) { + auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video); + NS_ENSURE_SUCCESS(rv, rv); + + Mp4parseCodec codecType = video->sample_info[0].codec_type; + for (uint32_t i = 0; i < video->sample_info_count; i++) { + if (video->sample_info[i].protected_data.is_encrypted) { + auto rv = + UpdateTrackProtectedInfo(*this, video->sample_info[i].protected_data); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + } + + // We assume that the members of the first sample info are representative of + // the entire track. This code will need to be updated should this assumption + // ever not hold. E.g. if we need to handle different codecs in a single + // track, or if we have different numbers or channels in a single track. + if (codecType == MP4PARSE_CODEC_AVC) { + mMimeType = "video/avc"_ns; + } else if (codecType == MP4PARSE_CODEC_VP9) { + mMimeType = "video/vp9"_ns; + } else if (codecType == MP4PARSE_CODEC_AV1) { + mMimeType = "video/av1"_ns; + } else if (codecType == MP4PARSE_CODEC_MP4V) { + mMimeType = "video/mp4v-es"_ns; + } + mTrackId = track->track_id; + if (track->duration > TimeUnit::MaxTicks()) { + mDuration = TimeUnit::FromInfinity(); + } else { + mDuration = + TimeUnit(AssertedCast(track->duration), track->time_scale); + } + mMediaTime = TimeUnit(track->media_time, track->time_scale); + mDisplay.width = AssertedCast(video->display_width); + mDisplay.height = AssertedCast(video->display_height); + mImage.width = video->sample_info[0].image_width; + mImage.height = video->sample_info[0].image_height; + mRotation = ToSupportedRotation(video->rotation); + Mp4parseByteData extraData = video->sample_info[0].extra_data; + // If length is 0 we append nothing + mExtraData->AppendElements(extraData.data, extraData.length); + return NS_OK; +} + +bool MP4VideoInfo::IsValid() const { + return (mDisplay.width > 0 && mDisplay.height > 0) || + (mImage.width > 0 && mImage.height > 0); +} + +} // namespace mozilla + +#undef LOG -- cgit v1.2.3