357 lines
14 KiB
C++
357 lines
14 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "Adts.h"
|
|
#include "AnnexB.h"
|
|
#include "BufferReader.h"
|
|
#include "DecoderData.h"
|
|
#include "mozilla/ArrayUtils.h"
|
|
#include "mozilla/EndianUtils.h"
|
|
#include "mozilla/glean/DomMediaMp4Metrics.h"
|
|
#include "VideoUtils.h"
|
|
#include "MP4Metadata.h"
|
|
#include "mozilla/Logging.h"
|
|
|
|
#include "mp4parse.h"
|
|
|
|
#define LOG(...) \
|
|
MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
|
|
|
|
using mozilla::media::TimeUnit;
|
|
|
|
namespace mozilla {
|
|
|
|
// Walks the buffer [aData, aData + aLength) as a back-to-back sequence of
// records and appends one PsshInfo to `pssh` per record. Each record is read
// as: 16 elements into PsshInfo::uuid, a uint32_t payload length, and then
// that many elements into PsshInfo::data.
//
// Returns Ok once the reader reports nothing remaining, or
// Err(NS_ERROR_FAILURE) as soon as any read cannot be satisfied. Records that
// were appended before a failure stay in `pssh`.
mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate(
    const uint8_t* aData, size_t aLength) {
  BufferReader input(aData, aLength);

  while (input.Remaining()) {
    PsshInfo entry;

    // The uuid must be readable and a length field must follow it. The
    // short-circuit keeps the same evaluation order as checking them one
    // after the other.
    const bool haveHeader =
        input.ReadArray(entry.uuid, 16) && input.CanReadType<uint32_t>();
    if (!haveHeader) {
      return mozilla::Err(NS_ERROR_FAILURE);
    }

    const auto payloadLength = input.ReadType<uint32_t>();
    if (!input.ReadArray(entry.data, payloadLength)) {
      return mozilla::Err(NS_ERROR_FAILURE);
    }

    pssh.AppendElement(std::move(entry));
  }

  return mozilla::Ok();
}
|
|
|
|
// Copies the encryption parameters from aSinf into aConfig.mCrypto.
//
// When aSinf.is_encrypted is zero nothing is touched and NS_OK is returned.
// Otherwise the scheme type is mapped to CryptoScheme::Cenc or
// CryptoScheme::Cbcs; any other scheme type yields
// NS_ERROR_DOM_MEDIA_METADATA_ERR and leaves aConfig unmodified. On success
// the IV size, key id, crypt/skip byte block counts and constant IV are
// copied over (the key id and constant IV are appended to the existing
// arrays).
static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig,
                                            const Mp4parseSinfInfo& aSinf) {
  if (aSinf.is_encrypted == 0) {
    // Unprotected sample description: nothing to record.
    return NS_OK;
  }

  auto& crypto = aConfig.mCrypto;

  switch (aSinf.scheme_type) {
    case MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC:
      crypto.mCryptoScheme = CryptoScheme::Cenc;
      break;
    case MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS:
      crypto.mCryptoScheme = CryptoScheme::Cbcs;
      break;
    default:
      // Unsupported encryption type; bail out before copying anything.
      return MediaResult(
          NS_ERROR_DOM_MEDIA_METADATA_ERR,
          RESULT_DETAIL(
              "Unsupported encryption scheme encountered aSinf.scheme_type=%d",
              static_cast<int>(aSinf.scheme_type)));
  }

  crypto.mIVSize = aSinf.iv_size;
  crypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length);
  crypto.mCryptByteBlock = aSinf.crypt_byte_block;
  crypto.mSkipByteBlock = aSinf.skip_byte_block;
  crypto.mConstantIV.AppendElements(aSinf.constant_iv.data,
                                    aSinf.constant_iv.length);
  return NS_OK;
}
|
|
|
|
// Verify various information shared by Mp4ParseTrackAudioInfo and
|
|
// Mp4ParseTrackVideoInfo and record telemetry on that info. Returns an
|
|
// appropriate MediaResult indicating if the info is valid or not.
|
|
// This verifies:
|
|
// - That we have a sample_info_count > 0 (valid tracks should have at least one
|
|
// sample description entry)
|
|
// - That only a single codec is used across all sample infos, as we don't
|
|
// handle multiple.
|
|
// - If more than one sample information structures contain crypto info. This
|
|
// case is not fatal (we don't return an error), but does record telemetry
|
|
// to help judge if we need more handling in gecko for multiple crypto.
|
|
//
|
|
// Telemetry is also recorded on the above. As of writing, the
|
|
// telemetry is recorded to give us early warning if MP4s exist that we're not
|
|
// handling. Note, if adding new checks and telemetry to this function,
|
|
// telemetry should be recorded before returning to ensure it is gathered.
|
|
template <typename Mp4ParseTrackAudioOrVideoInfo>
|
|
static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry(
|
|
Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) {
|
|
glean::media_mp4_parse::num_sample_description_entries.AccumulateSingleSample(
|
|
audioOrVideoInfo->sample_info_count);
|
|
|
|
bool hasMultipleCodecs = false;
|
|
uint32_t cryptoCount = 0;
|
|
Mp4parseCodec codecType = audioOrVideoInfo->sample_info[0].codec_type;
|
|
for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) {
|
|
if (audioOrVideoInfo->sample_info[0].codec_type != codecType) {
|
|
hasMultipleCodecs = true;
|
|
}
|
|
|
|
// Update our encryption info if any is present on the sample info.
|
|
if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) {
|
|
cryptoCount += 1;
|
|
}
|
|
}
|
|
|
|
glean::media_mp4_parse::sample_description_entries_have_multiple_codecs
|
|
.EnumGet(static_cast<glean::media_mp4_parse::
|
|
SampleDescriptionEntriesHaveMultipleCodecsLabel>(
|
|
hasMultipleCodecs))
|
|
.Add();
|
|
|
|
// Accumulate if we have multiple (2 or more) crypto entries.
|
|
// TODO(1715283): rework this to count number of crypto entries + gather
|
|
// richer data.
|
|
glean::media_mp4_parse::sample_description_entries_have_multiple_crypto
|
|
.EnumGet(static_cast<glean::media_mp4_parse::
|
|
SampleDescriptionEntriesHaveMultipleCryptoLabel>(
|
|
cryptoCount >= 2))
|
|
.Add();
|
|
|
|
if (audioOrVideoInfo->sample_info_count == 0) {
|
|
return MediaResult(
|
|
NS_ERROR_DOM_MEDIA_METADATA_ERR,
|
|
RESULT_DETAIL("Got 0 sample info while verifying track."));
|
|
}
|
|
|
|
if (hasMultipleCodecs) {
|
|
// Different codecs in a single track. We don't handle this.
|
|
return MediaResult(
|
|
NS_ERROR_DOM_MEDIA_METADATA_ERR,
|
|
RESULT_DETAIL("Multiple codecs encountered while verifying track."));
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
// Populates this audio track info from the parser's track description.
//
// aTrack   - generic track data (track id, duration, media_time, time_scale).
// aAudio   - audio sample description entries. The entries are validated
//            first via VerifyAudioOrVideoInfoAndRecordTelemetry; after that
//            the first entry is treated as representative of the whole track.
// aIndices - optional sample index; when non-null it is used, for AAC only,
//            to derive the media frame count handed to the decoder.
//
// Returns NS_OK on success. Returns the verification error, the error from
// UpdateTrackProtectedInfo for the first encrypted entry, or
// NS_ERROR_DOM_MEDIA_METADATA_ERR when the index reports a start time later
// than its end time.
MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack,
                                 const Mp4parseTrackAudioInfo* aAudio,
                                 const IndiceWrapper* aIndices) {
  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio);
  NS_ENSURE_SUCCESS(rv, rv);

  Mp4parseCodec codecType = aAudio->sample_info[0].codec_type;

  // Only the first encrypted sample description contributes crypto info.
  for (uint32_t i = 0; i < aAudio->sample_info_count; i++) {
    if (aAudio->sample_info[i].protected_data.is_encrypted) {
      auto cryptoRv = UpdateTrackProtectedInfo(
          *this, aAudio->sample_info[i].protected_data);
      NS_ENSURE_SUCCESS(cryptoRv, cryptoRv);
      break;
    }
  }

  // We assume that the members of the first sample info are representative of
  // the entire track. This code will need to be updated should this assumption
  // ever not hold. E.g. if we need to handle different codecs in a single
  // track, or if we have different numbers of channels in a single track.
  Mp4parseByteData mp4ParseSampleCodecSpecific =
      aAudio->sample_info[0].codec_specific_config;
  Mp4parseByteData extraData = aAudio->sample_info[0].extra_data;
  MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(),
             "Should have no codec specific data yet");
  if (codecType == MP4PARSE_CODEC_OPUS) {
    mMimeType = "audio/opus"_ns;
    OpusCodecSpecificData opusCodecSpecificData{};
    // The pre-skip value is read as a little-endian 16-bit integer at byte
    // offset 10 of the codec-specific config blob, which therefore has to be
    // at least 12 bytes long.
    // NOTE(review): the log text below says "microseconds" while the value is
    // stored in mContainerCodecDelayFrames unconverted -- confirm the units.
    if (mp4ParseSampleCodecSpecific.data &&
        mp4ParseSampleCodecSpecific.length >= 12) {
      uint16_t preskip = mozilla::LittleEndian::readUint16(
          mp4ParseSampleCodecSpecific.data + 10);
      opusCodecSpecificData.mContainerCodecDelayFrames = preskip;
      LOG("Opus stream in MP4 container, %" PRId64
          " microseconds of encoder delay (%" PRIu16 ").",
          opusCodecSpecificData.mContainerCodecDelayFrames, preskip);
    } else {
      // This file will error later as it will be rejected by the opus decoder.
      opusCodecSpecificData.mContainerCodecDelayFrames = 0;
    }
    opusCodecSpecificData.mHeadersBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(opusCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_AAC) {
    mMimeType = "audio/mp4a-latm"_ns;
    int64_t codecDelayUS = aTrack->media_time;
    const double USECS_PER_S = 1e6;
    // We can't use mozilla::UsecsToFrames here because we need to round, and it
    // floors.
    uint32_t encoderDelayFrameCount = 0;
    if (codecDelayUS > 0) {
      encoderDelayFrameCount = static_cast<uint32_t>(
          std::lround(static_cast<double>(codecDelayUS) *
                      aAudio->sample_info->sample_rate / USECS_PER_S));
      LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.",
          encoderDelayFrameCount);
    }

    uint64_t mediaFrameCount = 0;
    // Pass the padding number, in frames, to the AAC decoder as well.
    if (aIndices) {
      MP4SampleIndex::Indice firstIndice = {0};
      MP4SampleIndex::Indice lastIndice = {0};
      // Both the first and the last index entry are required: computing the
      // span from only one of them would silently use a zero-initialised
      // entry for the other. The emptiness check also keeps `Length() - 1`
      // from being evaluated on an empty index.
      const bool haveBothIndices =
          aIndices->Length() > 0 && aIndices->GetIndice(0, firstIndice) &&
          aIndices->GetIndice(aIndices->Length() - 1, lastIndice);
      if (haveBothIndices) {
        if (firstIndice.start_composition > lastIndice.end_composition) {
          return MediaResult(
              NS_ERROR_DOM_MEDIA_METADATA_ERR,
              RESULT_DETAIL("Inconsistent start and end time in index"));
        }
        // The span between the start of the first index entry and the end of
        // the last one is handed to the decoder so that trimming can be done
        // properly.
        // NOTE(review): the value is logged and stored as a frame count, but
        // no unit conversion happens here -- confirm the unit of
        // start_composition / end_composition.
        mediaFrameCount =
            lastIndice.end_composition - firstIndice.start_composition;
        LOG("AAC stream in MP4 container, total media duration is %" PRIu64
            " frames",
            mediaFrameCount);
      } else {
        LOG("AAC stream in MP4 container, couldn't determine total media time");
      }
    }

    AacCodecSpecificData aacCodecSpecificData{};

    aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount;
    aacCodecSpecificData.mMediaFrameCount = mediaFrameCount;

    // codec specific data is used to store the DecoderConfigDescriptor.
    aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    // extra data stores the ES_Descriptor.
    aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements(
        extraData.data, extraData.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(aacCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_FLAC) {
    MOZ_ASSERT(extraData.length == 0,
               "FLAC doesn't expect extra data so doesn't handle it!");
    mMimeType = "audio/flac"_ns;
    FlacCodecSpecificData flacCodecSpecificData{};
    flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(flacCodecSpecificData)};
  } else if (codecType == MP4PARSE_CODEC_MP3) {
    // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4
    // specific box, which the rust parser recognizes). However, we don't
    // handle any such data here.
    mMimeType = "audio/mpeg"_ns;
    // TODO(bug 1705812): parse the encoder delay values from the mp4.
    mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}};
  }

  mRate = aAudio->sample_info[0].sample_rate;
  mChannels = aAudio->sample_info[0].channels;
  mBitDepth = aAudio->sample_info[0].bit_depth;
  mExtendedProfile =
      AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile);
  // Durations that do not fit in a TimeUnit are reported as infinite.
  if (aTrack->duration > TimeUnit::MaxTicks()) {
    mDuration = TimeUnit::FromInfinity();
  } else {
    mDuration =
        TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale);
  }
  mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale);
  mTrackId = aTrack->track_id;

  // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT.
  if (aAudio->sample_info[0].profile <= 4) {
    mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile);
  }

  if (mCodecSpecificConfig.is<NoCodecSpecificData>()) {
    // Handle codecs that are not explicitly handled above.
    MOZ_ASSERT(
        extraData.length == 0,
        "Codecs that use extra data should be explicitly handled already");
    AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob;
    // No codec specific metadata set, use the generic form.
    codecSpecificBinaryBlob.mBinaryBlob->AppendElements(
        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
    mCodecSpecificConfig =
        AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)};
  }

  return NS_OK;
}
|
|
|
|
bool MP4AudioInfo::IsValid() const {
|
|
return mChannels > 0 && mRate > 0 &&
|
|
// Accept any mime type here, but if it's aac, validate the profile.
|
|
(!mMimeType.EqualsLiteral("audio/mp4a-latm") || mProfile > 0 ||
|
|
mExtendedProfile > 0);
|
|
}
|
|
|
|
// Populates this video track info from the parser's track description.
//
// track - generic track data (track id, duration, media_time, time_scale).
// video - video-specific data: display size, rotation and the sample
//         description entries. The entries are validated first via
//         VerifyAudioOrVideoInfoAndRecordTelemetry; after that the first
//         entry is treated as representative of the whole track.
//
// Returns NS_OK on success, the verification error, or the error from
// UpdateTrackProtectedInfo for the first encrypted entry.
MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track,
                                 const Mp4parseTrackVideoInfo* video) {
  auto verifyResult = VerifyAudioOrVideoInfoAndRecordTelemetry(video);
  NS_ENSURE_SUCCESS(verifyResult, verifyResult);

  Mp4parseCodec codec = video->sample_info[0].codec_type;

  // Crypto info is taken from the first encrypted sample description only.
  for (uint32_t idx = 0; idx < video->sample_info_count; ++idx) {
    const auto& protection = video->sample_info[idx].protected_data;
    if (!protection.is_encrypted) {
      continue;
    }
    auto cryptoResult = UpdateTrackProtectedInfo(*this, protection);
    NS_ENSURE_SUCCESS(cryptoResult, cryptoResult);
    break;
  }

  // The first sample info is assumed to describe the entire track. This will
  // need revisiting if a single track ever has to support several codecs.
  switch (codec) {
    case MP4PARSE_CODEC_AVC:
      mMimeType = "video/avc"_ns;
      break;
    case MP4PARSE_CODEC_VP9:
      mMimeType = "video/vp9"_ns;
      break;
    case MP4PARSE_CODEC_AV1:
      mMimeType = "video/av1"_ns;
      break;
    case MP4PARSE_CODEC_MP4V:
      mMimeType = "video/mp4v-es"_ns;
      break;
    case MP4PARSE_CODEC_HEVC:
      mMimeType = "video/hevc"_ns;
      break;
    default:
      // Any other codec leaves mMimeType as it was.
      break;
  }

  mTrackId = track->track_id;

  // Durations that do not fit in a TimeUnit are reported as infinite.
  mDuration =
      track->duration > TimeUnit::MaxTicks()
          ? TimeUnit::FromInfinity()
          : TimeUnit(AssertedCast<int64_t>(track->duration), track->time_scale);
  mMediaTime = TimeUnit(track->media_time, track->time_scale);

  mDisplay.width = AssertedCast<int32_t>(video->display_width);
  mDisplay.height = AssertedCast<int32_t>(video->display_height);
  mImage.width = video->sample_info[0].image_width;
  mImage.height = video->sample_info[0].image_height;
  mRotation = ToSupportedRotation(video->rotation);

  // A zero-length extra data blob appends nothing.
  Mp4parseByteData codecExtraData = video->sample_info[0].extra_data;
  mExtraData->AppendElements(codecExtraData.data, codecExtraData.length);

  return NS_OK;
}
|
|
|
|
bool MP4VideoInfo::IsValid() const {
|
|
return (mDisplay.width > 0 && mDisplay.height > 0) ||
|
|
(mImage.width > 0 && mImage.height > 0);
|
|
}
|
|
|
|
} // namespace mozilla
|
|
|
|
#undef LOG
|