1 files changed, 357 insertions, 0 deletions
diff --git a/dom/media/mp4/DecoderData.cpp b/dom/media/mp4/DecoderData.cpp
new file mode 100644
index 0000000000..b7c9c86954
--- /dev/null
+++ b/dom/media/mp4/DecoderData.cpp
@@ -0,0 +1,357 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Adts.h"
+#include "AnnexB.h"
+#include "BufferReader.h"
+#include "DecoderData.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Telemetry.h"
+#include "VideoUtils.h"
+#include "MP4Metadata.h"
+#include "mozilla/Logging.h"
+
+// OpusDecoder header is really needed only by MP4 in rust
+#include "OpusDecoder.h"
+#include "mp4parse.h"
+
+#define LOG(...) \
+  MOZ_LOG(gMP4MetadataLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
+
+using mozilla::media::TimeUnit;
+
+namespace mozilla {
+
+// Parses aData as a back-to-back sequence of PSSH records and appends each
+// successfully parsed record to `pssh`. Every record is read as a 16 element
+// uuid, a uint32_t length (as decoded by BufferReader::ReadType), and then
+// that many elements of payload.
+//
+// Returns Ok once the reader is exhausted, or Err(NS_ERROR_FAILURE) as soon as
+// any field of a record cannot be read. Records appended before the failure
+// are left in `pssh`.
+mozilla::Result<mozilla::Ok, nsresult> CryptoFile::DoUpdate(
+    const uint8_t* aData, size_t aLength) {
+  BufferReader reader(aData, aLength);
+  while (reader.Remaining()) {
+    PsshInfo entry;
+
+    // The uuid must be readable and a length field must follow it; the
+    // short-circuit keeps the reads in the same order as the checks.
+    if (!reader.ReadArray(entry.uuid, 16) ||
+        !reader.CanReadType<uint32_t>()) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+
+    auto payloadLength = reader.ReadType<uint32_t>();
+    if (!reader.ReadArray(entry.data, payloadLength)) {
+      return mozilla::Err(NS_ERROR_FAILURE);
+    }
+
+    pssh.AppendElement(std::move(entry));
+  }
+  return mozilla::Ok();
+}
+
+// Copies the protection (sinf) information from aSinf into aConfig.mCrypto.
+//
+// Nothing is touched and NS_OK is returned when aSinf is not flagged as
+// encrypted. For encrypted input only the cenc and cbcs schemes are accepted;
+// any other scheme yields NS_ERROR_DOM_MEDIA_METADATA_ERR and leaves aConfig
+// unmodified.
+static MediaResult UpdateTrackProtectedInfo(mozilla::TrackInfo& aConfig,
+                                            const Mp4parseSinfInfo& aSinf) {
+  if (aSinf.is_encrypted == 0) {
+    return NS_OK;
+  }
+
+  const bool isCenc =
+      aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CENC;
+  const bool isCbcs =
+      aSinf.scheme_type == MP4_PARSE_ENCRYPTION_SCHEME_TYPE_CBCS;
+  if (!isCenc && !isCbcs) {
+    // Unsupported encryption type; bail out before modifying aConfig.
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL(
+            "Unsupported encryption scheme encountered aSinf.scheme_type=%d",
+            static_cast<int>(aSinf.scheme_type)));
+  }
+
+  auto& crypto = aConfig.mCrypto;
+  crypto.mCryptoScheme = isCenc ? CryptoScheme::Cenc : CryptoScheme::Cbcs;
+  crypto.mIVSize = aSinf.iv_size;
+  crypto.mKeyId.AppendElements(aSinf.kid.data, aSinf.kid.length);
+  crypto.mCryptByteBlock = aSinf.crypt_byte_block;
+  crypto.mSkipByteBlock = aSinf.skip_byte_block;
+  crypto.mConstantIV.AppendElements(aSinf.constant_iv.data,
+                                    aSinf.constant_iv.length);
+  return NS_OK;
+}
+
+// Verify various information shared by Mp4parseTrackAudioInfo and
+// Mp4parseTrackVideoInfo and record telemetry on that info. Returns an
+// appropriate MediaResult indicating if the info is valid or not.
+// This verifies:
+// - That we have a sample_info_count > 0 (valid tracks should have at least one
+//   sample description entry).
+// - That only a single codec is used across all sample infos, as we don't
+//   handle multiple. Every entry is compared against the first entry.
+// - If more than one sample information structure contains crypto info. This
+//   case is not fatal (we don't return an error), but does record telemetry
+//   to help judge if we need more handling in gecko for multiple crypto.
+//
+// Telemetry is also recorded on the above. As of writing, the
+// telemetry is recorded to give us early warning if MP4s exist that we're not
+// handling. Note, if adding new checks and telemetry to this function,
+// telemetry should be recorded before returning to ensure it is gathered.
+//
+// Returns NS_OK when the info is usable, otherwise a MediaResult carrying
+// NS_ERROR_DOM_MEDIA_METADATA_ERR.
+template <typename Mp4ParseTrackAudioOrVideoInfo>
+static MediaResult VerifyAudioOrVideoInfoAndRecordTelemetry(
+    Mp4ParseTrackAudioOrVideoInfo* audioOrVideoInfo) {
+  Telemetry::Accumulate(
+      Telemetry::MEDIA_MP4_PARSE_NUM_SAMPLE_DESCRIPTION_ENTRIES,
+      audioOrVideoInfo->sample_info_count);
+
+  bool hasMultipleCodecs = false;
+  uint32_t cryptoCount = 0;
+  // sample_info[0] is only dereferenced inside the loop, so it is never read
+  // when sample_info_count is 0.
+  for (uint32_t i = 0; i < audioOrVideoInfo->sample_info_count; i++) {
+    // Compare each entry (not entry 0 with itself) against the first codec.
+    if (audioOrVideoInfo->sample_info[i].codec_type !=
+        audioOrVideoInfo->sample_info[0].codec_type) {
+      hasMultipleCodecs = true;
+    }
+
+    // Count the sample infos that carry encryption info; the info itself is
+    // applied to the track by the callers, not here.
+    if (audioOrVideoInfo->sample_info[i].protected_data.is_encrypted) {
+      cryptoCount += 1;
+    }
+  }
+
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CODECS,
+      hasMultipleCodecs);
+
+  // Accumulate if we have multiple (2 or more) crypto entries.
+  // TODO(1715283): rework this to count number of crypto entries + gather
+  // richer data.
+  Telemetry::Accumulate(
+      Telemetry::
+          MEDIA_MP4_PARSE_SAMPLE_DESCRIPTION_ENTRIES_HAVE_MULTIPLE_CRYPTO,
+      cryptoCount >= 2);
+
+  // All telemetry has been recorded at this point; now report errors.
+  if (audioOrVideoInfo->sample_info_count == 0) {
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Got 0 sample info while verifying track."));
+  }
+
+  if (hasMultipleCodecs) {
+    // Different codecs in a single track. We don't handle this.
+    return MediaResult(
+        NS_ERROR_DOM_MEDIA_METADATA_ERR,
+        RESULT_DETAIL("Multiple codecs encountered while verifying track."));
+  }
+
+  return NS_OK;
+}
+
+// Fills in this audio track info from the mp4parse track and audio
+// descriptions.
+//
+// aTrack:   generic track info (id, duration, media_time, time_scale).
+// aAudio:   audio sample description entries. The entries are verified first
+//           and the first entry is then treated as representative.
+// aIndices: optional (may be null) sample index. It is only consulted for
+//           AAC, where the first and last entries provide the media length
+//           handed to the decoder.
+//
+// Returns NS_OK on success. Returns an error MediaResult if verification of
+// aAudio fails, if an encrypted sample info uses an unsupported scheme, or if
+// the AAC index has a start time later than its end time.
+MediaResult MP4AudioInfo::Update(const Mp4parseTrackInfo* aTrack,
+                                 const Mp4parseTrackAudioInfo* aAudio,
+                                 const IndiceWrapper* aIndices) {
+  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(aAudio);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  Mp4parseCodec codecType = aAudio->sample_info[0].codec_type;
+  // Only the first encrypted sample info contributes crypto info.
+  for (uint32_t i = 0; i < aAudio->sample_info_count; i++) {
+    if (aAudio->sample_info[i].protected_data.is_encrypted) {
+      auto cryptoRv = UpdateTrackProtectedInfo(
+          *this, aAudio->sample_info[i].protected_data);
+      NS_ENSURE_SUCCESS(cryptoRv, cryptoRv);
+      break;
+    }
+  }
+
+  // We assume that the members of the first sample info are representative of
+  // the entire track. This code will need to be updated should this assumption
+  // ever not hold. E.g. if we need to handle different codecs in a single
+  // track, or if we have different numbers of channels in a single track.
+  Mp4parseByteData mp4ParseSampleCodecSpecific =
+      aAudio->sample_info[0].codec_specific_config;
+  Mp4parseByteData extraData = aAudio->sample_info[0].extra_data;
+  MOZ_ASSERT(mCodecSpecificConfig.is<NoCodecSpecificData>(),
+             "Should have no codec specific data yet");
+  if (codecType == MP4PARSE_CODEC_OPUS) {
+    mMimeType = "audio/opus"_ns;
+    OpusCodecSpecificData opusCodecSpecificData{};
+    // The Opus decoder expects the container's codec delay or
+    // pre-skip value, in microseconds, as a 64-bit int at the
+    // start of the codec-specific config blob.
+    // The pre-skip is read as a little-endian uint16 at byte offset 10, hence
+    // the requirement for at least 12 bytes of config data.
+    if (mp4ParseSampleCodecSpecific.data &&
+        mp4ParseSampleCodecSpecific.length >= 12) {
+      uint16_t preskip = mozilla::LittleEndian::readUint16(
+          mp4ParseSampleCodecSpecific.data + 10);
+      opusCodecSpecificData.mContainerCodecDelayMicroSeconds =
+          mozilla::FramesToUsecs(preskip, 48000).value();
+      LOG("Opus stream in MP4 container, %" PRId64
+          " microseconds of encoder delay (%" PRIu16 ").",
+          opusCodecSpecificData.mContainerCodecDelayMicroSeconds, preskip);
+    } else {
+      // This file will error later as it will be rejected by the opus decoder.
+      opusCodecSpecificData.mContainerCodecDelayMicroSeconds = 0;
+    }
+    opusCodecSpecificData.mHeadersBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(opusCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_AAC) {
+    mMimeType = "audio/mp4a-latm"_ns;
+    // NOTE(review): media_time is used directly as a microsecond count here;
+    // confirm against the mp4parse contract.
+    int64_t codecDelayUS = aTrack->media_time;
+    double USECS_PER_S = 1e6;
+    // We can't use mozilla::UsecsToFrames here because we need to round, and it
+    // floors.
+    uint32_t encoderDelayFrameCount = 0;
+    if (codecDelayUS > 0) {
+      encoderDelayFrameCount = static_cast<uint32_t>(
+          std::lround(static_cast<double>(codecDelayUS) *
+                      aAudio->sample_info[0].sample_rate / USECS_PER_S));
+      LOG("AAC stream in MP4 container, %" PRIu32 " frames of encoder delay.",
+          encoderDelayFrameCount);
+    }
+
+    uint64_t mediaFrameCount = 0;
+    // Pass the padding number, in frames, to the AAC decoder as well.
+    if (aIndices) {
+      MP4SampleIndex::Indice firstIndice = {0};
+      MP4SampleIndex::Indice lastIndice = {0};
+      // Both lookups must succeed because both entries are used below. The
+      // short-circuit also means the last entry is not requested (with an
+      // index of Length() - 1) when even the first entry is unavailable.
+      const bool haveBothIndices =
+          aIndices->GetIndice(0, firstIndice) &&
+          aIndices->GetIndice(aIndices->Length() - 1, lastIndice);
+      if (haveBothIndices) {
+        if (firstIndice.start_composition > lastIndice.end_composition) {
+          return MediaResult(
+              NS_ERROR_DOM_MEDIA_METADATA_ERR,
+              RESULT_DETAIL("Inconsistent start and end time in index"));
+        }
+        // The `end_composition` member of the very last index member is the
+        // duration of the media in microseconds, excluding decoder delay and
+        // padding. Convert to frames and give to the decoder so that trimming
+        // can be done properly.
+        // NOTE(review): no unit conversion is performed below; the difference
+        // is stored as-is. Confirm the index units match what the decoder
+        // expects for mMediaFrameCount.
+        mediaFrameCount =
+            lastIndice.end_composition - firstIndice.start_composition;
+        LOG("AAC stream in MP4 container, total media duration is %" PRIu64
+            " frames",
+            mediaFrameCount);
+      } else {
+        LOG("AAC stream in MP4 container, couldn't determine total media time");
+      }
+    }
+
+    AacCodecSpecificData aacCodecSpecificData{};
+
+    aacCodecSpecificData.mEncoderDelayFrames = encoderDelayFrameCount;
+    aacCodecSpecificData.mMediaFrameCount = mediaFrameCount;
+
+    // codec specific data is used to store the DecoderConfigDescriptor.
+    aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    // extra data stores the ES_Descriptor.
+    aacCodecSpecificData.mEsDescriptorBinaryBlob->AppendElements(
+        extraData.data, extraData.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(aacCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_FLAC) {
+    MOZ_ASSERT(extraData.length == 0,
+               "FLAC doesn't expect extra data so doesn't handle it!");
+    mMimeType = "audio/flac"_ns;
+    FlacCodecSpecificData flacCodecSpecificData{};
+    flacCodecSpecificData.mStreamInfoBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(flacCodecSpecificData)};
+  } else if (codecType == MP4PARSE_CODEC_MP3) {
+    // mp3 in mp4 can contain ES_Descriptor info (it also has a flash in mp4
+    // specific box, which the rust parser recognizes). However, we don't
+    // handle any such data here.
+    mMimeType = "audio/mpeg"_ns;
+    // TODO(bug 1705812): parse the encoder delay values from the mp4.
+    mCodecSpecificConfig = AudioCodecSpecificVariant{Mp3CodecSpecificData{}};
+  }
+
+  mRate = aAudio->sample_info[0].sample_rate;
+  mChannels = aAudio->sample_info[0].channels;
+  mBitDepth = aAudio->sample_info[0].bit_depth;
+  mExtendedProfile =
+      AssertedCast<int8_t>(aAudio->sample_info[0].extended_profile);
+  // Durations that don't fit in a TimeUnit tick count are reported as
+  // infinite rather than being cast.
+  if (aTrack->duration > TimeUnit::MaxTicks()) {
+    mDuration = TimeUnit::FromInfinity();
+  } else {
+    mDuration =
+        TimeUnit(AssertedCast<int64_t>(aTrack->duration), aTrack->time_scale);
+  }
+  mMediaTime = TimeUnit(aTrack->media_time, aTrack->time_scale);
+  mTrackId = aTrack->track_id;
+
+  // In stagefright, mProfile is kKeyAACProfile, mExtendedProfile is kKeyAACAOT.
+  if (aAudio->sample_info[0].profile <= 4) {
+    mProfile = AssertedCast<int8_t>(aAudio->sample_info[0].profile);
+  }
+
+  if (mCodecSpecificConfig.is<NoCodecSpecificData>()) {
+    // Handle codecs that are not explicitly handled above.
+    MOZ_ASSERT(
+        extraData.length == 0,
+        "Codecs that use extra data should be explicitly handled already");
+    AudioCodecSpecificBinaryBlob codecSpecificBinaryBlob;
+    // No codec specific metadata set, use the generic form.
+    codecSpecificBinaryBlob.mBinaryBlob->AppendElements(
+        mp4ParseSampleCodecSpecific.data, mp4ParseSampleCodecSpecific.length);
+    mCodecSpecificConfig =
+        AudioCodecSpecificVariant{std::move(codecSpecificBinaryBlob)};
+  }
+
+  return NS_OK;
+}
+
+// Reports whether this audio info is usable: the channel count and the rate
+// must both be positive, and a track whose mime type is "audio/mp4a-latm"
+// (AAC) must additionally have a positive profile or extended profile. Any
+// other mime type is accepted as-is.
+bool MP4AudioInfo::IsValid() const {
+  if (!(mChannels > 0 && mRate > 0)) {
+    return false;
+  }
+  if (!mMimeType.EqualsLiteral("audio/mp4a-latm")) {
+    // Not AAC: there is no profile to validate.
+    return true;
+  }
+  return mProfile > 0 || mExtendedProfile > 0;
+}
+
+// Fills in this video track info from the mp4parse track and video
+// descriptions.
+//
+// track: generic track info (id, duration, media_time, time_scale).
+// video: video sample description entries plus display size and rotation.
+//        The entries are verified first and the first entry is then treated
+//        as representative.
+//
+// Returns NS_OK on success, or an error MediaResult if verification of
+// `video` fails or an encrypted sample info uses an unsupported scheme.
+MediaResult MP4VideoInfo::Update(const Mp4parseTrackInfo* track,
+                                 const Mp4parseTrackVideoInfo* video) {
+  auto rv = VerifyAudioOrVideoInfoAndRecordTelemetry(video);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // We assume that the members of the first sample info are representative of
+  // the entire track. This code will need to be updated should this assumption
+  // ever not hold, e.g. if we need to handle different codecs in a single
+  // track.
+  const auto& firstSampleInfo = video->sample_info[0];
+
+  // Only the first encrypted sample info contributes crypto info.
+  for (uint32_t idx = 0; idx < video->sample_info_count; idx++) {
+    const auto& protection = video->sample_info[idx].protected_data;
+    if (!protection.is_encrypted) {
+      continue;
+    }
+    auto cryptoRv = UpdateTrackProtectedInfo(*this, protection);
+    NS_ENSURE_SUCCESS(cryptoRv, cryptoRv);
+    break;
+  }
+
+  // Codecs not listed here leave mMimeType untouched.
+  switch (firstSampleInfo.codec_type) {
+    case MP4PARSE_CODEC_AVC:
+      mMimeType = "video/avc"_ns;
+      break;
+    case MP4PARSE_CODEC_VP9:
+      mMimeType = "video/vp9"_ns;
+      break;
+    case MP4PARSE_CODEC_AV1:
+      mMimeType = "video/av1"_ns;
+      break;
+    case MP4PARSE_CODEC_MP4V:
+      mMimeType = "video/mp4v-es"_ns;
+      break;
+    default:
+      break;
+  }
+
+  mTrackId = track->track_id;
+
+  // Durations that don't fit in a TimeUnit tick count are reported as
+  // infinite rather than being cast.
+  if (track->duration > TimeUnit::MaxTicks()) {
+    mDuration = TimeUnit::FromInfinity();
+  } else {
+    mDuration =
+        TimeUnit(AssertedCast<int64_t>(track->duration), track->time_scale);
+  }
+  mMediaTime = TimeUnit(track->media_time, track->time_scale);
+
+  mDisplay.width = AssertedCast<int32_t>(video->display_width);
+  mDisplay.height = AssertedCast<int32_t>(video->display_height);
+  mImage.width = firstSampleInfo.image_width;
+  mImage.height = firstSampleInfo.image_height;
+  mRotation = ToSupportedRotation(video->rotation);
+
+  // If length is 0 we append nothing
+  mExtraData->AppendElements(firstSampleInfo.extra_data.data,
+                             firstSampleInfo.extra_data.length);
+  return NS_OK;
+}
+
+// Reports whether this video info is usable: either the display size or the
+// image size must have both a positive width and a positive height.
+bool MP4VideoInfo::IsValid() const {
+  if (mDisplay.width > 0 && mDisplay.height > 0) {
+    return true;
+  }
+  return mImage.width > 0 && mImage.height > 0;
+}
+
+}  // namespace mozilla
+
+#undef LOG