diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/mp3 | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/mp3')
-rw-r--r-- | dom/media/mp3/MP3Decoder.cpp | 45 | ||||
-rw-r--r-- | dom/media/mp3/MP3Decoder.h | 29 | ||||
-rw-r--r-- | dom/media/mp3/MP3Demuxer.cpp | 890 | ||||
-rw-r--r-- | dom/media/mp3/MP3Demuxer.h | 187 | ||||
-rw-r--r-- | dom/media/mp3/MP3FrameParser.cpp | 817 | ||||
-rw-r--r-- | dom/media/mp3/MP3FrameParser.h | 374 | ||||
-rw-r--r-- | dom/media/mp3/moz.build | 22 |
7 files changed, 2364 insertions, 0 deletions
diff --git a/dom/media/mp3/MP3Decoder.cpp b/dom/media/mp3/MP3Decoder.cpp new file mode 100644 index 0000000000..efbe96cf4b --- /dev/null +++ b/dom/media/mp3/MP3Decoder.cpp @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MP3Decoder.h" +#include "MediaContainerType.h" +#include "PDMFactory.h" + +namespace mozilla { + +/* static */ +bool MP3Decoder::IsEnabled() { + RefPtr<PDMFactory> platform = new PDMFactory(); + return platform->SupportsMimeType("audio/mpeg"_ns) != + media::DecodeSupport::Unsupported; +} + +/* static */ +bool MP3Decoder::IsSupportedType(const MediaContainerType& aContainerType) { + if (aContainerType.Type() == MEDIAMIMETYPE("audio/mp3") || + aContainerType.Type() == MEDIAMIMETYPE("audio/mpeg")) { + return IsEnabled() && (aContainerType.ExtendedType().Codecs().IsEmpty() || + aContainerType.ExtendedType().Codecs() == "mp3"); + } + return false; +} + +/* static */ +nsTArray<UniquePtr<TrackInfo>> MP3Decoder::GetTracksInfo( + const MediaContainerType& aType) { + nsTArray<UniquePtr<TrackInfo>> tracks; + if (!IsSupportedType(aType)) { + return tracks; + } + + tracks.AppendElement( + CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "audio/mpeg"_ns, aType)); + + return tracks; +} + +} // namespace mozilla diff --git a/dom/media/mp3/MP3Decoder.h b/dom/media/mp3/MP3Decoder.h new file mode 100644 index 0000000000..eab5387b51 --- /dev/null +++ b/dom/media/mp3/MP3Decoder.h @@ -0,0 +1,29 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef MP3Decoder_h_ +#define MP3Decoder_h_ + +#include "mozilla/UniquePtr.h" +#include "nsTArray.h" + +namespace mozilla { + +class MediaContainerType; +class TrackInfo; + +class MP3Decoder { + public: + // Returns true if the MP3 backend is preffed on, and we're running on a + // platform that is likely to have decoders for the format. + static bool IsEnabled(); + static bool IsSupportedType(const MediaContainerType& aContainerType); + static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo( + const MediaContainerType& aType); +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mp3/MP3Demuxer.cpp b/dom/media/mp3/MP3Demuxer.cpp new file mode 100644 index 0000000000..25d878b3be --- /dev/null +++ b/dom/media/mp3/MP3Demuxer.cpp @@ -0,0 +1,890 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MP3Demuxer.h" + +#include <algorithm> +#include <inttypes.h> +#include <limits> + +#include "ByteWriter.h" +#include "TimeUnits.h" +#include "VideoUtils.h" +#include "mozilla/Assertions.h" + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define MP3LOG(msg, ...) \ + DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__) +#define MP3LOGV(msg, ...) 
\ + DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, msg, ##__VA_ARGS__) + +using mozilla::media::TimeInterval; +using mozilla::media::TimeIntervals; +using mozilla::media::TimeUnit; + +namespace mozilla { + +// MP3Demuxer + +MP3Demuxer::MP3Demuxer(MediaResource* aSource) : mSource(aSource) { + DDLINKCHILD("source", aSource); +} + +bool MP3Demuxer::InitInternal() { + if (!mTrackDemuxer) { + mTrackDemuxer = new MP3TrackDemuxer(mSource); + DDLINKCHILD("track demuxer", mTrackDemuxer.get()); + } + return mTrackDemuxer->Init(); +} + +RefPtr<MP3Demuxer::InitPromise> MP3Demuxer::Init() { + if (!InitInternal()) { + MP3LOG("MP3Demuxer::Init() failure: waiting for data"); + + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, + __func__); + } + + MP3LOG("MP3Demuxer::Init() successful"); + return InitPromise::CreateAndResolve(NS_OK, __func__); +} + +uint32_t MP3Demuxer::GetNumberTracks(TrackInfo::TrackType aType) const { + return aType == TrackInfo::kAudioTrack ? 1u : 0u; +} + +already_AddRefed<MediaTrackDemuxer> MP3Demuxer::GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) { + if (!mTrackDemuxer) { + return nullptr; + } + return RefPtr<MP3TrackDemuxer>(mTrackDemuxer).forget(); +} + +bool MP3Demuxer::IsSeekable() const { return true; } + +void MP3Demuxer::NotifyDataArrived() { + // TODO: bug 1169485. + NS_WARNING("Unimplemented function NotifyDataArrived"); + MP3LOGV("NotifyDataArrived()"); +} + +void MP3Demuxer::NotifyDataRemoved() { + // TODO: bug 1169485. 
+ NS_WARNING("Unimplemented function NotifyDataRemoved"); + MP3LOGV("NotifyDataRemoved()"); +} + +// MP3TrackDemuxer + +MP3TrackDemuxer::MP3TrackDemuxer(MediaResource* aSource) + : mSource(aSource), + mFrameLock(false), + mOffset(0), + mFirstFrameOffset(0), + mNumParsedFrames(0), + mFrameIndex(0), + mTotalFrameLen(0), + mSamplesPerFrame(0), + mSamplesPerSecond(0), + mChannels(0) { + DDLINKCHILD("source", aSource); + Reset(); +} + +bool MP3TrackDemuxer::Init() { + Reset(); + FastSeek(TimeUnit()); + // Read the first frame to fetch sample rate and other meta data. + RefPtr<MediaRawData> frame(GetNextFrame(FindFirstFrame())); + + MP3LOG("Init StreamLength()=%" PRId64 " first-frame-found=%d", StreamLength(), + !!frame); + + if (!frame) { + return false; + } + + // Rewind back to the stream begin to avoid dropping the first frame. + FastSeek(TimeUnit()); + + if (!mInfo) { + mInfo = MakeUnique<AudioInfo>(); + } + + mInfo->mRate = mSamplesPerSecond; + mInfo->mChannels = mChannels; + mInfo->mBitDepth = 16; + mInfo->mMimeType = "audio/mpeg"; + mInfo->mDuration = Duration().valueOr(TimeUnit::FromInfinity()); + Mp3CodecSpecificData mp3CodecData{}; + if (mEncoderDelay) { + mp3CodecData.mEncoderDelayFrames = mEncoderDelay; + mp3CodecData.mEncoderPaddingFrames = mEncoderPadding; + } + mInfo->mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(mp3CodecData)}; + + MP3LOG("Init mInfo={mRate=%d mChannels=%d mBitDepth=%d mDuration=%s (%lfs)}", + mInfo->mRate, mInfo->mChannels, mInfo->mBitDepth, + mInfo->mDuration.ToString().get(), mInfo->mDuration.ToSeconds()); + + return mSamplesPerSecond && mChannels; +} + +media::TimeUnit MP3TrackDemuxer::SeekPosition() const { + TimeUnit pos = Duration(mFrameIndex); + auto duration = Duration(); + if (duration) { + pos = std::min(*duration, pos); + } + return pos; +} + +const FrameParser::Frame& MP3TrackDemuxer::LastFrame() const { + return mParser.PrevFrame(); +} + +RefPtr<MediaRawData> MP3TrackDemuxer::DemuxSample() { + return 
GetNextFrame(FindNextFrame()); +} + +const ID3Parser::ID3Header& MP3TrackDemuxer::ID3Header() const { + return mParser.ID3Header(); +} + +const FrameParser::VBRHeader& MP3TrackDemuxer::VBRInfo() const { + return mParser.VBRInfo(); +} + +UniquePtr<TrackInfo> MP3TrackDemuxer::GetInfo() const { return mInfo->Clone(); } + +RefPtr<MP3TrackDemuxer::SeekPromise> MP3TrackDemuxer::Seek( + const TimeUnit& aTime) { + mRemainingEncoderPadding = AssertedCast<int32_t>(mEncoderPadding); + // Efficiently seek to the position. + FastSeek(aTime); + // Correct seek position by scanning the next frames. + const TimeUnit seekTime = ScanUntil(aTime); + + return SeekPromise::CreateAndResolve(seekTime, __func__); +} + +TimeUnit MP3TrackDemuxer::FastSeek(const TimeUnit& aTime) { + MP3LOG("FastSeek(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, + aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames, + mFrameIndex, mOffset); + + const auto& vbr = mParser.VBRInfo(); + if (aTime.IsZero()) { + // Quick seek to the beginning of the stream. + mFrameIndex = 0; + } else if (vbr.IsTOCPresent() && Duration() && + *Duration() != TimeUnit::Zero()) { + // Use TOC for more precise seeking. 
+ mFrameIndex = FrameIndexFromOffset(vbr.Offset(aTime, Duration().value())); + } else if (AverageFrameLength() > 0) { + mFrameIndex = FrameIndexFromTime(aTime); + } + + mOffset = OffsetFromFrameIndex(mFrameIndex); + + if (mOffset > mFirstFrameOffset && StreamLength() > 0) { + mOffset = std::min(StreamLength() - 1, mOffset); + } + + mParser.EndFrameSession(); + + MP3LOG("FastSeek End TOC=%d avgFrameLen=%f mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mFirstFrameOffset=%" PRId64 + " mOffset=%" PRIu64 " SL=%" PRId64 " NumBytes=%u", + vbr.IsTOCPresent(), AverageFrameLength(), mNumParsedFrames, + mFrameIndex, mFirstFrameOffset, mOffset, StreamLength(), + vbr.NumBytes().valueOr(0)); + + return Duration(mFrameIndex); +} + +TimeUnit MP3TrackDemuxer::ScanUntil(const TimeUnit& aTime) { + MP3LOG("ScanUntil(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, + aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames, + mFrameIndex, mOffset); + + if (aTime.IsZero()) { + return FastSeek(aTime); + } + + if (Duration(mFrameIndex) > aTime) { + // We've seeked past the target time, rewind back a little to correct it. 
+ const int64_t rewind = aTime.ToMicroseconds() / 100; + FastSeek(aTime - TimeUnit::FromMicroseconds(rewind)); + } + + if (Duration(mFrameIndex + 1) > aTime) { + return SeekPosition(); + } + + MediaByteRange nextRange = FindNextFrame(); + while (SkipNextFrame(nextRange) && Duration(mFrameIndex + 1) < aTime) { + nextRange = FindNextFrame(); + MP3LOGV("ScanUntil* avgFrameLen=%f mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mOffset=%" PRIu64 " Duration=%" PRId64, + AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset, + Duration(mFrameIndex + 1).ToMicroseconds()); + } + + MP3LOG("ScanUntil End avgFrameLen=%f mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, + AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset); + + return SeekPosition(); +} + +RefPtr<MP3TrackDemuxer::SamplesPromise> MP3TrackDemuxer::GetSamples( + int32_t aNumSamples) { + MP3LOGV("GetSamples(%d) Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", + aNumSamples, mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, + mSamplesPerFrame, mSamplesPerSecond, mChannels); + + if (!aNumSamples) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + + RefPtr<SamplesHolder> frames = new SamplesHolder(); + + while (aNumSamples--) { + RefPtr<MediaRawData> frame(GetNextFrame(FindNextFrame())); + if (!frame) { + break; + } + if (!frame->HasValidTime()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + frames->AppendSample(frame); + } + + MP3LOGV("GetSamples() End mSamples.Size()=%zu aNumSamples=%d mOffset=%" PRIu64 + " mNumParsedFrames=%" PRIu64 " mFrameIndex=%" PRId64 + " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d " + "mChannels=%d", + frames->GetSamples().Length(), aNumSamples, mOffset, mNumParsedFrames, + mFrameIndex, mTotalFrameLen, 
mSamplesPerFrame, mSamplesPerSecond, + mChannels); + + if (frames->GetSamples().IsEmpty()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, + __func__); + } + return SamplesPromise::CreateAndResolve(frames, __func__); +} + +void MP3TrackDemuxer::Reset() { + MP3LOG("Reset()"); + + FastSeek(TimeUnit()); + mParser.Reset(); +} + +RefPtr<MP3TrackDemuxer::SkipAccessPointPromise> +MP3TrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) { + // Will not be called for audio-only resources. + return SkipAccessPointPromise::CreateAndReject( + SkipFailureHolder(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, 0), __func__); +} + +int64_t MP3TrackDemuxer::GetResourceOffset() const { return mOffset; } + +TimeIntervals MP3TrackDemuxer::GetBuffered() { + AutoPinned<MediaResource> stream(mSource.GetResource()); + TimeIntervals buffered; + + if (Duration() && stream->IsDataCachedToEndOfResource(0)) { + // Special case completely cached files. This also handles local files. + buffered += TimeInterval(TimeUnit(), *Duration()); + MP3LOGV("buffered = [[%" PRId64 ", %" PRId64 "]]", + TimeUnit().ToMicroseconds(), Duration()->ToMicroseconds()); + return buffered; + } + + MediaByteRangeSet ranges; + nsresult rv = stream->GetCachedRanges(ranges); + NS_ENSURE_SUCCESS(rv, buffered); + + for (const auto& range : ranges) { + if (range.IsEmpty()) { + continue; + } + TimeUnit start = Duration(FrameIndexFromOffset(range.mStart)); + TimeUnit end = Duration(FrameIndexFromOffset(range.mEnd)); + MP3LOGV("buffered += [%" PRId64 ", %" PRId64 "]", start.ToMicroseconds(), + end.ToMicroseconds()); + buffered += TimeInterval(start, end); + } + + // If the number of frames reported by the header is valid, + // the duration calculated from it is the maximal duration. 
+ if (ValidNumAudioFrames() && Duration()) { + TimeInterval duration = TimeInterval(TimeUnit(), *Duration()); + return buffered.Intersection(duration); + } + + return buffered; +} + +int64_t MP3TrackDemuxer::StreamLength() const { return mSource.GetLength(); } + +media::NullableTimeUnit NothingIfNegative(TimeUnit aDuration) { + if (aDuration.IsNegative()) { + return Nothing(); + } + return Some(aDuration); +} + +media::NullableTimeUnit MP3TrackDemuxer::Duration() const { + if (!mNumParsedFrames) { + return Nothing(); + } + + int64_t numFrames = 0; + const auto numAudioFrames = ValidNumAudioFrames(); + if (numAudioFrames) { + // VBR headers don't include the VBR header frame. + numFrames = numAudioFrames.value() + 1; + return NothingIfNegative(Duration(numFrames) - + (EncoderDelay() + Padding())); + } + + const int64_t streamLen = StreamLength(); + if (streamLen < 0) { // Live streams. + // Unknown length, we can't estimate duration. + return Nothing(); + } + // We can't early return when streamLen < 0 before checking numAudioFrames + // since some live radio will give an opening remark before playing music + // and the duration of the opening talk can be calculated by numAudioFrames. + + int64_t size = streamLen - mFirstFrameOffset; + MOZ_ASSERT(size); + + if (mParser.ID3v1MetadataFound() && size > 128) { + size -= 128; + } + + // If it's CBR, calculate the duration by bitrate. 
+ if (!mParser.VBRInfo().IsValid()) { + const uint32_t bitrate = mParser.CurrentFrame().Header().Bitrate(); + return NothingIfNegative( + media::TimeUnit::FromSeconds(static_cast<double>(size) * 8 / bitrate)); + } + + if (AverageFrameLength() > 0) { + numFrames = std::lround(AssertedCast<double>(size) / AverageFrameLength()); + } + + return NothingIfNegative(Duration(numFrames) - (EncoderDelay() + Padding())); +} + +TimeUnit MP3TrackDemuxer::Duration(int64_t aNumFrames) const { + if (!mSamplesPerSecond) { + return TimeUnit::Invalid(); + } + + const int64_t frameCount = aNumFrames * mSamplesPerFrame; + return TimeUnit(frameCount, mSamplesPerSecond); +} + +MediaByteRange MP3TrackDemuxer::FindFirstFrame() { + // We attempt to find multiple successive frames to avoid locking onto a false + // positive if we're fed a stream that has been cut mid-frame. + // For compatibility reasons we have to use the same frame count as Chrome, + // since some web sites actually use a file that short to test our playback + // capabilities. + static const int MIN_SUCCESSIVE_FRAMES = 3; + mFrameLock = false; + + MediaByteRange candidateFrame = FindNextFrame(); + int numSuccFrames = candidateFrame.Length() > 0; + MediaByteRange currentFrame = candidateFrame; + MP3LOGV("FindFirst() first candidate frame: mOffset=%" PRIu64 + " Length()=%" PRIu64, + candidateFrame.mStart, candidateFrame.Length()); + + while (candidateFrame.Length()) { + mParser.EndFrameSession(); + mOffset = currentFrame.mEnd; + const MediaByteRange prevFrame = currentFrame; + + // FindNextFrame() here will only return frames consistent with our + // candidate frame. + currentFrame = FindNextFrame(); + numSuccFrames += currentFrame.Length() > 0; + // Multiple successive false positives, which wouldn't be caught by the + // consistency checks alone, can be detected by wrong alignment (non-zero + // gap between frames). 
+ const int64_t frameSeparation = currentFrame.mStart - prevFrame.mEnd; + + if (!currentFrame.Length() || frameSeparation != 0) { + MP3LOGV( + "FindFirst() not enough successive frames detected, " + "rejecting candidate frame: successiveFrames=%d, last " + "Length()=%" PRIu64 ", last frameSeparation=%" PRId64, + numSuccFrames, currentFrame.Length(), frameSeparation); + + mParser.ResetFrameData(); + mOffset = candidateFrame.mStart + 1; + candidateFrame = FindNextFrame(); + numSuccFrames = candidateFrame.Length() > 0; + currentFrame = candidateFrame; + MP3LOGV("FindFirst() new candidate frame: mOffset=%" PRIu64 + " Length()=%" PRIu64, + candidateFrame.mStart, candidateFrame.Length()); + } else if (numSuccFrames >= MIN_SUCCESSIVE_FRAMES) { + MP3LOG( + "FindFirst() accepting candidate frame: " + "successiveFrames=%d", + numSuccFrames); + mFrameLock = true; + return candidateFrame; + } else if (prevFrame.mStart == mParser.TotalID3HeaderSize() && + currentFrame.mEnd == StreamLength()) { + // We accept streams with only two frames if both frames are valid. This + // is to handle very short files and provide parity with Chrome. See + // bug 1432195 for more information. This will not handle short files + // with a trailing tag, but as of writing we lack infrastructure to + // handle such tags. 
+ MP3LOG( + "FindFirst() accepting candidate frame for short stream: " + "successiveFrames=%d", + numSuccFrames); + mFrameLock = true; + return candidateFrame; + } + } + + MP3LOG("FindFirst() no suitable first frame found"); + return candidateFrame; +} + +static bool VerifyFrameConsistency(const FrameParser::Frame& aFrame1, + const FrameParser::Frame& aFrame2) { + const auto& h1 = aFrame1.Header(); + const auto& h2 = aFrame2.Header(); + + return h1.IsValid() && h2.IsValid() && h1.Layer() == h2.Layer() && + h1.SlotSize() == h2.SlotSize() && + h1.SamplesPerFrame() == h2.SamplesPerFrame() && + h1.Channels() == h2.Channels() && h1.SampleRate() == h2.SampleRate() && + h1.RawVersion() == h2.RawVersion() && + h1.RawProtection() == h2.RawProtection(); +} + +MediaByteRange MP3TrackDemuxer::FindNextFrame() { + static const int BUFFER_SIZE = 64; + static const uint32_t MAX_SKIPPABLE_BYTES = 1024 * BUFFER_SIZE; + + MP3LOGV("FindNext() Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", + mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, + mSamplesPerFrame, mSamplesPerSecond, mChannels); + + uint8_t buffer[BUFFER_SIZE]; + uint32_t read = 0; + + bool foundFrame = false; + int64_t frameHeaderOffset = 0; + int64_t startOffset = mOffset; + const bool searchingForID3 = !mParser.ID3Header().HasSizeBeenSet(); + + // Check whether we've found a valid MPEG frame. + while (!foundFrame) { + // How many bytes we can go without finding a valid MPEG frame + // (effectively rounded up to the next full buffer size multiple, as we + // only check this before reading the next set of data into the buffer). + + // This default value of 0 will be used during testing whether we're being + // fed a valid stream, which shouldn't have any gaps between frames. + uint32_t maxSkippableBytes = 0; + + if (!mParser.FirstFrame().Length()) { + // We're looking for the first valid frame. 
A well-formed file should + // have its first frame header right at the start (skipping an ID3 tag + // if necessary), but in order to support files that might have been + // improperly cut, we search the first few kB for a frame header. + maxSkippableBytes = MAX_SKIPPABLE_BYTES; + // Since we're counting the skipped bytes from the offset we started + // this parsing session with, we need to discount the ID3 tag size only + // if we were looking for one during the current frame parsing session. + if (searchingForID3) { + maxSkippableBytes += mParser.TotalID3HeaderSize(); + } + } else if (mFrameLock) { + // We've found a valid MPEG stream, so don't impose any limits + // to allow skipping corrupted data until we hit EOS. + maxSkippableBytes = std::numeric_limits<uint32_t>::max(); + } + + if ((mOffset - startOffset > maxSkippableBytes) || + (read = Read(buffer, mOffset, BUFFER_SIZE)) == 0) { + MP3LOG( + "FindNext() EOS or exceeded maxSkippeableBytes without a frame " + "(read: %d)", + read); + // This is not a valid MPEG audio stream or we've reached EOS, give up. + break; + } + + BufferReader reader(buffer, read); + uint32_t bytesToSkip = 0; + auto res = mParser.Parse(&reader, &bytesToSkip); + foundFrame = res.unwrapOr(false); + int64_t readerOffset = static_cast<int64_t>(reader.Offset()); + frameHeaderOffset = mOffset + readerOffset - FrameParser::FrameHeader::SIZE; + + // If we've found neither an MPEG frame header nor an ID3v2 tag, + // the reader shouldn't have any bytes remaining. + MOZ_ASSERT(foundFrame || bytesToSkip || !reader.Remaining()); + + if (foundFrame && mParser.FirstFrame().Length() && + !VerifyFrameConsistency(mParser.FirstFrame(), mParser.CurrentFrame())) { + MP3LOG("Skipping frame"); + // We've likely hit a false-positive, ignore it and proceed with the + // search for the next valid frame. 
+ foundFrame = false; + mOffset = frameHeaderOffset + 1; + mParser.EndFrameSession(); + } else { + // Advance mOffset by the amount of bytes read and if necessary, + // skip an ID3v2 tag which stretches beyond the current buffer. + NS_ENSURE_TRUE(mOffset + read + bytesToSkip > mOffset, + MediaByteRange(0, 0)); + mOffset += static_cast<int64_t>(read + bytesToSkip); + } + } + + if (StreamLength() != -1) { + mEOS = frameHeaderOffset + mParser.CurrentFrame().Length() + BUFFER_SIZE > + StreamLength(); + } + + if (!foundFrame || !mParser.CurrentFrame().Length()) { + MP3LOG("FindNext() Exit foundFrame=%d mParser.CurrentFrame().Length()=%d ", + foundFrame, mParser.CurrentFrame().Length()); + return {0, 0}; + } + + MP3LOGV("FindNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " frameHeaderOffset=%" PRId64 + " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d" + " mChannels=%d, mEOS=%s", + mOffset, mNumParsedFrames, mFrameIndex, frameHeaderOffset, + mTotalFrameLen, mSamplesPerFrame, mSamplesPerSecond, mChannels, + mEOS ? "true" : "false"); + + return {frameHeaderOffset, + frameHeaderOffset + mParser.CurrentFrame().Length()}; +} + +bool MP3TrackDemuxer::SkipNextFrame(const MediaByteRange& aRange) { + if (!mNumParsedFrames || !aRange.Length()) { + // We can't skip the first frame, since it could contain VBR headers. 
+ RefPtr<MediaRawData> frame(GetNextFrame(aRange)); + return frame; + } + + UpdateState(aRange); + + MP3LOGV("SkipNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", + mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, + mSamplesPerFrame, mSamplesPerSecond, mChannels); + + return true; +} + +media::TimeUnit MP3TrackDemuxer::EncoderDelay() const { + return media::TimeUnit(mEncoderDelay, mSamplesPerSecond); +} + +uint32_t MP3TrackDemuxer::EncoderDelayFrames() const { return mEncoderDelay; } + +media::TimeUnit MP3TrackDemuxer::Padding() const { + return media::TimeUnit(mEncoderPadding, mSamplesPerSecond); +} + +uint32_t MP3TrackDemuxer::PaddingFrames() const { return mEncoderPadding; } + +already_AddRefed<MediaRawData> MP3TrackDemuxer::GetNextFrame( + const MediaByteRange& aRange) { + MP3LOG("GetNext() Begin({mStart=%" PRId64 " Length()=%" PRId64 "})", + aRange.mStart, aRange.Length()); + if (!aRange.Length()) { + return nullptr; + } + + RefPtr<MediaRawData> frame = new MediaRawData(); + frame->mOffset = aRange.mStart; + + UniquePtr<MediaRawDataWriter> frameWriter(frame->CreateWriter()); + if (!frameWriter->SetSize(static_cast<size_t>(aRange.Length()))) { + MP3LOG("GetNext() Exit failed to allocated media buffer"); + return nullptr; + } + + const uint32_t read = + Read(frameWriter->Data(), frame->mOffset, frame->Size()); + + if (read != aRange.Length()) { + MP3LOG("GetNext() Exit read=%u frame->Size()=%zu", read, frame->Size()); + return nullptr; + } + + UpdateState(aRange); + + if (mNumParsedFrames == 1) { + // First frame parsed, let's read VBR info if available. 
+ BufferReader reader(frame->Data(), frame->Size()); + mFirstFrameOffset = frame->mOffset; + + if (mParser.ParseVBRHeader(&reader)) { + // Parsing was successful + if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::XING) { + MP3LOG("XING header present, skipping encoder delay (%u frames)", + mParser.VBRInfo().EncoderDelay()); + mEncoderDelay = mParser.VBRInfo().EncoderDelay(); + mEncoderPadding = mParser.VBRInfo().EncoderPadding(); + // Padding is encoded as a 12-bit unsigned number so this is fine. + mRemainingEncoderPadding = AssertedCast<int32_t>(mEncoderPadding); + if (mEncoderDelay == 0) { + // Skip the VBR frame + the decoder delay, that is always 529 frames + // in practice for the decoder we're using. + mEncoderDelay = mSamplesPerFrame + 529; + MP3LOG( + "No explicit delay present in vbr header, delay is assumed to be " + "%u frames\n", + mEncoderDelay); + } + } else if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::VBRI) { + MP3LOG("VBRI header present, skipping encoder delay (%u frames)", + mParser.VBRInfo().EncoderDelay()); + mEncoderDelay = mParser.VBRInfo().EncoderDelay(); + } + } + } + + TimeUnit rawPts = Duration(mFrameIndex - 1) - EncoderDelay(); + TimeUnit rawDuration = Duration(1); + TimeUnit rawEnd = rawPts + rawDuration; + + frame->mTime = std::max(TimeUnit::Zero(mSamplesPerSecond), rawPts); + + frame->mDuration = Duration(1); + frame->mTimecode = frame->mTime; + frame->mKeyframe = true; + frame->mEOS = mEOS; + + // Handle decoder delay. A packet must be trimmed if its pts, adjusted for + // decoder delay, is negative. A packet can be trimmed entirely. + if (rawPts.IsNegative()) { + frame->mDuration = + std::max(TimeUnit::Zero(mSamplesPerSecond), rawEnd - frame->mTime); + } + + // It's possible to create an mp3 file that has a padding value that somehow + // spans multiple packets. In that case the duration is probably known, + // because it's probably a VBR file with a XING header (that has a duration + // field). 
Use the duration to be able to set the correct duration on + // packets that aren't the last one. + // For most files, the padding is less than a packet, it's simply subtracted. + if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::XING && + mRemainingEncoderPadding > 0 && + frame->GetEndTime() > Duration().valueOr(TimeUnit::FromInfinity())) { + TimeUnit duration = Duration().value(); + TimeUnit inPaddingZone = frame->GetEndTime() - duration; + TimeUnit originalEnd = frame->GetEndTime(); + TimeUnit originalPts = frame->mTime; + frame->mDuration -= inPaddingZone; + // Packet is entirely padding and will be completely discarded by the + // decoder. + if (frame->mDuration.IsNegative()) { + frame->mDuration = TimeUnit::Zero(mSamplesPerSecond); + } + int32_t paddingFrames = + AssertedCast<int32_t>(inPaddingZone.ToTicksAtRate(mSamplesPerSecond)); + if (mRemainingEncoderPadding >= paddingFrames) { + mRemainingEncoderPadding -= paddingFrames; + } else { + mRemainingEncoderPadding = 0; + } + MP3LOG("Trimming [%s, %s] to [%s,%s] (padding) (stream duration: %s)", + originalPts.ToString().get(), originalEnd.ToString().get(), + frame->mTime.ToString().get(), frame->GetEndTime().ToString().get(), + duration.ToString().get()); + } else if (frame->mEOS && + mRemainingEncoderPadding <= + frame->mDuration.ToTicksAtRate(mSamplesPerSecond)) { + frame->mDuration -= TimeUnit(mRemainingEncoderPadding, mSamplesPerSecond); + MOZ_ASSERT(frame->mDuration.IsPositiveOrZero()); + MP3LOG("Trimming last packet %s to [%s,%s]", Padding().ToString().get(), + frame->mTime.ToString().get(), frame->GetEndTime().ToString().get()); + } + + MP3LOGV("GetNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 + " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 + " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d, mEOS=%s", + mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, + mSamplesPerFrame, mSamplesPerSecond, mChannels, + mEOS ? 
"true" : "false"); + + // It's possible for the duration of a frame to be zero if the frame is to be + // trimmed entirely because it's fully comprised of decoder delay samples. + // This is common at the beginning of an stream. + MOZ_ASSERT(frame->mDuration.IsPositiveOrZero()); + + MP3LOG("Packet demuxed: pts [%s, %s] (duration: %s)", + frame->mTime.ToString().get(), frame->GetEndTime().ToString().get(), + frame->mDuration.ToString().get()); + + // Indicate original packet information to trim after decoding. + if (frame->mDuration != rawDuration) { + frame->mOriginalPresentationWindow = Some(TimeInterval{rawPts, rawEnd}); + MP3LOG("Total packet time excluding trimming: [%s, %s]", + rawPts.ToString().get(), rawEnd.ToString().get()); + } + + return frame.forget(); +} + +int64_t MP3TrackDemuxer::OffsetFromFrameIndex(int64_t aFrameIndex) const { + int64_t offset = 0; + const auto& vbr = mParser.VBRInfo(); + + if (vbr.IsComplete()) { + offset = mFirstFrameOffset + aFrameIndex * vbr.NumBytes().value() / + vbr.NumAudioFrames().value(); + } else if (AverageFrameLength() > 0) { + offset = mFirstFrameOffset + + AssertedCast<int64_t>(static_cast<float>(aFrameIndex) * + AverageFrameLength()); + } + + MP3LOGV("OffsetFromFrameIndex(%" PRId64 ") -> %" PRId64, aFrameIndex, offset); + return std::max<int64_t>(mFirstFrameOffset, offset); +} + +int64_t MP3TrackDemuxer::FrameIndexFromOffset(int64_t aOffset) const { + int64_t frameIndex = 0; + const auto& vbr = mParser.VBRInfo(); + + if (vbr.IsComplete()) { + frameIndex = + AssertedCast<int64_t>(static_cast<float>(aOffset - mFirstFrameOffset) / + static_cast<float>(vbr.NumBytes().value()) * + static_cast<float>(vbr.NumAudioFrames().value())); + frameIndex = std::min<int64_t>(vbr.NumAudioFrames().value(), frameIndex); + } else if (AverageFrameLength() > 0) { + frameIndex = AssertedCast<int64_t>( + static_cast<float>(aOffset - mFirstFrameOffset) / AverageFrameLength()); + } + + MP3LOGV("FrameIndexFromOffset(%" PRId64 ") -> %" PRId64, 
aOffset, frameIndex); + return std::max<int64_t>(0, frameIndex); +} + +int64_t MP3TrackDemuxer::FrameIndexFromTime( + const media::TimeUnit& aTime) const { + int64_t frameIndex = 0; + if (mSamplesPerSecond > 0 && mSamplesPerFrame > 0) { + frameIndex = AssertedCast<int64_t>( + aTime.ToSeconds() * mSamplesPerSecond / mSamplesPerFrame - 1); + } + + MP3LOGV("FrameIndexFromOffset(%fs) -> %" PRId64, aTime.ToSeconds(), + frameIndex); + return std::max<int64_t>(0, frameIndex); +} + +void MP3TrackDemuxer::UpdateState(const MediaByteRange& aRange) { + // Prevent overflow. + if (mTotalFrameLen + aRange.Length() < mTotalFrameLen) { + // These variables have a linear dependency and are only used to derive the + // average frame length. + mTotalFrameLen /= 2; + mNumParsedFrames /= 2; + } + + // Full frame parsed, move offset to its end. + mOffset = aRange.mEnd; + + mTotalFrameLen += aRange.Length(); + + if (!mSamplesPerFrame) { + mSamplesPerFrame = mParser.CurrentFrame().Header().SamplesPerFrame(); + mSamplesPerSecond = mParser.CurrentFrame().Header().SampleRate(); + mChannels = mParser.CurrentFrame().Header().Channels(); + } + + ++mNumParsedFrames; + ++mFrameIndex; + MOZ_ASSERT(mFrameIndex > 0); + + // Prepare the parser for the next frame parsing session. + mParser.EndFrameSession(); +} + +uint32_t MP3TrackDemuxer::Read(uint8_t* aBuffer, int64_t aOffset, + uint32_t aSize) { + MP3LOGV("MP3TrackDemuxer::Read(%p %" PRId64 " %d)", aBuffer, aOffset, aSize); + + const int64_t streamLen = StreamLength(); + if (mInfo && streamLen > 0) { + // Prevent blocking reads after successful initialization. + int64_t max = streamLen > aOffset ? 
streamLen - aOffset : 0; + aSize = std::min<int64_t>(aSize, max); + } + + uint32_t read = 0; + MP3LOGV("MP3TrackDemuxer::Read -> ReadAt(%u)", aSize); + const nsresult rv = mSource.ReadAt(aOffset, reinterpret_cast<char*>(aBuffer), + static_cast<uint32_t>(aSize), &read); + NS_ENSURE_SUCCESS(rv, 0); + return read; +} + +double MP3TrackDemuxer::AverageFrameLength() const { + if (mNumParsedFrames) { + return static_cast<double>(mTotalFrameLen) / + static_cast<double>(mNumParsedFrames); + } + const auto& vbr = mParser.VBRInfo(); + if (vbr.IsComplete() && vbr.NumAudioFrames().value() + 1) { + return static_cast<double>(vbr.NumBytes().value()) / + (vbr.NumAudioFrames().value() + 1); + } + return 0.0; +} + +Maybe<uint32_t> MP3TrackDemuxer::ValidNumAudioFrames() const { + return mParser.VBRInfo().IsValid() && + mParser.VBRInfo().NumAudioFrames().valueOr(0) + 1 > 1 + ? mParser.VBRInfo().NumAudioFrames() + : Nothing(); +} + +} // namespace mozilla + +#undef MP3LOG +#undef MP3LOGV diff --git a/dom/media/mp3/MP3Demuxer.h b/dom/media/mp3/MP3Demuxer.h new file mode 100644 index 0000000000..5189e82acf --- /dev/null +++ b/dom/media/mp3/MP3Demuxer.h @@ -0,0 +1,187 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MP3_DEMUXER_H_ +#define MP3_DEMUXER_H_ + +#include "MediaDataDemuxer.h" +#include "MediaResource.h" +#include "MP3FrameParser.h" + +namespace mozilla { + +class MP3TrackDemuxer; + +DDLoggedTypeDeclNameAndBase(MP3Demuxer, MediaDataDemuxer); +DDLoggedTypeNameAndBase(MP3TrackDemuxer, MediaTrackDemuxer); + +class MP3Demuxer : public MediaDataDemuxer, + public DecoderDoctorLifeLogger<MP3Demuxer> { + public: + // MediaDataDemuxer interface. 
+ explicit MP3Demuxer(MediaResource* aSource); + RefPtr<InitPromise> Init() override; + uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override; + already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) override; + bool IsSeekable() const override; + void NotifyDataArrived() override; + void NotifyDataRemoved() override; + + private: + // Synchronous initialization. + bool InitInternal(); + + RefPtr<MediaResource> mSource; + RefPtr<MP3TrackDemuxer> mTrackDemuxer; +}; + +// The MP3 demuxer used to extract MPEG frames and side information out of +// MPEG streams. +class MP3TrackDemuxer : public MediaTrackDemuxer, + public DecoderDoctorLifeLogger<MP3TrackDemuxer> { + public: + // Constructor, expecting a valid media resource. + explicit MP3TrackDemuxer(MediaResource* aSource); + + // Initializes the track demuxer by reading the first frame for meta data. + // Returns initialization success state. + bool Init(); + + // Returns the total stream length if known, -1 otherwise. + int64_t StreamLength() const; + + // Returns the estimated stream duration, or a 0-duration if unknown. + media::NullableTimeUnit Duration() const; + + // Returns the estimated duration up to the given frame number, + // or a 0-duration if unknown. + media::TimeUnit Duration(int64_t aNumFrames) const; + + // Returns the estimated current seek position time. + media::TimeUnit SeekPosition() const; + + const FrameParser::Frame& LastFrame() const; + RefPtr<MediaRawData> DemuxSample(); + + const ID3Parser::ID3Header& ID3Header() const; + const FrameParser::VBRHeader& VBRInfo() const; + + // MediaTrackDemuxer interface. 
+ UniquePtr<TrackInfo> GetInfo() const override; + RefPtr<SeekPromise> Seek(const media::TimeUnit& aTime) override; + RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override; + void Reset() override; + RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint( + const media::TimeUnit& aTimeThreshold) override; + int64_t GetResourceOffset() const override; + media::TimeIntervals GetBuffered() override; + // Return the duration in frames of the encoder delay. + uint32_t EncoderDelayFrames() const; + // Return the duration in frames of the padding. + uint32_t PaddingFrames() const; + + private: + // Destructor. + ~MP3TrackDemuxer() = default; + + // Fast approximate seeking to given time. + media::TimeUnit FastSeek(const media::TimeUnit& aTime); + + // Seeks by scanning the stream up to the given time for more accurate + // results. + media::TimeUnit ScanUntil(const media::TimeUnit& aTime); + + // Finds the first valid frame and returns its byte range if found + // or a null-byte range otherwise. + MediaByteRange FindFirstFrame(); + + // Finds the next valid frame and returns its byte range if found + // or a null-byte range otherwise. + MediaByteRange FindNextFrame(); + + // Skips the next frame given the provided byte range. + bool SkipNextFrame(const MediaByteRange& aRange); + + // Returns the next MPEG frame, if available. + already_AddRefed<MediaRawData> GetNextFrame(const MediaByteRange& aRange); + + // Updates post-read meta data. + void UpdateState(const MediaByteRange& aRange); + + // Returns the estimated offset for the given frame index. + int64_t OffsetFromFrameIndex(int64_t aFrameIndex) const; + + // Returns the estimated frame index for the given offset. + int64_t FrameIndexFromOffset(int64_t aOffset) const; + + // Returns the estimated frame index for the given time. + int64_t FrameIndexFromTime(const media::TimeUnit& aTime) const; + + // Reads aSize bytes into aBuffer from the source starting at aOffset. + // Returns the actual size read. 
+ uint32_t Read(uint8_t* aBuffer, int64_t aOffset, uint32_t aSize); + + // Returns the average frame length derived from the previously parsed frames. + double AverageFrameLength() const; + + // Returns the number of frames reported by the header if it's valid. Nothing + // otherwise. + Maybe<uint32_t> ValidNumAudioFrames() const; + + // Return the duration of the encoder delay. + media::TimeUnit EncoderDelay() const; + + // Return the duration of the padding. + media::TimeUnit Padding() const; + + // The (hopefully) MPEG resource. + MediaResourceIndex mSource; + + // MPEG frame parser used to detect frames and extract side info. + FrameParser mParser; + + // Whether we've locked onto a valid sequence of frames or not. + bool mFrameLock; + + // Current byte offset in the source stream. + int64_t mOffset; + + // Byte offset of the begin of the first frame, or 0 if none parsed yet. + int64_t mFirstFrameOffset; + + // Total parsed frames. + uint64_t mNumParsedFrames; + + // Current frame index. + int64_t mFrameIndex; + + // Sum of parsed frames' lengths in bytes. + int64_t mTotalFrameLen; + + // Samples per frame metric derived from frame headers or 0 if none available. + uint32_t mSamplesPerFrame; + + // Samples per second metric derived from frame headers or 0 if none + // available. + uint32_t mSamplesPerSecond; + + // Channel count derived from frame headers or 0 if none available. + uint32_t mChannels; + + // Audio track config info. 
+ UniquePtr<AudioInfo> mInfo; + + // Number of frames to skip at the beginning + uint32_t mEncoderDelay = 0; + // Number of frames to skip at the end + uint32_t mEncoderPadding = 0; + int32_t mRemainingEncoderPadding = 0; + // End of stream has been found + bool mEOS = false; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mp3/MP3FrameParser.cpp b/dom/media/mp3/MP3FrameParser.cpp new file mode 100644 index 0000000000..9701aa2f8f --- /dev/null +++ b/dom/media/mp3/MP3FrameParser.cpp @@ -0,0 +1,817 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MP3FrameParser.h" + +#include <algorithm> +#include <inttypes.h> + +#include "TimeUnits.h" +#include "mozilla/Assertions.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/ResultExtensions.h" +#include "mozilla/ScopeExit.h" +#include "VideoUtils.h" + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define MP3LOG(msg, ...) \ + MOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, ("MP3Demuxer " msg, ##__VA_ARGS__)) +#define MP3LOGV(msg, ...) \ + MOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, \ + ("MP3Demuxer " msg, ##__VA_ARGS__)) + +namespace mozilla { + +// FrameParser + +namespace frame_header { +// FrameHeader mRaw byte offsets. 
+static const int SYNC1 = 0; +static const int SYNC2_VERSION_LAYER_PROTECTION = 1; +static const int BITRATE_SAMPLERATE_PADDING_PRIVATE = 2; +static const int CHANNELMODE_MODEEXT_COPY_ORIG_EMPH = 3; +} // namespace frame_header + +FrameParser::FrameParser() = default; + +void FrameParser::Reset() { + mID3Parser.Reset(); + mFrame.Reset(); +} + +void FrameParser::ResetFrameData() { + mFrame.Reset(); + mFirstFrame.Reset(); + mPrevFrame.Reset(); +} + +void FrameParser::EndFrameSession() { + if (!mID3Parser.Header().IsValid()) { + // Reset ID3 tags only if we have not parsed a valid ID3 header yet. + mID3Parser.Reset(); + } + mPrevFrame = mFrame; + mFrame.Reset(); +} + +const FrameParser::Frame& FrameParser::CurrentFrame() const { return mFrame; } + +const FrameParser::Frame& FrameParser::PrevFrame() const { return mPrevFrame; } + +const FrameParser::Frame& FrameParser::FirstFrame() const { + return mFirstFrame; +} + +const ID3Parser::ID3Header& FrameParser::ID3Header() const { + return mID3Parser.Header(); +} + +uint32_t FrameParser::TotalID3HeaderSize() const { + uint32_t ID3v1Size = 0; + if (mID3v1MetadataFound) { + ID3v1Size = 128; + } + return ID3v1Size + mID3Parser.TotalHeadersSize(); +} + +const FrameParser::VBRHeader& FrameParser::VBRInfo() const { + return mVBRHeader; +} + +Result<bool, nsresult> FrameParser::Parse(BufferReader* aReader, + uint32_t* aBytesToSkip) { + MOZ_ASSERT(aReader && aBytesToSkip); + *aBytesToSkip = 0; + + if (ID3Parser::IsBufferStartingWithID3v1Tag(aReader)) { + // This is usually at the end of the file, and is always 128 bytes, that + // can simply be skipped. + aReader->Read(128); + *aBytesToSkip = 128; + mID3v1MetadataFound = true; + MP3LOGV("ID3v1 tag detected, skipping 128 bytes past the current buffer"); + return false; + } + + if (ID3Parser::IsBufferStartingWithID3Tag(aReader) && !mFirstFrame.Length()) { + // No MP3 frames have been parsed yet, look for ID3v2 headers at file begin. + // ID3v1 tags may only be at file end. 
+ const size_t prevReaderOffset = aReader->Offset(); + const uint32_t tagSize = mID3Parser.Parse(aReader); + if (!!tagSize) { + // ID3 tag found, skip past it. + const uint32_t skipSize = tagSize - ID3Parser::ID3Header::SIZE; + + if (skipSize > aReader->Remaining()) { + // Skipping across the ID3v2 tag would take us past the end of the + // buffer, therefore we return immediately and let the calling function + // handle skipping the rest of the tag. + MP3LOGV( + "ID3v2 tag detected, size=%d," + " needing to skip %zu bytes past the current buffer", + tagSize, skipSize - aReader->Remaining()); + *aBytesToSkip = skipSize - aReader->Remaining(); + return false; + } + MP3LOGV("ID3v2 tag detected, size=%d", tagSize); + aReader->Read(skipSize); + } else { + // No ID3v2 tag found, rewinding reader in order to search for a MPEG + // frame header. + aReader->Seek(prevReaderOffset); + } + } + + for (auto res = aReader->ReadU8(); + res.isOk() && !mFrame.ParseNext(res.unwrap()); res = aReader->ReadU8()) { + } + + if (mFrame.Length()) { + // MP3 frame found. + if (!mFirstFrame.Length()) { + mFirstFrame = mFrame; + } + // Indicate success. 
+    return true;
+  }
+  return false;
+}
+
+// FrameParser::Header
+
+FrameParser::FrameHeader::FrameHeader() { Reset(); }
+
+uint8_t FrameParser::FrameHeader::Sync1() const {
+  return mRaw[frame_header::SYNC1];
+}
+
+uint8_t FrameParser::FrameHeader::Sync2() const {
+  return 0x7 & mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 5;
+}
+
+uint8_t FrameParser::FrameHeader::RawVersion() const {
+  return 0x3 & mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 3;
+}
+
+uint8_t FrameParser::FrameHeader::RawLayer() const {
+  return 0x3 & mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 1;
+}
+
+// Returns the raw protection flag. The second header byte packs, from the
+// most significant bit down, 3 sync bits, 2 version bits and 2 layer bits
+// (see the accessors above), which leaves the protection flag in bit 0, so no
+// shift is applied.
+uint8_t FrameParser::FrameHeader::RawProtection() const {
+  return 0x1 & mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION];
+}
+
+uint8_t FrameParser::FrameHeader::RawBitrate() const {
+  return 0xF & mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 4;
+}
+
+uint8_t FrameParser::FrameHeader::RawSampleRate() const {
+  return 0x3 & mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 2;
+}
+
+uint8_t FrameParser::FrameHeader::Padding() const {
+  return 0x1 & mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 1;
+}
+
+uint8_t FrameParser::FrameHeader::Private() const {
+  return 0x1 & mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE];
+}
+
+uint8_t FrameParser::FrameHeader::RawChannelMode() const {
+  return 0x3 & mRaw[frame_header::CHANNELMODE_MODEEXT_COPY_ORIG_EMPH] >> 6;
+}
+
+uint32_t FrameParser::FrameHeader::Layer() const {
+  static const uint8_t LAYERS[4] = {0, 3, 2, 1};
+
+  return LAYERS[RawLayer()];
+}
+
+uint32_t FrameParser::FrameHeader::SampleRate() const {
+  // Sample rates - use [version][srate]
+  static const uint16_t SAMPLE_RATE[4][4] = {
+      // clang-format off
+    { 11025, 12000,  8000, 0 }, // MPEG 2.5
+    {     0,     0,     0, 0 }, // Reserved
+    { 22050, 24000, 16000, 0 }, // MPEG 2
+    { 44100, 48000, 32000, 0 }  // MPEG 1
+      // clang-format on
+  };
+
+  return SAMPLE_RATE[RawVersion()][RawSampleRate()];
+}
+
+uint32_t
FrameParser::FrameHeader::Channels() const { + // 3 is single channel (mono), any other value is some variant of dual + // channel. + return RawChannelMode() == 3 ? 1 : 2; +} + +uint32_t FrameParser::FrameHeader::SamplesPerFrame() const { + // Samples per frame - use [version][layer] + static const uint16_t FRAME_SAMPLE[4][4] = { + // clang-format off + // Layer 3 2 1 Version + { 0, 576, 1152, 384 }, // 2.5 + { 0, 0, 0, 0 }, // Reserved + { 0, 576, 1152, 384 }, // 2 + { 0, 1152, 1152, 384 } // 1 + // clang-format on + }; + + return FRAME_SAMPLE[RawVersion()][RawLayer()]; +} + +uint32_t FrameParser::FrameHeader::Bitrate() const { + // Bitrates - use [version][layer][bitrate] + static const uint16_t BITRATE[4][4][16] = { + // clang-format off + { // Version 2.5 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 + }, + { // Reserved + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // Invalid + }, + { // Version 2 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 + { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 + }, + { // Version 1 + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved + { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3 + { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2 + { 0, 32, 64, 96, 128, 160, 192, 
224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1 + } + // clang-format on + }; + + return 1000 * BITRATE[RawVersion()][RawLayer()][RawBitrate()]; +} + +uint32_t FrameParser::FrameHeader::SlotSize() const { + // Slot size (MPEG unit of measurement) - use [layer] + static const uint8_t SLOT_SIZE[4] = {0, 1, 1, 4}; // Rsvd, 3, 2, 1 + + return SLOT_SIZE[RawLayer()]; +} + +bool FrameParser::FrameHeader::ParseNext(uint8_t c) { + if (!Update(c)) { + Reset(); + if (!Update(c)) { + Reset(); + } + } + return IsValid(); +} + +bool FrameParser::ID3v1MetadataFound() const { return mID3v1MetadataFound; } + +bool FrameParser::FrameHeader::IsValid(int aPos) const { + if (aPos >= SIZE) { + return true; + } + if (aPos == frame_header::SYNC1) { + return Sync1() == 0xFF; + } + if (aPos == frame_header::SYNC2_VERSION_LAYER_PROTECTION) { + return Sync2() == 7 && RawVersion() != 1 && Layer() == 3; + } + if (aPos == frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE) { + return RawBitrate() != 0xF && RawBitrate() != 0 && RawSampleRate() != 3; + } + return true; +} + +bool FrameParser::FrameHeader::IsValid() const { return mPos >= SIZE; } + +void FrameParser::FrameHeader::Reset() { mPos = 0; } + +bool FrameParser::FrameHeader::Update(uint8_t c) { + if (mPos < SIZE) { + mRaw[mPos] = c; + } + return IsValid(mPos++); +} + +// FrameParser::VBRHeader + +namespace vbr_header { +static const char* TYPE_STR[3] = {"NONE", "XING", "VBRI"}; +static const uint32_t TOC_SIZE = 100; +} // namespace vbr_header + +FrameParser::VBRHeader::VBRHeader() : mType(NONE) {} + +FrameParser::VBRHeader::VBRHeaderType FrameParser::VBRHeader::Type() const { + return mType; +} + +const Maybe<uint32_t>& FrameParser::VBRHeader::NumAudioFrames() const { + return mNumAudioFrames; +} + +const Maybe<uint32_t>& FrameParser::VBRHeader::NumBytes() const { + return mNumBytes; +} + +const Maybe<uint32_t>& FrameParser::VBRHeader::Scale() const { return mScale; } + +bool FrameParser::VBRHeader::IsTOCPresent() const { + // This 
doesn't use VBRI TOC
+  return !mTOC.empty() && mType != VBRI;
+}
+
+bool FrameParser::VBRHeader::IsValid() const { return mType != NONE; }
+
+bool FrameParser::VBRHeader::IsComplete() const {
+  return IsValid() && mNumAudioFrames.valueOr(0) > 0 && mNumBytes.valueOr(0) > 0
+         // We don't care about the scale for any computations here.
+         // && mScale < 101
+      ;
+}
+
+// Returns the byte offset the TOC associates with aTime, given the total
+// stream duration aDuration, or -1 if no usable TOC is present. Only the XING
+// TOC is evaluated; the offset is interpolated linearly between the two TOC
+// entries surrounding the requested position.
+int64_t FrameParser::VBRHeader::Offset(media::TimeUnit aTime,
+                                       media::TimeUnit aDuration) const {
+  if (!IsTOCPresent()) {
+    return -1;
+  }
+
+  int64_t offset = -1;
+  if (mType == XING) {
+    // Constrain the duration percentage to [0, 99].
+    double percent = 100. * aTime.ToSeconds() / aDuration.ToSeconds();
+    const double durationPer = std::clamp(percent, 0., 99.);
+    double integer;
+    const double fractional = modf(durationPer, &integer);
+    size_t integerPer = AssertedCast<size_t>(integer);
+
+    MOZ_ASSERT(integerPer < mTOC.size());
+    offset = mTOC.at(integerPer);
+    if (fractional > 0.0 && integerPer + 1 < mTOC.size()) {
+      // Scale the distance to the next TOC entry before converting to an
+      // integer: converting the fraction on its own always yields 0 and would
+      // disable the interpolation.
+      offset += AssertedCast<int64_t>(
+          fractional *
+          static_cast<double>(mTOC.at(integerPer + 1) - offset));
+    }
+  }
+  // TODO: VBRI TOC seeking
+  MP3LOG("VBRHeader::Offset (%s): %f is at byte %" PRId64 "",
+         mType == XING ? "XING" : "VBRI", aTime.ToSeconds(), offset);
+
+  return offset;
+}
+
+Result<bool, nsresult> FrameParser::VBRHeader::ParseXing(BufferReader* aReader,
+                                                         size_t aFrameSize) {
+  static const uint32_t XING_TAG = BigEndian::readUint32("Xing");
+  static const uint32_t INFO_TAG = BigEndian::readUint32("Info");
+  static const uint32_t LAME_TAG = BigEndian::readUint32("LAME");
+  static const uint32_t LAVC_TAG = BigEndian::readUint32("Lavc");
+
+  enum Flags {
+    NUM_FRAMES = 0x01,
+    NUM_BYTES = 0x02,
+    TOC = 0x04,
+    VBR_SCALE = 0x08
+  };
+
+  MOZ_ASSERT(aReader);
+
+  // Seek backward to the original position before leaving this scope.
+ const size_t prevReaderOffset = aReader->Offset(); + auto scopeExit = MakeScopeExit([&] { aReader->Seek(prevReaderOffset); }); + + // We have to search for the Xing header as its position can change. + for (auto res = aReader->PeekU32(); + res.isOk() && res.unwrap() != XING_TAG && res.unwrap() != INFO_TAG;) { + aReader->Read(1); + res = aReader->PeekU32(); + } + + // Skip across the VBR header ID tag. + MOZ_TRY(aReader->ReadU32()); + mType = XING; + + uint32_t flags; + MOZ_TRY_VAR(flags, aReader->ReadU32()); + + if (flags & NUM_FRAMES) { + uint32_t frames; + MOZ_TRY_VAR(frames, aReader->ReadU32()); + mNumAudioFrames = Some(frames); + } + if (flags & NUM_BYTES) { + uint32_t bytes; + MOZ_TRY_VAR(bytes, aReader->ReadU32()); + mNumBytes = Some(bytes); + } + if (flags & TOC && aReader->Remaining() >= vbr_header::TOC_SIZE) { + if (!mNumBytes) { + // We don't have the stream size to calculate offsets, skip the TOC. + aReader->Read(vbr_header::TOC_SIZE); + } else { + mTOC.clear(); + mTOC.reserve(vbr_header::TOC_SIZE); + uint8_t data; + for (size_t i = 0; i < vbr_header::TOC_SIZE; ++i) { + MOZ_TRY_VAR(data, aReader->ReadU8()); + mTOC.push_back( + AssertedCast<uint32_t>(1.0f / 256.0f * data * mNumBytes.value())); + } + } + } + if (flags & VBR_SCALE) { + uint32_t scale; + MOZ_TRY_VAR(scale, aReader->ReadU32()); + mScale = Some(scale); + } + + uint32_t lameOrLavcTag; + MOZ_TRY_VAR(lameOrLavcTag, aReader->ReadU32()); + + if (lameOrLavcTag == LAME_TAG || lameOrLavcTag == LAVC_TAG) { + // Skip 17 bytes after the LAME tag: + // - http://gabriel.mp3-tech.org/mp3infotag.html + // - 5 bytes for the encoder short version string + // - 1 byte for the info tag revision + VBR method + // - 1 byte for the lowpass filter value + // - 8 bytes for the ReplayGain information + // - 1 byte for the encoding flags + ATH Type + // - 1 byte for the specified bitrate if ABR, else the minimal bitrate + if (!aReader->Read(17)) { + return mozilla::Err(NS_ERROR_FAILURE); + } + + // The encoder delay 
is three bytes, for two 12-bits integers are the + // encoder delay and the padding. + const uint8_t* delayPadding = aReader->Read(3); + if (!delayPadding) { + return mozilla::Err(NS_ERROR_FAILURE); + } + mEncoderDelay = + uint32_t(delayPadding[0]) << 4 | (delayPadding[1] & 0xf0) >> 4; + mEncoderPadding = uint32_t(delayPadding[1] & 0x0f) << 8 | delayPadding[2]; + + constexpr uint16_t DEFAULT_DECODER_DELAY = 529; + mEncoderDelay += DEFAULT_DECODER_DELAY + aFrameSize; // ignore first frame. + mEncoderPadding -= std::min(mEncoderPadding, DEFAULT_DECODER_DELAY); + + MP3LOG("VBRHeader::ParseXing: LAME encoder delay section: delay: %" PRIu16 + " frames, padding: %" PRIu16 " frames", + mEncoderDelay, mEncoderPadding); + } + + return mType == XING; +} + +template <typename T> +int readAndConvertToInt(BufferReader* aReader) { + int value = AssertedCast<int>(aReader->ReadType<T>()); + return value; +} + +Result<bool, nsresult> FrameParser::VBRHeader::ParseVBRI( + BufferReader* aReader) { + static const uint32_t TAG = BigEndian::readUint32("VBRI"); + static const uint32_t OFFSET = 32 + FrameParser::FrameHeader::SIZE; + static const uint32_t MIN_FRAME_SIZE = OFFSET + 26; + + MOZ_ASSERT(aReader); + // ParseVBRI assumes that the ByteReader offset points to the beginning of a + // frame, therefore as a simple check, we look for the presence of a frame + // sync at that position. + auto sync = aReader->PeekU16(); + if (sync.isOk()) { // To avoid compiler complains 'set but unused'. + MOZ_ASSERT((sync.unwrap() & 0xFFE0) == 0xFFE0); + } + + // Seek backward to the original position before leaving this scope. + const size_t prevReaderOffset = aReader->Offset(); + auto scopeExit = MakeScopeExit([&] { aReader->Seek(prevReaderOffset); }); + + // VBRI have a fixed relative position, so let's check for it there. 
+  if (aReader->Remaining() > MIN_FRAME_SIZE) {
+    aReader->Seek(prevReaderOffset + OFFSET);
+    uint32_t tag;
+    MOZ_TRY_VAR(tag, aReader->ReadU32());
+    if (tag == TAG) {
+      uint16_t vbriEncoderVersion, vbriEncoderDelay, vbriQuality;
+      uint32_t vbriBytes, vbriFrames;
+      uint16_t vbriSeekOffsetsTableSize, vbriSeekOffsetsScaleFactor,
+          vbriSeekOffsetsBytesPerEntry, vbriSeekOffsetsFramesPerEntry;
+      // Fixed-size part of the VBRI header, read in file order. Together with
+      // the 4-byte tag these fields add up to the 26 bytes accounted for in
+      // MIN_FRAME_SIZE.
+      MOZ_TRY_VAR(vbriEncoderVersion, aReader->ReadU16());
+      MOZ_TRY_VAR(vbriEncoderDelay, aReader->ReadU16());
+      MOZ_TRY_VAR(vbriQuality, aReader->ReadU16());
+      MOZ_TRY_VAR(vbriBytes, aReader->ReadU32());
+      MOZ_TRY_VAR(vbriFrames, aReader->ReadU32());
+      MOZ_TRY_VAR(vbriSeekOffsetsTableSize, aReader->ReadU16());
+      // The scale factor is a 16-bit field: reading 32 bits here would
+      // truncate the value and misalign the two fields that follow as well as
+      // the seek table itself.
+      MOZ_TRY_VAR(vbriSeekOffsetsScaleFactor, aReader->ReadU16());
+      MOZ_TRY_VAR(vbriSeekOffsetsBytesPerEntry, aReader->ReadU16());
+      MOZ_TRY_VAR(vbriSeekOffsetsFramesPerEntry, aReader->ReadU16());
+
+      mTOC.reserve(vbriSeekOffsetsTableSize + 1);
+
+      // Pick the reader matching the per-entry size; stays null (and the
+      // table is skipped) for unsupported sizes.
+      int (*readFunc)(BufferReader*) = nullptr;
+      switch (vbriSeekOffsetsBytesPerEntry) {
+        case 1:
+          readFunc = &readAndConvertToInt<uint8_t>;
+          break;
+        case 2:
+          readFunc = &readAndConvertToInt<int16_t>;
+          break;
+        case 4:
+          readFunc = &readAndConvertToInt<int32_t>;
+          break;
+        case 8:
+          readFunc = &readAndConvertToInt<int64_t>;
+          break;
+        default:
+          MP3LOG("Unhandled vbriSeekOffsetsBytesPerEntry size of %hd",
+                 vbriSeekOffsetsBytesPerEntry);
+          break;
+      }
+      for (uint32_t i = 0; readFunc && i < vbriSeekOffsetsTableSize; i++) {
+        int entry = readFunc(aReader);
+        mTOC.push_back(entry * vbriSeekOffsetsScaleFactor);
+      }
+      MP3LOG(
+          "Header::Parse found valid header: EncoderVersion=%hu "
+          "EncoderDelay=%hu "
+          "Quality=%hu "
+          "Bytes=%u "
+          "Frames=%u "
+          "SeekOffsetsTableSize=%u "
+          "SeekOffsetsScaleFactor=%hu "
+          "SeekOffsetsBytesPerEntry=%hu "
+          "SeekOffsetsFramesPerEntry=%hu",
+          vbriEncoderVersion, vbriEncoderDelay, vbriQuality, vbriBytes,
+          vbriFrames, vbriSeekOffsetsTableSize, vbriSeekOffsetsScaleFactor,
+          vbriSeekOffsetsBytesPerEntry,
vbriSeekOffsetsFramesPerEntry); + // Adjust the number of frames so it's counted the same way as in the XING + // header + if (vbriFrames < 1) { + return false; + } + mNumAudioFrames = Some(vbriFrames - 1); + mNumBytes = Some(vbriBytes); + mEncoderDelay = vbriEncoderDelay; + mVBRISeekOffsetsFramesPerEntry = vbriSeekOffsetsFramesPerEntry; + MP3LOG("TOC:"); + for (auto entry : mTOC) { + MP3LOG("%" PRId64, entry); + } + + mType = VBRI; + return true; + } + } + return false; +} + +bool FrameParser::VBRHeader::Parse(BufferReader* aReader, size_t aFrameSize) { + auto res = std::make_pair(ParseVBRI(aReader), ParseXing(aReader, aFrameSize)); + const bool rv = (res.first.isOk() && res.first.unwrap()) || + (res.second.isOk() && res.second.unwrap()); + if (rv) { + MP3LOG( + "VBRHeader::Parse found valid VBR/CBR header: type=%s" + " NumAudioFrames=%u NumBytes=%u Scale=%u TOC-size=%zu Delay=%u", + vbr_header::TYPE_STR[Type()], NumAudioFrames().valueOr(0), + NumBytes().valueOr(0), Scale().valueOr(0), mTOC.size(), mEncoderDelay); + } + return rv; +} + +// FrameParser::Frame + +void FrameParser::Frame::Reset() { mHeader.Reset(); } + +uint32_t FrameParser::Frame::Length() const { + if (!mHeader.IsValid() || !mHeader.SampleRate()) { + return 0; + } + + const uint32_t bitsPerSample = mHeader.SamplesPerFrame() / 8; + const uint32_t frameLen = + bitsPerSample * mHeader.Bitrate() / mHeader.SampleRate() + + mHeader.Padding() * mHeader.SlotSize(); + return frameLen; +} + +bool FrameParser::Frame::ParseNext(uint8_t c) { return mHeader.ParseNext(c); } + +const FrameParser::FrameHeader& FrameParser::Frame::Header() const { + return mHeader; +} + +bool FrameParser::ParseVBRHeader(BufferReader* aReader) { + return mVBRHeader.Parse(aReader, CurrentFrame().Header().SamplesPerFrame()); +} + +// ID3Parser + +// Constants +namespace id3_header { +static const int ID_LEN = 3; +static const int VERSION_LEN = 2; +static const int FLAGS_LEN = 1; +static const int SIZE_LEN = 4; + +static const int 
ID_END = ID_LEN;
+static const int VERSION_END = ID_END + VERSION_LEN;
+static const int FLAGS_END = VERSION_END + FLAGS_LEN;
+static const int SIZE_END = FLAGS_END + SIZE_LEN;
+
+static const uint8_t ID[ID_LEN] = {'I', 'D', '3'};
+static const uint8_t IDv1[ID_LEN] = {'T', 'A', 'G'};
+
+static const uint8_t MIN_MAJOR_VER = 2;
+static const uint8_t MAX_MAJOR_VER = 4;
+}  // namespace id3_header
+
+// Returns whether the three bytes peeked from aReader match aTag. Shared by
+// the ID3v1 and ID3v2 detection functions below.
+static bool IsBufferStartingWithTag(
+    BufferReader* aReader, const uint8_t (&aTag)[id3_header::ID_LEN]) {
+  mozilla::Result<uint32_t, nsresult> res = aReader->PeekU24();
+  if (res.isErr()) {
+    return false;
+  }
+  // The peeked value holds the tag in reverse when consumed from its lowest
+  // byte, so compare starting with the last tag character.
+  uint32_t rv = res.unwrap();
+  for (int idx = id3_header::ID_LEN - 1; idx >= 0; idx--) {
+    if ((rv & 0xff) != aTag[idx]) {
+      return false;
+    }
+    rv = rv >> 8;
+  }
+  return true;
+}
+
+/* static */
+bool ID3Parser::IsBufferStartingWithID3v1Tag(BufferReader* aReader) {
+  // If the buffer starts with an ID3v1 tag, the peeked value reads
+  // 'G' 'A' 'T' from its lowest byte upwards.
+  return IsBufferStartingWithTag(aReader, id3_header::IDv1);
+}
+
+/* static */
+bool ID3Parser::IsBufferStartingWithID3Tag(BufferReader* aReader) {
+  // If the buffer starts with an ID3v2 tag, the peeked value reads
+  // '3' 'D' 'I' from its lowest byte upwards.
+  return IsBufferStartingWithTag(aReader, id3_header::ID);
+}
+
+uint32_t ID3Parser::Parse(BufferReader* aReader) {
+  MOZ_ASSERT(aReader);
+  MOZ_ASSERT(ID3Parser::IsBufferStartingWithID3Tag(aReader));
+
+  if (!mHeader.HasSizeBeenSet()) {
+    return ParseInternal(aReader);
+  }
+
+  // Encounter another possible ID3 header, if that is valid then we would use
+  // it and save the size of previous one in order to report the size of all ID3
+  // headers together in `TotalHeadersSize()`.
+  ID3Header prevHeader = mHeader;
+  mHeader.Reset();
+  uint32_t size = ParseInternal(aReader);
+  if (!size) {
+    // next ID3 is invalid, so revert the header.
+ mHeader = prevHeader; + return size; + } + + mFormerID3Size += prevHeader.TotalTagSize(); + return size; +} + +uint32_t ID3Parser::ParseInternal(BufferReader* aReader) { + for (auto res = aReader->ReadU8(); + res.isOk() && !mHeader.ParseNext(res.unwrap()); + res = aReader->ReadU8()) { + } + return mHeader.TotalTagSize(); +} + +void ID3Parser::Reset() { + mHeader.Reset(); + mFormerID3Size = 0; +} + +uint32_t ID3Parser::TotalHeadersSize() const { + return mHeader.TotalTagSize() + mFormerID3Size; +} + +const ID3Parser::ID3Header& ID3Parser::Header() const { return mHeader; } + +// ID3Parser::Header + +ID3Parser::ID3Header::ID3Header() { Reset(); } + +void ID3Parser::ID3Header::Reset() { + mSize.reset(); + mPos = 0; +} + +uint8_t ID3Parser::ID3Header::MajorVersion() const { + return mRaw[id3_header::ID_END]; +} + +uint8_t ID3Parser::ID3Header::MinorVersion() const { + return mRaw[id3_header::ID_END + 1]; +} + +uint8_t ID3Parser::ID3Header::Flags() const { + return mRaw[id3_header::FLAGS_END - id3_header::FLAGS_LEN]; +} + +uint32_t ID3Parser::ID3Header::Size() const { + if (!IsValid() || !mSize) { + return 0; + } + return *mSize; +} + +bool ID3Parser::ID3Header::HasSizeBeenSet() const { return !!mSize; } + +uint8_t ID3Parser::ID3Header::FooterSize() const { + if (Flags() & (1 << 4)) { + return SIZE; + } + return 0; +} + +uint32_t ID3Parser::ID3Header::TotalTagSize() const { + if (IsValid()) { + // Header found, return total tag size. + return ID3Header::SIZE + Size() + FooterSize(); + } + return 0; +} + +bool ID3Parser::ID3Header::ParseNext(uint8_t c) { + if (!Update(c)) { + Reset(); + if (!Update(c)) { + Reset(); + } + } + return IsValid(); +} + +bool ID3Parser::ID3Header::IsValid(int aPos) const { + if (aPos >= SIZE) { + return true; + } + const uint8_t c = mRaw[aPos]; + switch (aPos) { + case 0: + case 1: + case 2: + // Expecting "ID3". 
+ return id3_header::ID[aPos] == c; + case 3: + return MajorVersion() >= id3_header::MIN_MAJOR_VER && + MajorVersion() <= id3_header::MAX_MAJOR_VER; + case 4: + return MinorVersion() < 0xFF; + case 5: + // Validate flags for supported versions, see bug 949036. + return ((0xFF >> MajorVersion()) & c) == 0; + case 6: + case 7: + case 8: + case 9: + return c < 0x80; + } + return true; +} + +bool ID3Parser::ID3Header::IsValid() const { return mPos >= SIZE; } + +bool ID3Parser::ID3Header::Update(uint8_t c) { + if (mPos >= id3_header::SIZE_END - id3_header::SIZE_LEN && + mPos < id3_header::SIZE_END) { + uint32_t tmp = mSize.valueOr(0) << 7; + mSize = Some(tmp | c); + } + if (mPos < SIZE) { + mRaw[mPos] = c; + } + return IsValid(mPos++); +} + +} // namespace mozilla diff --git a/dom/media/mp3/MP3FrameParser.h b/dom/media/mp3/MP3FrameParser.h new file mode 100644 index 0000000000..d0d7a372d2 --- /dev/null +++ b/dom/media/mp3/MP3FrameParser.h @@ -0,0 +1,374 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MP3_FRAME_PARSER_H_ +#define MP3_FRAME_PARSER_H_ + +#include <vector> + +#include "mozilla/Maybe.h" +#include "mozilla/Result.h" +#include "BufferReader.h" + +namespace mozilla { + +// ID3 header parser state machine used by FrameParser. +// The header contains the following format (one byte per term): +// 'I' 'D' '3' MajorVersion MinorVersion Flags Size1 Size2 Size3 Size4 +// For more details see https://id3.org/id3v2.4.0-structure +class ID3Parser { + public: + // Holds the ID3 header and its parsing state. + class ID3Header { + public: + // The header size is static, see class comment. + static const int SIZE = 10; + static const int ID3v1_SIZE = 128; + + // Constructor. + ID3Header(); + + // Resets the state to allow for a new parsing session. 
+ void Reset(); + + // The ID3 tags are versioned like this: ID3vMajorVersion.MinorVersion. + uint8_t MajorVersion() const; + uint8_t MinorVersion() const; + + // The ID3 flags field. + uint8_t Flags() const; + + // The derived size based on the provided size fields. + uint32_t Size() const; + + // To see whether we have parsed the value of the size from header. + bool HasSizeBeenSet() const; + + // Returns the size of an ID3v2.4 footer if present and zero otherwise. + uint8_t FooterSize() const; + + // The total size of the ID3 tag including header/footer, or zero if + // none has been found. + uint32_t TotalTagSize() const; + + // Returns whether the parsed data is a valid ID3 header up to the given + // byte position. + bool IsValid(int aPos) const; + + // Returns whether the parsed data is a complete and valid ID3 header. + bool IsValid() const; + + // Parses the next provided byte. + // Returns whether a complete and valid ID3 header has been parsed. + bool ParseNext(uint8_t c); + + private: + // Updates the parser state machine with the provided next byte. + // Returns whether the provided byte is a valid next byte in the sequence. + bool Update(uint8_t c); + + // The currently parsed byte sequence. + uint8_t mRaw[SIZE] = {}; + + // The derived size as provided by the size fields. + // The header size field holds a 4-byte sequence with each MSB set to 0; + // these bits need to be ignored when deriving the actual size. + Maybe<uint32_t> mSize; + + // The current byte position in the parsed sequence. Reset via Reset and + // incremented via Update. + int mPos = 0; + }; + + // Check if the buffer is starting with ID3v2 tag. + static bool IsBufferStartingWithID3Tag(BufferReader* aReader); + // Similarly, if the buffer is starting with ID3v1 tag. + static bool IsBufferStartingWithID3v1Tag(BufferReader* aReader); + + // Returns the parsed ID3 header. Note: check for validity. + const ID3Header& Header() const; + + // Returns the size of all parsed ID3 headers.
+ uint32_t TotalHeadersSize() const; + + // Parses contents of given BufferReader for a valid ID3v2 header. + // Returns the parsed ID3v2 tag size if successful and zero otherwise. + uint32_t Parse(BufferReader* aReader); + + // Resets the state to allow for a new parsing session. + void Reset(); + + private: + uint32_t ParseInternal(BufferReader* aReader); + + // The currently parsed ID3 header. Reset via Reset, updated via Parse. + ID3Header mHeader; + // If a file contains multiple ID3 headers, then we would only select the + // latest one, but keep the size of the former, abandoned ones in order to + // return the correct size offset. + uint32_t mFormerID3Size = 0; +}; + +// MPEG audio frame parser. +// The MPEG frame header has the following format (one bit per character): +// 11111111 111VVLLC BBBBSSPR MMEETOHH +// { sync } - 11 sync bits +// VV - MPEG audio version ID (0->2.5, 1->reserved, 2->2, 3->1) +// LL - Layer description (0->reserved, 1->III, 2->II, 3->I) +// C - CRC protection bit (0->protected, 1->not protected) +// BBBB - Bitrate index (see table in implementation) +// SS - Sampling rate index (see table in implementation) +// P - Padding bit (0->not padded, 1->padded by 1 slot size) +// R - Private bit (ignored) +// MM - Channel mode (0->stereo, 1->joint stereo, 2->dual channel, +// 3->single channel) +// EE - Mode extension for joint stereo (ignored) +// T - Copyright (0->disabled, 1->enabled) +// O - Original (0->copy, 1->original) +// HH - Emphasis (0->none, 1->50/15 ms, 2->reserved, 3->CCITT J.17) +class FrameParser { + public: + // Holds the frame header and its parsing state. + class FrameHeader { + public: + // The header size is static, see class comments. + static const int SIZE = 4; + + // Constructor. + FrameHeader(); + + // Raw field access, see class comments for details.
+ uint8_t Sync1() const; + uint8_t Sync2() const; + uint8_t RawVersion() const; + uint8_t RawLayer() const; + uint8_t RawProtection() const; + uint8_t RawBitrate() const; + uint8_t RawSampleRate() const; + uint8_t Padding() const; + uint8_t Private() const; + uint8_t RawChannelMode() const; + + // Sampling rate frequency in Hz. + uint32_t SampleRate() const; + + // Number of audio channels. + uint32_t Channels() const; + + // Samples per frame, static depending on MPEG version and layer. + uint32_t SamplesPerFrame() const; + + // Slot size used for padding, static depending on MPEG layer. + uint32_t SlotSize() const; + + // Bitrate in kbps, can vary between frames. + uint32_t Bitrate() const; + + // MPEG layer (0->invalid, 1->I, 2->II, 3->III). + uint32_t Layer() const; + + // Returns whether the parsed data is a valid frame header up to the given + // byte position. + bool IsValid(const int aPos) const; + + // Returns whether the parsed data is a complete and valid frame header. + bool IsValid() const; + + // Resets the state to allow for a new parsing session. + void Reset(); + + // Parses the next provided byte. + // Returns whether the byte creates a valid sequence up to this point. + bool ParseNext(const uint8_t c); + + private: + // Updates the parser state machine with the provided next byte. + // Returns whether the provided byte is a valid next byte in the sequence. + bool Update(const uint8_t c); + + // The currently parsed byte sequence. + uint8_t mRaw[SIZE] = {}; + + // The current byte position in the parsed sequence. Reset via Reset and + // incremented via Update. + int mPos = 0; + }; + + // VBR frames may contain Xing or VBRI headers for additional info, we use + // this class to parse them and access this info. + class VBRHeader { + public: + // Synchronize with vbr_header TYPE_STR on change. + enum VBRHeaderType { NONE = 0, XING, VBRI }; + + // Constructor.
+ VBRHeader(); + + // Returns the parsed VBR header type, or NONE if no valid header found. + VBRHeaderType Type() const; + + // Returns the total number of audio frames (excluding the VBR header frame) + // expected in the stream/file. + const Maybe<uint32_t>& NumAudioFrames() const; + + // Returns the expected size of the stream. + const Maybe<uint32_t>& NumBytes() const; + + // Returns the VBR scale factor (0: best quality, 100: lowest quality). + const Maybe<uint32_t>& Scale() const; + + // Returns true iff Xing/Info TOC (table of contents) is present. + bool IsTOCPresent() const; + + // Returns whether the header is valid (type XING or VBRI). + bool IsValid() const; + + // Returns whether the header is valid and contains reasonable non-zero + // field values. + bool IsComplete() const; + + // Returns the byte offset for the given duration percentage as a factor + // (0: begin, 1.0: end). + int64_t Offset(media::TimeUnit aTime, media::TimeUnit aDuration) const; + + // Parses contents of given BufferReader for a valid VBR header. + // The offset of the passed BufferReader needs to point to an MPEG frame + // begin, as a VBRI-style header is searched at a fixed offset relative to + // frame begin. Returns whether a valid VBR header was found in the range. + bool Parse(BufferReader* aReader, size_t aFrameSize); + + uint32_t EncoderDelay() const { return mEncoderDelay; } + uint32_t EncoderPadding() const { return mEncoderPadding; } + + private: + // Parses contents of given BufferReader for a valid Xing header. + // The initial BufferReader offset will be preserved. + // Returns whether a valid Xing header was found in the range. + Result<bool, nsresult> ParseXing(BufferReader* aReader, size_t aFrameSize); + + // Parses contents of given BufferReader for a valid VBRI header. + // The initial BufferReader offset will be preserved. It also needs to point + // to the beginning of a valid MPEG frame, as VBRI headers are searched + // at a fixed offset relative to frame begin.
+ // Returns whether a valid VBRI header was found in the range. + Result<bool, nsresult> ParseVBRI(BufferReader* aReader); + + // The total number of frames expected as parsed from a VBR header. + Maybe<uint32_t> mNumAudioFrames; + + // The total number of bytes expected in the stream. + Maybe<uint32_t> mNumBytes; + + // The VBR scale factor. + Maybe<uint32_t> mScale; + + // The TOC table mapping duration percentage to byte offset. + std::vector<int64_t> mTOC; + + // The detected VBR header type. + VBRHeaderType mType; + + uint16_t mVBRISeekOffsetsFramesPerEntry = 0; + + // Delay and padding values found in the LAME header. The encoder delay is a + // number of frames that has to be skipped at the beginning of the stream, + // encoder padding is a number of frames that needs to be ignored in the + // last packet. + uint16_t mEncoderDelay = 0; + uint16_t mEncoderPadding = 0; + }; + + // Frame meta container used to parse and hold a frame header and side info. + class Frame { + public: + // Returns the length of the frame excluding the header in bytes. + uint32_t Length() const; + + // Returns the parsed frame header. + const FrameHeader& Header() const; + + // Resets the frame header and data. + void Reset(); + + // Parses the next provided byte. + // Returns whether the byte creates a valid sequence up to this point. + bool ParseNext(uint8_t c); + + private: + // The currently parsed frame header. + FrameHeader mHeader; + }; + + // Constructor. + FrameParser(); + + // Returns the currently parsed frame. Reset via Reset or EndFrameSession. + const Frame& CurrentFrame() const; + + // Returns the previously parsed frame. Reset via Reset. + const Frame& PrevFrame() const; + + // Returns the first parsed frame. Reset via Reset. + const Frame& FirstFrame() const; + + // Returns the parsed ID3 header. Note: check for validity. + const ID3Parser::ID3Header& ID3Header() const; + + // Returns whether ID3 metadata have been found, at the end of the file. 
+ bool ID3v1MetadataFound() const; + + // Returns the size of all parsed ID3 headers. + uint32_t TotalID3HeaderSize() const; + + // Returns the parsed VBR header info. Note: check for validity by type. + const VBRHeader& VBRInfo() const; + + // Resets the parser. + void Reset(); + + // Resets all frame data, but not the ID3Header. + // Don't use between frames as first frame data is reset. + void ResetFrameData(); + + // Clear the last parsed frame to allow for next frame parsing, i.e.: + // - sets PrevFrame to CurrentFrame + // - resets the CurrentFrame + // - resets ID3Header if no valid header was parsed yet + void EndFrameSession(); + + // Parses contents of given BufferReader for a valid frame header and returns + // true if one was found. After returning, the variable passed to + // 'aBytesToSkip' holds the amount of bytes to be skipped (if any) in order to + // jump across a large ID3v2 tag spanning multiple buffers. + Result<bool, nsresult> Parse(BufferReader* aReader, uint32_t* aBytesToSkip); + + // Parses contents of given BufferReader for a valid VBR header. + // The offset of the passed BufferReader needs to point to an MPEG frame + // begin, as a VBRI-style header is searched at a fixed offset relative to + // frame begin. Returns whether a valid VBR header was found. + bool ParseVBRHeader(BufferReader* aReader); + + private: + // ID3 header parser. + ID3Parser mID3Parser; + + // VBR header parser. + VBRHeader mVBRHeader; + + // We keep the first parsed frame around for static info access, the + // previously parsed frame for debugging and the currently parsed frame. + Frame mFirstFrame; + Frame mFrame; + Frame mPrevFrame; + // If this is true, ID3v1 metadata have been found at the end of the file, and + // must be subtracted from the stream size in order to compute the stream + // duration, when computing the duration of a CBR file based on its length in + // bytes.
This means that the duration can change at the moment we reach the + // end of the file. + bool mID3v1MetadataFound = false; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/mp3/moz.build b/dom/media/mp3/moz.build new file mode 100644 index 0000000000..70031cc2b4 --- /dev/null +++ b/dom/media/mp3/moz.build @@ -0,0 +1,22 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "MP3Decoder.h", + "MP3Demuxer.h", + "MP3FrameParser.h", +] + +UNIFIED_SOURCES += [ + "MP3Decoder.cpp", + "MP3Demuxer.cpp", + "MP3FrameParser.cpp", +] + +FINAL_LIBRARY = "xul" + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") |