diff options
Diffstat (limited to 'dom/media/webm')
-rw-r--r-- | dom/media/webm/EbmlComposer.cpp | 185 | ||||
-rw-r--r-- | dom/media/webm/EbmlComposer.h | 81 | ||||
-rw-r--r-- | dom/media/webm/NesteggPacketHolder.h | 135 | ||||
-rw-r--r-- | dom/media/webm/WebMBufferedParser.cpp | 676 | ||||
-rw-r--r-- | dom/media/webm/WebMBufferedParser.h | 309 | ||||
-rw-r--r-- | dom/media/webm/WebMDecoder.cpp | 125 | ||||
-rw-r--r-- | dom/media/webm/WebMDecoder.h | 35 | ||||
-rw-r--r-- | dom/media/webm/WebMDemuxer.cpp | 1361 | ||||
-rw-r--r-- | dom/media/webm/WebMDemuxer.h | 293 | ||||
-rw-r--r-- | dom/media/webm/WebMWriter.cpp | 111 | ||||
-rw-r--r-- | dom/media/webm/WebMWriter.h | 69 | ||||
-rw-r--r-- | dom/media/webm/moz.build | 28 |
12 files changed, 3408 insertions, 0 deletions
diff --git a/dom/media/webm/EbmlComposer.cpp b/dom/media/webm/EbmlComposer.cpp new file mode 100644 index 0000000000..e3f04fd89b --- /dev/null +++ b/dom/media/webm/EbmlComposer.cpp @@ -0,0 +1,185 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "EbmlComposer.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/EndianUtils.h" +#include "libmkv/EbmlIDs.h" +#include "libmkv/EbmlWriter.h" +#include "libmkv/WebMElement.h" +#include "prtime.h" +#include "limits.h" + +namespace mozilla { + +// Timecode scale in nanoseconds +constexpr unsigned long TIME_CODE_SCALE = 1000000; +// The WebM header size without audio CodecPrivateData +constexpr int32_t DEFAULT_HEADER_SIZE = 1024; +// Number of milliseconds after which we flush audio-only clusters +constexpr int32_t FLUSH_AUDIO_ONLY_AFTER_MS = 1000; + +void EbmlComposer::GenerateHeader() { + MOZ_RELEASE_ASSERT(!mMetadataFinished); + MOZ_RELEASE_ASSERT(mHasAudio || mHasVideo); + + // Write the EBML header. + EbmlGlobal ebml; + // The WEbM header default size usually smaller than 1k. + auto buffer = + MakeUnique<uint8_t[]>(DEFAULT_HEADER_SIZE + mCodecPrivateData.Length()); + ebml.buf = buffer.get(); + ebml.offset = 0; + writeHeader(&ebml); + { + EbmlLoc segEbmlLoc, ebmlLocseg, ebmlLoc; + Ebml_StartSubElement(&ebml, &segEbmlLoc, Segment); + { + Ebml_StartSubElement(&ebml, &ebmlLocseg, SeekHead); + // Todo: We don't know the exact sizes of encoded data and + // ignore this section. 
+ Ebml_EndSubElement(&ebml, &ebmlLocseg); + writeSegmentInformation(&ebml, &ebmlLoc, TIME_CODE_SCALE, 0); + { + EbmlLoc trackLoc; + Ebml_StartSubElement(&ebml, &trackLoc, Tracks); + { + // Video + if (mWidth > 0 && mHeight > 0) { + writeVideoTrack(&ebml, 0x1, 0, "V_VP8", mWidth, mHeight, + mDisplayWidth, mDisplayHeight); + } + // Audio + if (mCodecPrivateData.Length() > 0) { + // Extract the pre-skip from mCodecPrivateData + // then convert it to nanoseconds. + // For more details see + // https://tools.ietf.org/html/rfc7845#section-4.2 + uint64_t codecDelay = (uint64_t)LittleEndian::readUint16( + mCodecPrivateData.Elements() + 10) * + PR_NSEC_PER_SEC / 48000; + // Fixed 80ms, convert into nanoseconds. + uint64_t seekPreRoll = 80 * PR_NSEC_PER_MSEC; + writeAudioTrack(&ebml, 0x2, 0x0, "A_OPUS", mSampleFreq, mChannels, + codecDelay, seekPreRoll, + mCodecPrivateData.Elements(), + mCodecPrivateData.Length()); + } + } + Ebml_EndSubElement(&ebml, &trackLoc); + } + } + // The Recording length is unknown and + // ignore write the whole Segment element size + } + MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE + mCodecPrivateData.Length(), + "write more data > EBML_BUFFER_SIZE"); + auto block = mBuffer.AppendElement(); + block->SetLength(ebml.offset); + memcpy(block->Elements(), ebml.buf, ebml.offset); + mMetadataFinished = true; +} + +nsresult EbmlComposer::WriteSimpleBlock(EncodedFrame* aFrame) { + MOZ_RELEASE_ASSERT(mMetadataFinished); + auto frameType = aFrame->mFrameType; + const bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME); + const bool isVP8PFrame = (frameType == EncodedFrame::FrameType::VP8_P_FRAME); + const bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME); + + MOZ_ASSERT_IF(isVP8IFrame, mHasVideo); + MOZ_ASSERT_IF(isVP8PFrame, mHasVideo); + MOZ_ASSERT_IF(isOpus, mHasAudio); + + if (isVP8PFrame && !mHasWrittenCluster) { + // We ensure there is a cluster header and an I-frame prior to any P-frame. 
+ return NS_ERROR_INVALID_ARG; + } + + int64_t timeCode = aFrame->mTime.ToMicroseconds() / PR_USEC_PER_MSEC - + mCurrentClusterTimecode; + + const bool needClusterHeader = + !mHasWrittenCluster || + (!mHasVideo && timeCode >= FLUSH_AUDIO_ONLY_AFTER_MS) || isVP8IFrame; + + auto block = mBuffer.AppendElement(); + block->SetLength(aFrame->mFrameData->Length() + DEFAULT_HEADER_SIZE); + + EbmlGlobal ebml; + ebml.offset = 0; + ebml.buf = block->Elements(); + + if (needClusterHeader) { + mHasWrittenCluster = true; + EbmlLoc ebmlLoc; + // This starts the Cluster element. Note that we never end this element + // through Ebml_EndSubElement. What the ending would allow us to do is write + // the full length of the cluster in the element header. That would also + // force us to keep the entire cluster in memory until we know where it + // ends. Now it instead ends through the start of the next cluster. This + // allows us to stream the muxed data with much lower latency than if we + // would have to wait for clusters to end. + Ebml_StartSubElement(&ebml, &ebmlLoc, Cluster); + // if timeCode didn't under/overflow before, it shouldn't after this + mCurrentClusterTimecode = aFrame->mTime.ToMicroseconds() / PR_USEC_PER_MSEC; + Ebml_SerializeUnsigned(&ebml, Timecode, mCurrentClusterTimecode); + + // Can't under-/overflow now + timeCode = 0; + } + + if (MOZ_UNLIKELY(timeCode < SHRT_MIN || timeCode > SHRT_MAX)) { + MOZ_CRASH_UNSAFE_PRINTF( + "Invalid cluster timecode! audio=%d, video=%d, timeCode=%" PRId64 + "ms, currentClusterTimecode=%" PRIu64 "ms", + mHasAudio, mHasVideo, timeCode, mCurrentClusterTimecode); + } + + writeSimpleBlock(&ebml, isOpus ? 
0x2 : 0x1, static_cast<short>(timeCode), + isVP8IFrame, 0, 0, + (unsigned char*)aFrame->mFrameData->Elements(), + aFrame->mFrameData->Length()); + MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE + aFrame->mFrameData->Length(), + "write more data > EBML_BUFFER_SIZE"); + block->SetLength(ebml.offset); + + return NS_OK; +} + +void EbmlComposer::SetVideoConfig(uint32_t aWidth, uint32_t aHeight, + uint32_t aDisplayWidth, + uint32_t aDisplayHeight) { + MOZ_RELEASE_ASSERT(!mMetadataFinished); + MOZ_ASSERT(aWidth > 0, "Width should > 0"); + MOZ_ASSERT(aHeight > 0, "Height should > 0"); + MOZ_ASSERT(aDisplayWidth > 0, "DisplayWidth should > 0"); + MOZ_ASSERT(aDisplayHeight > 0, "DisplayHeight should > 0"); + mWidth = aWidth; + mHeight = aHeight; + mDisplayWidth = aDisplayWidth; + mDisplayHeight = aDisplayHeight; + mHasVideo = true; +} + +void EbmlComposer::SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels) { + MOZ_RELEASE_ASSERT(!mMetadataFinished); + MOZ_ASSERT(aSampleFreq > 0, "SampleFreq should > 0"); + MOZ_ASSERT(aChannels > 0, "Channels should > 0"); + mSampleFreq = aSampleFreq; + mChannels = aChannels; + mHasAudio = true; +} + +void EbmlComposer::ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs, + uint32_t aFlag) { + if (!mMetadataFinished) { + return; + } + aDestBufs->AppendElements(std::move(mBuffer)); + MOZ_ASSERT(mBuffer.IsEmpty()); +} + +} // namespace mozilla diff --git a/dom/media/webm/EbmlComposer.h b/dom/media/webm/EbmlComposer.h new file mode 100644 index 0000000000..a037e4ef8b --- /dev/null +++ b/dom/media/webm/EbmlComposer.h @@ -0,0 +1,81 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef EbmlComposer_h_ +#define EbmlComposer_h_ +#include "nsTArray.h" +#include "ContainerWriter.h" + +namespace mozilla { + +/* + * A WebM muxer helper that packages data into a valid WebM format. + */ +class EbmlComposer { + public: + EbmlComposer() = default; + /* + * Assign the parameters which the header requires. These can be called multiple + * times to change parameter values until GenerateHeader() is called, after + * which calling them again is illegal. + */ + void SetVideoConfig(uint32_t aWidth, uint32_t aHeight, uint32_t aDisplayWidth, + uint32_t aDisplayHeight); + void SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels); + /* + * Set the CodecPrivateData for writing in header. + */ + void SetAudioCodecPrivateData(nsTArray<uint8_t>& aBufs) { + mCodecPrivateData.AppendElements(aBufs); + } + /* + * Generate the whole WebM header with the configured tracks, and make + * available to ExtractBuffer. Must only be called once. + */ + void GenerateHeader(); + /* + * Insert an encoded media buffer into the muxer, where it is packaged + * into a SimpleBlock. If no cluster is open, a new cluster is started for + * writing. Frames passed to this function should already have any codec delay + * applied. + */ + nsresult WriteSimpleBlock(EncodedFrame* aFrame); + /* + * Get valid cluster data. + */ + void ExtractBuffer(nsTArray<nsTArray<uint8_t>>* aDestBufs, + uint32_t aFlag = 0); + + private: + // True once we have written the first cluster header. We cannot serialize any + // P-frames until this is true, since we start a new cluster every I-frame. + bool mHasWrittenCluster = false; + // The timecode of the cluster. + uint64_t mCurrentClusterTimecode = 0; + + // Written data to be flushed out by ExtractBuffer(). + nsTArray<nsTArray<uint8_t>> mBuffer; + + // True when Metadata has been serialized into mBuffer. 
+ bool mMetadataFinished = false; + + // Video configuration + int mWidth = 0; + int mHeight = 0; + int mDisplayWidth = 0; + int mDisplayHeight = 0; + bool mHasVideo = false; + + // Audio configuration + float mSampleFreq = 0; + int mChannels = 0; + bool mHasAudio = false; + // Audio codec specific header data. + nsTArray<uint8_t> mCodecPrivateData; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/NesteggPacketHolder.h b/dom/media/webm/NesteggPacketHolder.h new file mode 100644 index 0000000000..7c74f752d3 --- /dev/null +++ b/dom/media/webm/NesteggPacketHolder.h @@ -0,0 +1,135 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(NesteggPacketHolder_h_) +# define NesteggPacketHolder_h_ + +# include <deque> +# include <stdint.h> +# include "nsAutoRef.h" +# include "nestegg/nestegg.h" + +namespace mozilla { + +// Holds a nestegg_packet, and its file offset. This is needed so we +// know the offset in the file we've played up to, in order to calculate +// whether it's likely we can play through to the end without needing +// to stop to buffer, given the current download rate. +class NesteggPacketHolder { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(NesteggPacketHolder) + NesteggPacketHolder() + : mPacket(nullptr), + mOffset(-1), + mTimestamp(-1), + mDuration(-1), + mTrack(0), + mIsKeyframe(false) {} + + bool Init(nestegg_packet* aPacket, int64_t aOffset, unsigned aTrack, + bool aIsKeyframe) { + uint64_t timestamp_ns; + if (nestegg_packet_tstamp(aPacket, &timestamp_ns) == -1) { + return false; + } + + // We store the timestamp as signed microseconds so that it's easily + // comparable to other timestamps we have in the system. 
+ mTimestamp = timestamp_ns / 1000; + mPacket = aPacket; + mOffset = aOffset; + mTrack = aTrack; + mIsKeyframe = aIsKeyframe; + + uint64_t duration_ns; + if (!nestegg_packet_duration(aPacket, &duration_ns)) { + mDuration = duration_ns / 1000; + } + return true; + } + + nestegg_packet* Packet() { + MOZ_ASSERT(IsInitialized()); + return mPacket; + } + int64_t Offset() { + MOZ_ASSERT(IsInitialized()); + return mOffset; + } + int64_t Timestamp() { + MOZ_ASSERT(IsInitialized()); + return mTimestamp; + } + int64_t Duration() { + MOZ_ASSERT(IsInitialized()); + return mDuration; + } + unsigned Track() { + MOZ_ASSERT(IsInitialized()); + return mTrack; + } + bool IsKeyframe() { + MOZ_ASSERT(IsInitialized()); + return mIsKeyframe; + } + + private: + ~NesteggPacketHolder() { nestegg_free_packet(mPacket); } + + bool IsInitialized() { return mOffset >= 0; } + + nestegg_packet* mPacket; + + // Offset in bytes. This is the offset of the end of the Block + // which contains the packet. + int64_t mOffset; + + // Packet presentation timestamp in microseconds. + int64_t mTimestamp; + + // Packet duration in microseconds; -1 if unknown or retrieval failed. + int64_t mDuration; + + // Track ID. + unsigned mTrack; + + // Does this packet contain a keyframe? + bool mIsKeyframe; + + // Copy constructor and assignment operator not implemented. Don't use them! + NesteggPacketHolder(const NesteggPacketHolder& aOther); + NesteggPacketHolder& operator=(NesteggPacketHolder const& aOther); +}; + +// Queue for holding nestegg packets. 
+class WebMPacketQueue { + public: + int32_t GetSize() { return mQueue.size(); } + + void Push(NesteggPacketHolder* aItem) { mQueue.push_back(aItem); } + + void PushFront(NesteggPacketHolder* aItem) { + mQueue.push_front(std::move(aItem)); + } + + already_AddRefed<NesteggPacketHolder> PopFront() { + RefPtr<NesteggPacketHolder> result = std::move(mQueue.front()); + mQueue.pop_front(); + return result.forget(); + } + + void Reset() { + while (!mQueue.empty()) { + mQueue.pop_front(); + } + } + + private: + std::deque<RefPtr<NesteggPacketHolder>> mQueue; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMBufferedParser.cpp b/dom/media/webm/WebMBufferedParser.cpp new file mode 100644 index 0000000000..114fd7df89 --- /dev/null +++ b/dom/media/webm/WebMBufferedParser.cpp @@ -0,0 +1,676 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebMBufferedParser.h" + +#include <algorithm> + +#include "mozilla/CheckedInt.h" +#include "nsAlgorithm.h" +#include "nsThreadUtils.h" + +extern mozilla::LazyLogModule gMediaDemuxerLog; +#define WEBM_DEBUG(arg, ...) 
\ + MOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, \ + ("WebMBufferedParser(%p)::%s: " arg, this, __func__, ##__VA_ARGS__)) + +namespace mozilla { + +static uint32_t VIntLength(unsigned char aFirstByte, uint32_t* aMask) { + uint32_t count = 1; + uint32_t mask = 1 << 7; + while (count < 8) { + if ((aFirstByte & mask) != 0) { + break; + } + mask >>= 1; + count += 1; + } + if (aMask) { + *aMask = mask; + } + NS_ASSERTION(count >= 1 && count <= 8, "Insane VInt length."); + return count; +} + +constexpr uint8_t EBML_MAX_ID_LENGTH_DEFAULT = 4; +constexpr uint8_t EBML_MAX_SIZE_LENGTH_DEFAULT = 8; + +WebMBufferedParser::WebMBufferedParser(int64_t aOffset) + : mStartOffset(aOffset), + mCurrentOffset(aOffset), + mInitEndOffset(-1), + mBlockEndOffset(-1), + mState(READ_ELEMENT_ID), + mNextState(READ_ELEMENT_ID), + mVIntRaw(false), + mLastInitStartOffset(-1), + mLastInitSize(0), + mEBMLMaxIdLength(EBML_MAX_ID_LENGTH_DEFAULT), + mEBMLMaxSizeLength(EBML_MAX_SIZE_LENGTH_DEFAULT), + mClusterSyncPos(0), + mVIntLeft(0), + mBlockSize(0), + mClusterTimecode(0), + mClusterOffset(-1), + mClusterEndOffset(-1), + mBlockOffset(0), + mBlockTimecode(0), + mBlockTimecodeLength(0), + mSkipBytes(0), + mTimecodeScale(1000000), + mGotTimecodeScale(false), + mGotClusterTimecode(false) { + if (mStartOffset != 0) { + mState = FIND_CLUSTER_SYNC; + } +} + +MediaResult WebMBufferedParser::Append(const unsigned char* aBuffer, + uint32_t aLength, + nsTArray<WebMTimeDataOffset>& aMapping) { + static const uint32_t EBML_ID = 0x1a45dfa3; + static const uint32_t SEGMENT_ID = 0x18538067; + static const uint32_t SEGINFO_ID = 0x1549a966; + static const uint32_t TRACKS_ID = 0x1654AE6B; + static const uint32_t CLUSTER_ID = 0x1f43b675; + static const uint32_t TIMECODESCALE_ID = 0x2ad7b1; + static const unsigned char TIMECODE_ID = 0xe7; + static const unsigned char BLOCKGROUP_ID = 0xa0; + static const unsigned char BLOCK_ID = 0xa1; + static const unsigned char SIMPLEBLOCK_ID = 0xa3; + static const uint16_t 
EBML_MAX_ID_LENGTH_ID = 0x42f2; + static const uint16_t EBML_MAX_SIZE_LENGTH_ID = 0x42f3; + static const uint32_t BLOCK_TIMECODE_LENGTH = 2; + + static const unsigned char CLUSTER_SYNC_ID[] = {0x1f, 0x43, 0xb6, 0x75}; + + const unsigned char* p = aBuffer; + + // Parse each byte in aBuffer one-by-one, producing timecodes and updating + // aMapping as we go. Parser pauses at end of stream (which may be at any + // point within the parse) and resumes parsing the next time Append is + // called with new data. + while (p < aBuffer + aLength) { + switch (mState) { + case READ_ELEMENT_ID: + mVIntRaw = true; + mState = READ_VINT; + mNextState = READ_ELEMENT_SIZE; + break; + case READ_ELEMENT_SIZE: + if (mVInt.mLength > mEBMLMaxIdLength) { + nsPrintfCString detail("Invalid element id of length %" PRIu64, + mVInt.mLength); + WEBM_DEBUG("%s", detail.get()); + return MediaResult(NS_ERROR_FAILURE, detail); + } + mVIntRaw = false; + mElement.mID = mVInt; + mState = READ_VINT; + mNextState = PARSE_ELEMENT; + break; + case FIND_CLUSTER_SYNC: + if (*p++ == CLUSTER_SYNC_ID[mClusterSyncPos]) { + mClusterSyncPos += 1; + } else { + mClusterSyncPos = 0; + } + if (mClusterSyncPos == sizeof(CLUSTER_SYNC_ID)) { + mVInt.mValue = CLUSTER_ID; + mVInt.mLength = sizeof(CLUSTER_SYNC_ID); + mState = READ_ELEMENT_SIZE; + } + break; + case PARSE_ELEMENT: + if (mVInt.mLength > mEBMLMaxSizeLength) { + nsPrintfCString detail("Invalid element size of length %" PRIu64, + mVInt.mLength); + WEBM_DEBUG("%s", detail.get()); + return MediaResult(NS_ERROR_FAILURE, detail); + } + mElement.mSize = mVInt; + switch (mElement.mID.mValue) { + case SEGMENT_ID: + mState = READ_ELEMENT_ID; + break; + case SEGINFO_ID: + mGotTimecodeScale = true; + mState = READ_ELEMENT_ID; + break; + case TIMECODE_ID: + mVInt = VInt(); + mVIntLeft = mElement.mSize.mValue; + mState = READ_VINT_REST; + mNextState = READ_CLUSTER_TIMECODE; + break; + case TIMECODESCALE_ID: + mVInt = VInt(); + mVIntLeft = mElement.mSize.mValue; + mState = 
READ_VINT_REST; + mNextState = READ_TIMECODESCALE; + break; + case CLUSTER_ID: + mClusterOffset = mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + // Handle "unknown" length; + if (mElement.mSize.mValue + 1 != + uint64_t(1) << (mElement.mSize.mLength * 7)) { + mClusterEndOffset = mClusterOffset + mElement.mID.mLength + + mElement.mSize.mLength + + mElement.mSize.mValue; + } else { + mClusterEndOffset = -1; + } + mGotClusterTimecode = false; + mState = READ_ELEMENT_ID; + break; + case BLOCKGROUP_ID: + mState = READ_ELEMENT_ID; + break; + case SIMPLEBLOCK_ID: + /* FALLTHROUGH */ + case BLOCK_ID: + if (!mGotClusterTimecode) { + WEBM_DEBUG( + "The Timecode element must appear before any Block or " + "SimpleBlock elements in a Cluster"); + return MediaResult( + NS_ERROR_FAILURE, + "The Timecode element must appear before any Block or " + "SimpleBlock elements in a Cluster"); + } + mBlockSize = mElement.mSize.mValue; + mBlockTimecode = 0; + mBlockTimecodeLength = BLOCK_TIMECODE_LENGTH; + mBlockOffset = mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + mState = READ_VINT; + mNextState = READ_BLOCK_TIMECODE; + break; + case TRACKS_ID: + mSkipBytes = mElement.mSize.mValue; + mState = CHECK_INIT_FOUND; + break; + case EBML_MAX_ID_LENGTH_ID: + case EBML_MAX_SIZE_LENGTH_ID: + if (int64_t currentOffset = mCurrentOffset + (p - aBuffer); + currentOffset < mLastInitStartOffset || + currentOffset >= mLastInitStartOffset + mLastInitSize) { + nsPrintfCString str("Unexpected %s outside init segment", + mElement.mID.mValue == EBML_MAX_ID_LENGTH_ID + ? "EBMLMaxIdLength" + : "EBMLMaxSizeLength"); + WEBM_DEBUG("%s", str.get()); + return MediaResult(NS_ERROR_FAILURE, str); + } + if (mElement.mSize.mValue > 8) { + // https://www.rfc-editor.org/rfc/rfc8794.html (EBML): + // An Unsigned Integer Element MUST declare a length from zero + // to eight octets. 
+ nsPrintfCString str("Bad length of %s size", + mElement.mID.mValue == EBML_MAX_ID_LENGTH_ID + ? "EBMLMaxIdLength" + : "EBMLMaxSizeLength"); + WEBM_DEBUG("%s", str.get()); + return MediaResult(NS_ERROR_FAILURE, str); + } + mVInt = VInt(); + mVIntLeft = mElement.mSize.mValue; + mState = READ_VINT_REST; + mNextState = mElement.mID.mValue == EBML_MAX_ID_LENGTH_ID + ? READ_EBML_MAX_ID_LENGTH + : READ_EBML_MAX_SIZE_LENGTH; + break; + case EBML_ID: + mLastInitStartOffset = + mCurrentOffset + (p - aBuffer) - + (mElement.mID.mLength + mElement.mSize.mLength); + mLastInitSize = mElement.mSize.mValue; + mEBMLMaxIdLength = EBML_MAX_ID_LENGTH_DEFAULT; + mEBMLMaxSizeLength = EBML_MAX_SIZE_LENGTH_DEFAULT; + mState = READ_ELEMENT_ID; + break; + default: + mSkipBytes = mElement.mSize.mValue; + mState = SKIP_DATA; + mNextState = READ_ELEMENT_ID; + break; + } + break; + case READ_VINT: { + unsigned char c = *p++; + uint32_t mask; + mVInt.mLength = VIntLength(c, &mask); + mVIntLeft = mVInt.mLength - 1; + mVInt.mValue = mVIntRaw ? c : c & ~mask; + mState = READ_VINT_REST; + break; + } + case READ_VINT_REST: + if (mVIntLeft) { + mVInt.mValue <<= 8; + mVInt.mValue |= *p++; + mVIntLeft -= 1; + } else { + mState = mNextState; + } + break; + case READ_TIMECODESCALE: + if (!mGotTimecodeScale) { + WEBM_DEBUG("Should get the SegmentInfo first"); + return MediaResult(NS_ERROR_FAILURE, + "TimecodeScale appeared before SegmentInfo"); + } + mTimecodeScale = mVInt.mValue; + mState = READ_ELEMENT_ID; + break; + case READ_CLUSTER_TIMECODE: + mClusterTimecode = mVInt.mValue; + mGotClusterTimecode = true; + mState = READ_ELEMENT_ID; + break; + case READ_BLOCK_TIMECODE: + if (mBlockTimecodeLength) { + mBlockTimecode <<= 8; + mBlockTimecode |= *p++; + mBlockTimecodeLength -= 1; + } else { + // It's possible we've parsed this data before, so avoid inserting + // duplicate WebMTimeDataOffset entries. 
+ { + int64_t endOffset = mBlockOffset + mBlockSize + + mElement.mID.mLength + mElement.mSize.mLength; + uint32_t idx = aMapping.IndexOfFirstElementGt(endOffset); + if (idx == 0 || aMapping[idx - 1] != endOffset) { + // Don't insert invalid negative timecodes. + if (mBlockTimecode >= 0 || + mClusterTimecode >= uint16_t(abs(mBlockTimecode))) { + if (!mGotTimecodeScale) { + WEBM_DEBUG("Should get the TimecodeScale first"); + return MediaResult(NS_ERROR_FAILURE, + "Timecode appeared before SegmentInfo"); + } + uint64_t absTimecode = mClusterTimecode + mBlockTimecode; + absTimecode *= mTimecodeScale; + // Avoid creating an entry if the timecode is out of order + // (invalid according to the WebM specification) so that + // ordering invariants of aMapping are not violated. + if (idx == 0 || aMapping[idx - 1].mTimecode <= absTimecode || + (idx + 1 < aMapping.Length() && + aMapping[idx + 1].mTimecode >= absTimecode)) { + WebMTimeDataOffset entry(endOffset, absTimecode, + mLastInitStartOffset, mClusterOffset, + mClusterEndOffset); + aMapping.InsertElementAt(idx, entry); + } else { + WEBM_DEBUG("Out of order timecode %" PRIu64 + " in Cluster at %" PRId64 " ignored", + absTimecode, mClusterOffset); + } + } + } + } + + // Skip rest of block header and the block's payload. + mBlockSize -= mVInt.mLength; + mBlockSize -= BLOCK_TIMECODE_LENGTH; + mSkipBytes = uint32_t(mBlockSize); + mState = SKIP_DATA; + mNextState = READ_ELEMENT_ID; + } + break; + case READ_EBML_MAX_ID_LENGTH: + if (mElement.mSize.mLength == 0) { + // https://www.rfc-editor.org/rfc/rfc8794.html (EBML): + // If an Empty Element has a default value declared, then the EBML + // Reader MUST interpret the value of the Empty Element as the + // default value. + mVInt.mValue = EBML_MAX_ID_LENGTH_DEFAULT; + } + if (mVInt.mValue < 4 || mVInt.mValue > 5) { + // https://www.ietf.org/archive/id/draft-ietf-cellar-matroska-13.html + // (Matroska): + // The EBMLMaxIDLength of the EBML Header MUST be "4". 
+ // + // Also Matroska: + // Element IDs are encoded using the VINT mechanism described in + // Section 4 of [RFC8794] and can be between one and five octets + // long. Five-octet-long Element IDs are possible only if declared + // in the EBML header. + nsPrintfCString detail("Invalid EMBLMaxIdLength %" PRIu64, + mVInt.mValue); + WEBM_DEBUG("%s", detail.get()); + return MediaResult(NS_ERROR_FAILURE, detail); + } + mEBMLMaxIdLength = mVInt.mValue; + mState = READ_ELEMENT_ID; + break; + case READ_EBML_MAX_SIZE_LENGTH: + if (mElement.mSize.mLength == 0) { + // https://www.rfc-editor.org/rfc/rfc8794.html (EBML): + // If an Empty Element has a default value declared, then the EBML + // Reader MUST interpret the value of the Empty Element as the + // default value. + mVInt.mValue = EBML_MAX_SIZE_LENGTH_DEFAULT; + } + if (mVInt.mValue < 1 || mVInt.mValue > 8) { + // https://www.ietf.org/archive/id/draft-ietf-cellar-matroska-13.html + // (Matroska): + // The EBMLMaxSizeLength of the EBML Header MUST be between "1" and + // "8" inclusive. 
+ nsPrintfCString detail("Invalid EMBLMaxSizeLength %" PRIu64, + mVInt.mValue); + WEBM_DEBUG("%s", detail.get()); + return MediaResult(NS_ERROR_FAILURE, detail); + } + mEBMLMaxSizeLength = mVInt.mValue; + mState = READ_ELEMENT_ID; + break; + case SKIP_DATA: + if (mSkipBytes) { + uint32_t left = aLength - (p - aBuffer); + left = std::min(left, mSkipBytes); + p += left; + mSkipBytes -= left; + } + if (!mSkipBytes) { + mBlockEndOffset = mCurrentOffset + (p - aBuffer); + mState = mNextState; + } + break; + case CHECK_INIT_FOUND: + if (mSkipBytes) { + uint32_t left = aLength - (p - aBuffer); + left = std::min(left, mSkipBytes); + p += left; + mSkipBytes -= left; + } + if (!mSkipBytes) { + if (mInitEndOffset < 0) { + mInitEndOffset = mCurrentOffset + (p - aBuffer); + mBlockEndOffset = mCurrentOffset + (p - aBuffer); + } + mState = READ_ELEMENT_ID; + } + break; + } + } + + NS_ASSERTION(p == aBuffer + aLength, "Must have parsed to end of data."); + mCurrentOffset += aLength; + + return NS_OK; +} + +int64_t WebMBufferedParser::EndSegmentOffset(int64_t aOffset) { + if (mLastInitStartOffset > aOffset || mClusterOffset > aOffset) { + return std::min( + mLastInitStartOffset >= 0 ? mLastInitStartOffset : INT64_MAX, + mClusterOffset >= 0 ? mClusterOffset : INT64_MAX); + } + return mBlockEndOffset; +} + +int64_t WebMBufferedParser::GetClusterOffset() const { return mClusterOffset; } + +// SyncOffsetComparator and TimeComparator are slightly confusing, in that +// the nsTArray they're used with (mTimeMapping) is sorted by mEndOffset and +// these comparators are used on the other fields of WebMTimeDataOffset. +// This is only valid because timecodes are required to be monotonically +// increasing within a file (thus establishing an ordering relationship with +// mTimecode), and mEndOffset is derived from mSyncOffset. 
+struct SyncOffsetComparator { + bool Equals(const WebMTimeDataOffset& a, const int64_t& b) const { + return a.mSyncOffset == b; + } + + bool LessThan(const WebMTimeDataOffset& a, const int64_t& b) const { + return a.mSyncOffset < b; + } +}; + +struct TimeComparator { + bool Equals(const WebMTimeDataOffset& a, const uint64_t& b) const { + return a.mTimecode == b; + } + + bool LessThan(const WebMTimeDataOffset& a, const uint64_t& b) const { + return a.mTimecode < b; + } +}; + +bool WebMBufferedState::CalculateBufferedForRange(int64_t aStartOffset, + int64_t aEndOffset, + uint64_t* aStartTime, + uint64_t* aEndTime) { + MutexAutoLock lock(mMutex); + + // Find the first WebMTimeDataOffset at or after aStartOffset. + uint32_t start = mTimeMapping.IndexOfFirstElementGt(aStartOffset - 1, + SyncOffsetComparator()); + if (start == mTimeMapping.Length()) { + return false; + } + + // Find the first WebMTimeDataOffset at or before aEndOffset. + uint32_t end = mTimeMapping.IndexOfFirstElementGt(aEndOffset); + if (end > 0) { + end -= 1; + } + + // Range is empty. 
+ if (end <= start) { + return false; + } + + NS_ASSERTION(mTimeMapping[start].mSyncOffset >= aStartOffset && + mTimeMapping[end].mEndOffset <= aEndOffset, + "Computed time range must lie within data range."); + if (start > 0) { + NS_ASSERTION(mTimeMapping[start - 1].mSyncOffset < aStartOffset, + "Must have found least WebMTimeDataOffset for start"); + } + if (end < mTimeMapping.Length() - 1) { + NS_ASSERTION(mTimeMapping[end + 1].mEndOffset > aEndOffset, + "Must have found greatest WebMTimeDataOffset for end"); + } + + MOZ_ASSERT(mTimeMapping[end].mTimecode >= mTimeMapping[end - 1].mTimecode); + uint64_t frameDuration = + mTimeMapping[end].mTimecode - mTimeMapping[end - 1].mTimecode; + *aStartTime = mTimeMapping[start].mTimecode; + CheckedUint64 endTime{mTimeMapping[end].mTimecode}; + endTime += frameDuration; + if (!endTime.isValid()) { + WEBM_DEBUG("End time overflow during CalculateBufferedForRange."); + return false; + } + *aEndTime = endTime.value(); + return true; +} + +bool WebMBufferedState::GetOffsetForTime(uint64_t aTime, int64_t* aOffset) { + MutexAutoLock lock(mMutex); + + if (mTimeMapping.IsEmpty()) { + return false; + } + + uint64_t time = aTime; + if (time > 0) { + time = time - 1; + } + uint32_t idx = mTimeMapping.IndexOfFirstElementGt(time, TimeComparator()); + if (idx == mTimeMapping.Length()) { + // Clamp to end + *aOffset = mTimeMapping[mTimeMapping.Length() - 1].mSyncOffset; + } else { + // Idx is within array or has been clamped to start + *aOffset = mTimeMapping[idx].mSyncOffset; + } + return true; +} + +void WebMBufferedState::NotifyDataArrived(const unsigned char* aBuffer, + uint32_t aLength, int64_t aOffset) { + uint32_t idx = mRangeParsers.IndexOfFirstElementGt(aOffset - 1); + if (idx == 0 || !(mRangeParsers[idx - 1] == aOffset)) { + // If the incoming data overlaps an already parsed range, adjust the + // buffer so that we only reparse the new data. 
It's also possible to + // have an overlap where the end of the incoming data is within an + // already parsed range, but we don't bother handling that other than by + // avoiding storing duplicate timecodes when the parser runs. + if (idx != mRangeParsers.Length() && + mRangeParsers[idx].mStartOffset <= aOffset) { + // Complete overlap, skip parsing. + if (aOffset + aLength <= mRangeParsers[idx].mCurrentOffset) { + return; + } + + // Partial overlap, adjust the buffer to parse only the new data. + int64_t adjust = mRangeParsers[idx].mCurrentOffset - aOffset; + NS_ASSERTION(adjust >= 0, "Overlap detection bug."); + aBuffer += adjust; + aLength -= uint32_t(adjust); + } else { + mRangeParsers.InsertElementAt(idx, WebMBufferedParser(aOffset)); + if (idx != 0) { + mRangeParsers[idx].SetTimecodeScale( + mRangeParsers[0].GetTimecodeScale()); + } + } + } + + { + MutexAutoLock lock(mMutex); + mRangeParsers[idx].Append(aBuffer, aLength, mTimeMapping); + } + + // Merge parsers with overlapping regions and clean up the remnants. 
+ uint32_t i = 0; + while (i + 1 < mRangeParsers.Length()) { + if (mRangeParsers[i].mCurrentOffset >= mRangeParsers[i + 1].mStartOffset) { + mRangeParsers[i + 1].mStartOffset = mRangeParsers[i].mStartOffset; + mRangeParsers[i + 1].mInitEndOffset = mRangeParsers[i].mInitEndOffset; + mRangeParsers.RemoveElementAt(i); + } else { + i += 1; + } + } + + if (mRangeParsers.IsEmpty()) { + return; + } + + MutexAutoLock lock(mMutex); + mLastBlockOffset = mRangeParsers.LastElement().mBlockEndOffset; +} + +/* Clears every range parser and the whole offset-to-time mapping while holding mMutex. */ void WebMBufferedState::Reset() { + MutexAutoLock lock(mMutex); + mRangeParsers.Clear(); + mTimeMapping.Clear(); +} + +/* Feeds the cached bytes of each range in aRanges to NotifyDataArrived(), reading them from aResource in chunks of at most BLOCK_SIZE bytes. Bytes that an existing parser has already consumed are skipped. Indexing of a range stops early if a cached read fails or returns no data. */ void WebMBufferedState::UpdateIndex(const MediaByteRangeSet& aRanges, + MediaResource* aResource) { + for (uint32_t index = 0; index < aRanges.Length(); index++) { + const MediaByteRange& range = aRanges[index]; + int64_t offset = range.mStart; + /* Keep the remaining byte count 64-bit, like the offsets it is derived from, so that a range of 4 GiB or more is not silently truncated. */ int64_t length = range.mEnd - range.mStart; + + uint32_t idx = mRangeParsers.IndexOfFirstElementGt(offset - 1); + if (!idx || !(mRangeParsers[idx - 1] == offset)) { + // If the incoming data overlaps an already parsed range, adjust the + // buffer so that we only reparse the new data. It's also possible to + // have an overlap where the end of the incoming data is within an + // already parsed range, but we don't bother handling that other than by + // avoiding storing duplicate timecodes when the parser runs. + if (idx != mRangeParsers.Length() && + mRangeParsers[idx].mStartOffset <= offset) { + // Complete overlap, skip parsing. + if (offset + length <= mRangeParsers[idx].mCurrentOffset) { + continue; + } + + // Partial overlap, adjust the buffer to parse only the new data.
+ int64_t adjust = mRangeParsers[idx].mCurrentOffset - offset; + NS_ASSERTION(adjust >= 0, "Overlap detection bug."); + offset += adjust; + length -= adjust; + } else { + mRangeParsers.InsertElementAt(idx, WebMBufferedParser(offset)); + if (idx) { + mRangeParsers[idx].SetTimecodeScale( + mRangeParsers[0].GetTimecodeScale()); + } + } + } + + MediaResourceIndex res(aResource); + while (length > 0) { + static const int64_t BLOCK_SIZE = 1048576; + /* At most BLOCK_SIZE, so the narrowing to uint32_t is lossless. */ uint32_t block = uint32_t(std::min(length, BLOCK_SIZE)); + RefPtr<MediaByteBuffer> bytes = res.CachedMediaReadAt(offset, block); + /* An empty buffer would make no progress and loop forever, so treat it like a failed read. */ if (!bytes || bytes->Length() == 0) { + break; + } + NotifyDataArrived(bytes->Elements(), bytes->Length(), offset); + length -= int64_t(bytes->Length()); + offset += bytes->Length(); + } + } +} + +/* Returns the init-segment end offset recorded by the first range parser, or -1 when there are no parsers. */ int64_t WebMBufferedState::GetInitEndOffset() { + if (mRangeParsers.IsEmpty()) { + return -1; + } + return mRangeParsers[0].mInitEndOffset; +} + +/* Returns mLastBlockOffset, read under mMutex. */ int64_t WebMBufferedState::GetLastBlockOffset() { + MutexAutoLock lock(mMutex); + + return mLastBlockOffset; +} + +/* Sets *aTime to the timecode of the first mTimeMapping entry that SyncOffsetComparator orders after 0. Returns false if the mapping is empty or has no such entry. */ bool WebMBufferedState::GetStartTime(uint64_t* aTime) { + MutexAutoLock lock(mMutex); + + if (mTimeMapping.IsEmpty()) { + return false; + } + + uint32_t idx = mTimeMapping.IndexOfFirstElementGt(0, SyncOffsetComparator()); + if (idx == mTimeMapping.Length()) { + return false; + } + + *aTime = mTimeMapping[idx].mTimecode; + return true; +} + +/* Resolves aTime to an offset with GetOffsetForTime(), then sets *aKeyframeTime to the timecode of the first mTimeMapping entry that SyncOffsetComparator orders after that offset. Returns false if either lookup fails. NOTE(review): mMutex is held across the GetOffsetForTime() call, whose body is not visible here - confirm it does not acquire mMutex itself. */ bool WebMBufferedState::GetNextKeyframeTime(uint64_t aTime, + uint64_t* aKeyframeTime) { + MutexAutoLock lock(mMutex); + int64_t offset = 0; + bool rv = GetOffsetForTime(aTime, &offset); + if (!rv) { + return false; + } + uint32_t idx = + mTimeMapping.IndexOfFirstElementGt(offset, SyncOffsetComparator()); + if (idx == mTimeMapping.Length()) { + return false; + } + *aKeyframeTime = mTimeMapping[idx].mTimecode; + return true; +} +} // namespace mozilla + +#undef WEBM_DEBUG diff --git a/dom/media/webm/WebMBufferedParser.h b/dom/media/webm/WebMBufferedParser.h new file mode 100644 index 0000000000..bf553d8f24 --- /dev/null +++
b/dom/media/webm/WebMBufferedParser.h @@ -0,0 +1,309 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(WebMBufferedParser_h_) +# define WebMBufferedParser_h_ + +# include "nsISupportsImpl.h" +# include "nsTArray.h" +# include "mozilla/Mutex.h" +# include "MediaResource.h" +# include "MediaResult.h" + +namespace mozilla { + +// Stores a stream byte offset and the scaled timecode of the block at +// that offset. +struct WebMTimeDataOffset { + /* Note the argument order: aTimecode comes second, although mTimecode is the last member declared. */ WebMTimeDataOffset(int64_t aEndOffset, uint64_t aTimecode, + int64_t aInitOffset, int64_t aSyncOffset, + int64_t aClusterEndOffset) + : mEndOffset(aEndOffset), + mInitOffset(aInitOffset), + mSyncOffset(aSyncOffset), + mClusterEndOffset(aClusterEndOffset), + mTimecode(aTimecode) {} + + /* The comparison operators take a raw byte offset and look only at mEndOffset. */ bool operator==(int64_t aEndOffset) const { return mEndOffset == aEndOffset; } + + bool operator!=(int64_t aEndOffset) const { return mEndOffset != aEndOffset; } + + bool operator<(int64_t aEndOffset) const { return mEndOffset < aEndOffset; } + + int64_t mEndOffset; + int64_t mInitOffset; + int64_t mSyncOffset; + int64_t mClusterEndOffset; + // In nanoseconds + uint64_t mTimecode; +}; + +// A simple WebM parser that produces data offset to timecode pairs as it +// consumes blocks. A new parser is created for each distinct range of data +// received and begins parsing from the first WebM cluster within that +// range. Old parsers are destroyed when their range merges with a later +// parser or an already parsed range. The parser may start at any position +// within the stream.
+struct WebMBufferedParser { + explicit WebMBufferedParser(int64_t aOffset); + + /* Returns the timecode scale; asserts that one has been read or supplied. */ uint32_t GetTimecodeScale() { + MOZ_ASSERT(mGotTimecodeScale); + return mTimecodeScale; + } + + // Use this function when only media segments will be fed to the parser. + void AppendMediaSegmentOnly() { mGotTimecodeScale = true; } + + // If this parser is not expected to parse a segment info, it must be told + // the appropriate timecode scale to use from elsewhere. + void SetTimecodeScale(uint32_t aTimecodeScale) { + mTimecodeScale = aTimecodeScale; + mGotTimecodeScale = true; + } + + // Steps the parser through aLength bytes of data. Always consumes + // aLength bytes. Updates mCurrentOffset before returning. + // The returned MediaResult reports whether an error was encountered. + MediaResult Append(const unsigned char* aBuffer, uint32_t aLength, + nsTArray<WebMTimeDataOffset>& aMapping); + + /* The comparison operators take a raw byte offset and look only at mCurrentOffset. */ bool operator==(int64_t aOffset) const { return mCurrentOffset == aOffset; } + + bool operator<(int64_t aOffset) const { return mCurrentOffset < aOffset; } + + // Returns the start offset of the init (EBML) or media segment (Cluster) + // following the aOffset position. If none were found, returns + // mBlockEndOffset. This allows determining the end of the interval + // containing aOffset. + int64_t EndSegmentOffset(int64_t aOffset); + + // Return the Cluster offset, return -1 if we can't find the Cluster. + int64_t GetClusterOffset() const; + + // The offset at which this parser started parsing. Used to merge + // adjacent parsers, in which case the later parser adopts the earlier + // parser's mStartOffset. + int64_t mStartOffset; + + // Current offset within the stream. Updated in chunks as Append() consumes + // data. + int64_t mCurrentOffset; + + // Tracks element's end offset. This indicates the end of the first init + // segment. Will only be set if a Segment Information has been found. + int64_t mInitEndOffset; + + // End offset of the last block parsed.
+ // Will only be set if a complete block has been parsed. + int64_t mBlockEndOffset; + + private: + enum State { + // Parser start state. Expects to begin at a valid EBML element. Move + // to READ_VINT with mVIntRaw true, then return to READ_ELEMENT_SIZE. + READ_ELEMENT_ID, + + // Store element ID read into mVInt into mElement.mID. Move to + // READ_VINT with mVIntRaw false, then return to PARSE_ELEMENT. + READ_ELEMENT_SIZE, + + // Parser start state for parsers started at an arbitrary offset. Scans + // forward for the first cluster, then move to READ_ELEMENT_ID. + FIND_CLUSTER_SYNC, + + // Simplistic core of the parser. Does not pay attention to nesting of + // elements. Checks mElement for an element ID of interest, then moves + // to the next state as determined by the element ID. + PARSE_ELEMENT, + + // Read the first byte of a variable length integer. The first byte + // encodes both the variable integer's length and part of the value. + // The value read so far is stored in mVInt.mValue and the length is + // stored in mVInt.mLength. The number of bytes left to read is stored + // in mVIntLeft. + READ_VINT, + + // Reads the remaining mVIntLeft bytes into mVInt.mValue. + READ_VINT_REST, + + // mVInt holds the parsed timecode scale, store it in mTimecodeScale, + // then return to READ_ELEMENT_ID. + READ_TIMECODESCALE, + + // mVInt holds the parsed cluster timecode, store it in + // mClusterTimecode, then return to READ_ELEMENT_ID. + READ_CLUSTER_TIMECODE, + + // mBlockTimecodeLength holds the remaining length of the block timecode + // left to read. Read each byte of the timecode into mBlockTimecode. + // Once complete, calculate the scaled timecode from the cluster + // timecode, block timecode, and timecode scale, and insert a + // WebMTimeDataOffset entry into aMapping if one is not already present + // for this offset. + READ_BLOCK_TIMECODE, + + // mVInt holds the parsed EBMLMaxIdLength, store it in mEBMLMaxIdLength, + // then return to READ_ELEMENT_ID.
+ READ_EBML_MAX_ID_LENGTH, + + // mVInt holds the parsed EBMLMaxSizeLength, store it in mEBMLMaxSizeLength, + // then return to READ_ELEMENT_ID. + READ_EBML_MAX_SIZE_LENGTH, + + // Will skip the current tracks element and set mInitEndOffset if an init + // segment has been found. + // Currently, only assumes it's the end of the tracks element. + CHECK_INIT_FOUND, + + // Skip mSkipBytes of data before resuming parse at mNextState. + SKIP_DATA, + }; + + // Current state machine action. + State mState; + + // Next state machine action. SKIP_DATA and READ_VINT_REST advance to + // mNextState when the current action completes. + State mNextState; + + /* A variable-length integer: the value read so far plus its encoded length (see READ_VINT). */ struct VInt { + VInt() : mValue(0), mLength(0) {} + uint64_t mValue; + uint64_t mLength; + }; + + /* An element header made of an ID vint and a size vint; Length() is the sum of their two encoded lengths. */ struct EBMLElement { + uint64_t Length() { return mID.mLength + mSize.mLength; } + VInt mID; + VInt mSize; + }; + + EBMLElement mElement; + + VInt mVInt; + + /* Set true when an element ID is read and false when an element size is read (see READ_ELEMENT_ID and READ_ELEMENT_SIZE). NOTE(review): presumably selects whether the vint length marker is kept in the value - confirm against Append(). */ bool mVIntRaw; + + // EBML start offset. This indicates the start of the last init segment + // parsed. Will only be set if an EBML element has been found. + int64_t mLastInitStartOffset; + + // EBML element size. This indicates the size of the body of the last init + // segment parsed. Will only be set if an EBML element has been found. + uint32_t mLastInitSize; + + // EBML max id length is the max number of bytes allowed for an element id + // vint. + uint8_t mEBMLMaxIdLength; + + // EBML max size length is the max number of bytes allowed for an element size + // vint. + uint8_t mEBMLMaxSizeLength; + + // Current match position within CLUSTER_SYNC_ID. Used to find sync + // within arbitrary data. + uint32_t mClusterSyncPos; + + // Number of bytes of mVInt left to read. mVInt is complete once this + // reaches 0. + uint32_t mVIntLeft; + + // Size of the block currently being parsed. Any unused data within the + // block is skipped once the block timecode has been parsed. + uint64_t mBlockSize; + + // Cluster-level timecode.
+ uint64_t mClusterTimecode; + + // Start offset of the cluster currently being parsed. Used as the sync + // point offset for the offset-to-time mapping as each block timecode is + // parsed. -1 if unknown. + int64_t mClusterOffset; + + // End offset of the cluster currently being parsed. -1 if unknown. + int64_t mClusterEndOffset; + + // Start offset of the block currently being parsed. Used as the byte + // offset for the offset-to-time mapping once the block timecode has been + // parsed. + int64_t mBlockOffset; + + // Block-level timecode. This is summed with mClusterTimecode to produce + // an absolute timecode for the offset-to-time mapping. + int16_t mBlockTimecode; + + // Number of bytes of mBlockTimecode left to read. + uint32_t mBlockTimecodeLength; + + // Count of bytes left to skip before resuming parse at mNextState. + // Mostly used to skip block payload data after reading a block timecode. + uint32_t mSkipBytes; + + // Timecode scale read from the segment info and used to scale absolute + // timecodes. + uint32_t mTimecodeScale; + + // True if we read the timecode scale from the segment info or have + // confirmed that the default value is to be used. + bool mGotTimecodeScale; + + // True if we've read the cluster time code.
+ bool mGotClusterTimecode; +}; + +/* Reference-counted owner of the offset-to-timecode index (mTimeMapping) and of the range parsers that populate it. */ class WebMBufferedState final { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(WebMBufferedState) + + public: + WebMBufferedState() : mMutex("WebMBufferedState"), mLastBlockOffset(-1) { + MOZ_COUNT_CTOR(WebMBufferedState); + } + + void NotifyDataArrived(const unsigned char* aBuffer, uint32_t aLength, + int64_t aOffset); + void Reset(); + void UpdateIndex(const MediaByteRangeSet& aRanges, MediaResource* aResource); + bool CalculateBufferedForRange(int64_t aStartOffset, int64_t aEndOffset, + uint64_t* aStartTime, uint64_t* aEndTime); + + // Returns true if mTimeMapping is not empty and sets aOffset to + // the latest offset for which decoding can resume without data + // dependencies to arrive at aTime. aTime will be clamped to the start + // of mTimeMapping if it is earlier than the first element, and to the end + // if later than the last. + bool GetOffsetForTime(uint64_t aTime, int64_t* aOffset); + + // Returns end offset of init segment or -1 if none found. + int64_t GetInitEndOffset(); + // Returns the end offset of the last complete block or -1 if none found. + int64_t GetLastBlockOffset(); + + // Sets aTime to the timecode of the first mTimeMapping entry that + // SyncOffsetComparator orders after 0. Returns false if there is none. + bool GetStartTime(uint64_t* aTime); + + // Resolves aTime to an offset with GetOffsetForTime(), then sets + // aKeyframeTime to the timecode of the first mTimeMapping entry that + // SyncOffsetComparator orders after that offset. Returns false if either + // lookup fails. + bool GetNextKeyframeTime(uint64_t aTime, uint64_t* aKeyframeTime); + + private: + // Private destructor, to discourage deletion outside of Release(): + MOZ_COUNTED_DTOR(WebMBufferedState) + + // Synchronizes access to the mTimeMapping array and mLastBlockOffset. + Mutex mMutex; + + // Sorted (by offset) map of data offsets to timecodes. Populated + // on the main thread as data is received and parsed by WebMBufferedParsers. + nsTArray<WebMTimeDataOffset> mTimeMapping MOZ_GUARDED_BY(mMutex); + // The last complete block parsed. -1 if not set. + int64_t mLastBlockOffset MOZ_GUARDED_BY(mMutex); + + // Sorted (by offset) live parser instances. Main thread only.
+ nsTArray<WebMBufferedParser> mRangeParsers; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMDecoder.cpp b/dom/media/webm/WebMDecoder.cpp new file mode 100644 index 0000000000..045141240c --- /dev/null +++ b/dom/media/webm/WebMDecoder.cpp @@ -0,0 +1,125 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebMDecoder.h" + +#include <utility> + +#include "mozilla/Preferences.h" +#include "VPXDecoder.h" +#include "mozilla/StaticPrefs_media.h" +#ifdef MOZ_AV1 +# include "AOMDecoder.h" +#endif +#include "MediaContainerType.h" +#include "PDMFactory.h" +#include "PlatformDecoderModule.h" +#include "VideoUtils.h" + +namespace mozilla { + +/* static */ /* Builds one TrackInfo per recognised codec listed in aType. aError is set to a fatal MediaResult when the container type is neither audio/webm nor video/webm, or when a codec string is not recognised; otherwise it is NS_OK. An unrecognised codec does not stop the loop, so tracks appended for other codecs are still returned. An empty codec list yields an empty array and NS_OK. */ +nsTArray<UniquePtr<TrackInfo>> WebMDecoder::GetTracksInfo( + const MediaContainerType& aType, MediaResult& aError) { + nsTArray<UniquePtr<TrackInfo>> tracks; + const bool isVideo = aType.Type() == MEDIAMIMETYPE("video/webm"); + + if (aType.Type() != MEDIAMIMETYPE("audio/webm") && !isVideo) { + aError = MediaResult( + NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Invalid type:%s", aType.Type().AsString().get())); + return tracks; + } + + aError = NS_OK; + + const MediaCodecs& codecs = aType.ExtendedType().Codecs(); + if (codecs.IsEmpty()) { + return tracks; + } + + for (const auto& codec : codecs.Range()) { + if (codec.EqualsLiteral("opus") || codec.EqualsLiteral("vorbis")) { + tracks.AppendElement( + CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "audio/"_ns + NS_ConvertUTF16toUTF8(codec), aType)); + continue; + } + if (isVideo) { + UniquePtr<TrackInfo> trackInfo; + if (IsVP9CodecString(codec)) { + trackInfo = CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "video/vp9"_ns, aType);
+ } else if (IsVP8CodecString(codec)) { + trackInfo = CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "video/vp8"_ns, aType); + } + if (trackInfo) { + VPXDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec); + tracks.AppendElement(std::move(trackInfo)); + continue; + } + } +#ifdef MOZ_AV1 + /* NOTE(review): unlike the VP8/VP9 branch above, this branch is not gated on isVideo and does not null-check trackInfo before using it - confirm that this is intended. */ if (StaticPrefs::media_av1_enabled() && IsAV1CodecString(codec)) { + auto trackInfo = + CreateTrackInfoWithMIMETypeAndContainerTypeExtraParameters( + "video/av1"_ns, aType); + AOMDecoder::SetVideoInfo(trackInfo->GetAsVideoInfo(), codec); + tracks.AppendElement(std::move(trackInfo)); + continue; + } +#endif + // Unknown codec + aError = MediaResult( + NS_ERROR_DOM_MEDIA_FATAL_ERR, + RESULT_DETAIL("Unknown codec:%s", NS_ConvertUTF16toUTF8(codec).get())); + } + return tracks; +} + +/* static */ /* Returns false when the WebM pref is disabled, when GetTracksInfo() reports an error, or when any resulting track is null or has no supporting platform decoder. Returns true otherwise, including when no tracks were produced. */ +bool WebMDecoder::IsSupportedType(const MediaContainerType& aContainerType) { + if (!StaticPrefs::media_webm_enabled()) { + return false; + } + + MediaResult rv = NS_OK; + auto tracks = GetTracksInfo(aContainerType, rv); + + if (NS_FAILED(rv)) { + return false; + } + + if (tracks.IsEmpty()) { + // WebM guarantees that the only codecs it contains are vp8, vp9, opus or + // vorbis.
+ return true; + } + + // Verify that we have a PDM that supports the whitelisted types, including + // color depth. + RefPtr<PDMFactory> platform = new PDMFactory(); + for (const auto& track : tracks) { + if (!track || + platform + ->Supports(SupportDecoderParams(*track), nullptr /* diagnostic */) + .isEmpty()) { + return false; + } + } + + return true; +} + +/* static */ /* Convenience overload that calls the two-argument version and discards the error result. */ +nsTArray<UniquePtr<TrackInfo>> WebMDecoder::GetTracksInfo( + const MediaContainerType& aType) { + MediaResult rv = NS_OK; + return GetTracksInfo(aType, rv); +} + +} // namespace mozilla diff --git a/dom/media/webm/WebMDecoder.h b/dom/media/webm/WebMDecoder.h new file mode 100644 index 0000000000..9bbe5ce4d2 --- /dev/null +++ b/dom/media/webm/WebMDecoder.h @@ -0,0 +1,35 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#if !defined(WebMDecoder_h_) +# define WebMDecoder_h_ + +# include "mozilla/UniquePtr.h" +# include "nsTArray.h" + +namespace mozilla { + +class MediaContainerType; +class MediaResult; +class TrackInfo; + +class WebMDecoder { + public: + // Returns true if aContainerType is a WebM type that we think we can render + // with an enabled platform decoder backend. + // If provided, codecs are checked for support.
+ static bool IsSupportedType(const MediaContainerType& aContainerType); + + /* Returns the tracks described by the codecs listed in aType; any error is discarded. */ static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo( + const MediaContainerType& aType); + + private: + /* Same as the public overload, but reports failures through aError. */ static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo( + const MediaContainerType& aType, MediaResult& aError); +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMDemuxer.cpp b/dom/media/webm/WebMDemuxer.cpp new file mode 100644 index 0000000000..da14118205 --- /dev/null +++ b/dom/media/webm/WebMDemuxer.cpp @@ -0,0 +1,1361 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsError.h" +#include "MediaResource.h" +#ifdef MOZ_AV1 +# include "AOMDecoder.h" +#endif +#include "VPXDecoder.h" +#include "WebMDemuxer.h" +#include "WebMBufferedParser.h" +#include "gfx2DGlue.h" +#include "gfxUtils.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/SharedThreadPool.h" +#include "MediaDataDemuxer.h" +#include "nsAutoRef.h" +#include "NesteggPacketHolder.h" +#include "XiphExtradata.h" +#include "prprf.h" // leaving it for PR_vsnprintf() +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/Sprintf.h" +#include "VideoUtils.h" + +#include <algorithm> +#include <numeric> +#include <stdint.h> + +#define WEBM_DEBUG(arg, ...) \ + DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \ + __func__, ##__VA_ARGS__) +extern mozilla::LazyLogModule gMediaDemuxerLog; + +namespace mozilla { + +using namespace gfx; +using media::TimeUnit; + +LazyLogModule gNesteggLog("Nestegg"); + +// How far ahead we will look when searching for a future keyframe. In microseconds.
+// This value is based on what appears to be a reasonable value as most webm +// files encountered appear to have keyframes located < 4s. +#define MAX_LOOK_AHEAD 10000000 + +// Functions for reading and seeking using WebMDemuxer required for +// nestegg_io. The 'user data' passed to these functions is the +// WebMDemuxer::NestEggContext. +/* Read callback. Returns -1 if the resource read fails, 0 if fewer than aLength bytes were read, and 1 otherwise. For a media source the request is first clamped to the context's end-of-data offset; the short-read test still compares against aLength, so a clamped read reports 0. */ static int webmdemux_read(void* aBuffer, size_t aLength, void* aUserData) { + MOZ_ASSERT(aUserData); + MOZ_ASSERT(aLength < UINT32_MAX); + WebMDemuxer::NestEggContext* context = + reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + uint32_t count = aLength; + if (context->IsMediaSource()) { + int64_t length = context->GetEndDataOffset(); + int64_t position = context->GetResource()->Tell(); + MOZ_ASSERT(position <= context->GetResource()->GetLength()); + MOZ_ASSERT(position <= length); + if (length >= 0 && count + position > length) { + count = length - position; + } + MOZ_ASSERT(count <= aLength); + } + uint32_t bytes = 0; + nsresult rv = + context->GetResource()->Read(static_cast<char*>(aBuffer), count, &bytes); + bool eof = bytes < aLength; + return NS_FAILED(rv) ? -1 : eof ? 0 : 1; +} + +/* Seek callback. Returns 0 on success and -1 on failure. */ static int webmdemux_seek(int64_t aOffset, int aWhence, void* aUserData) { + MOZ_ASSERT(aUserData); + WebMDemuxer::NestEggContext* context = + reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + nsresult rv = context->GetResource()->Seek(aWhence, aOffset); + return NS_SUCCEEDED(rv) ? 0 : -1; +} + +/* Tell callback. Returns the current position of the context's resource. */ static int64_t webmdemux_tell(void* aUserData) { + MOZ_ASSERT(aUserData); + WebMDemuxer::NestEggContext* context = + reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData); + return context->GetResource()->Tell(); +} + +/* Log callback. Does nothing unless gNesteggLog is enabled at Debug level; otherwise formats the message into a 256-byte buffer, prefixed with the context pointer and a three-letter severity tag, and logs it at Debug level. */ static void webmdemux_log(nestegg* aContext, unsigned int aSeverity, + char const* aFormat, ...)
{ + if (!MOZ_LOG_TEST(gNesteggLog, LogLevel::Debug)) { + return; + } + + va_list args; + char msg[256]; + const char* sevStr; + + switch (aSeverity) { + case NESTEGG_LOG_DEBUG: + sevStr = "DBG"; + break; + case NESTEGG_LOG_INFO: + sevStr = "INF"; + break; + case NESTEGG_LOG_WARNING: + sevStr = "WRN"; + break; + case NESTEGG_LOG_ERROR: + sevStr = "ERR"; + break; + case NESTEGG_LOG_CRITICAL: + sevStr = "CRT"; + break; + default: + sevStr = "UNK"; + break; + } + + va_start(args, aFormat); + + SprintfLiteral(msg, "%p [Nestegg-%s] ", aContext, sevStr); + PR_vsnprintf(msg + strlen(msg), sizeof(msg) - strlen(msg), aFormat, args); + MOZ_LOG(gNesteggLog, LogLevel::Debug, ("%s", msg)); + + va_end(args); +} + +/* Destroys the nestegg context if one was created. */ WebMDemuxer::NestEggContext::~NestEggContext() { + if (mContext) { + nestegg_destroy(mContext); + } +} + +/* Wires the callbacks above into a nestegg_io, with this context as user data, and initialises mContext. The last nestegg_init argument is the resource length for a media source and -1 otherwise. Returns the nestegg_init result. */ int WebMDemuxer::NestEggContext::Init() { + nestegg_io io; + io.read = webmdemux_read; + io.seek = webmdemux_seek; + io.tell = webmdemux_tell; + io.userdata = this; + + // While reading the metadata, we do not really care about which nestegg + // context is being used so long as they are both initialised. + // For reading the metadata however, we will use mVideoContext. + return nestegg_init(&mContext, io, &webmdemux_log, + mParent->IsMediaSource() ? mResource.GetLength() : -1); +} + +/* Equivalent to WebMDemuxer(aResource, false). */ WebMDemuxer::WebMDemuxer(MediaResource* aResource) + : WebMDemuxer(aResource, false) {} + +WebMDemuxer::WebMDemuxer(MediaResource* aResource, bool aIsMediaSource) + : mVideoContext(this, aResource), + mAudioContext(this, aResource), + mBufferedState(nullptr), + mInitData(nullptr), + mVideoTrack(0), + mAudioTrack(0), + mSeekPreroll(0), + mAudioCodec(-1), + mVideoCodec(-1), + mHasVideo(false), + mHasAudio(false), + mNeedReIndex(true), + mLastWebMBlockOffset(-1), + mIsMediaSource(aIsMediaSource) { + DDLINKCHILD("resource", aResource); + // Audio/video contexts hold a MediaResourceIndex.
+ DDLINKCHILD("video context", mVideoContext.GetResource()); + DDLINKCHILD("audio context", mAudioContext.GetResource()); +} + +/* Resets the video and audio packet queues. */ WebMDemuxer::~WebMDemuxer() { + Reset(TrackInfo::kVideoTrack); + Reset(TrackInfo::kAudioTrack); +} + +/* Creates the buffered state and reads the metadata. Rejects with NS_ERROR_DOM_MEDIA_METADATA_ERR if ReadMetadata() fails or if neither an audio nor a video track was found; resolves with NS_OK otherwise. */ RefPtr<WebMDemuxer::InitPromise> WebMDemuxer::Init() { + InitBufferedState(); + + if (NS_FAILED(ReadMetadata())) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, + __func__); + } + + if (!GetNumberTracks(TrackInfo::kAudioTrack) && + !GetNumberTracks(TrackInfo::kVideoTrack)) { + return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, + __func__); + } + + return InitPromise::CreateAndResolve(NS_OK, __func__); +} + +/* Allocates mBufferedState, which must not already be set. */ void WebMDemuxer::InitBufferedState() { + MOZ_ASSERT(!mBufferedState); + mBufferedState = new WebMBufferedState; +} + +/* At most one track per type is exposed: 1 when the matching mHasAudio or mHasVideo flag is set, 0 otherwise. */ uint32_t WebMDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const { + switch (aType) { + case TrackInfo::kAudioTrack: + return mHasAudio ? 1 : 0; + case TrackInfo::kVideoTrack: + return mHasVideo ?
1 : 0; + default: + return 0; + } +} + +UniquePtr<TrackInfo> WebMDemuxer::GetTrackInfo(TrackInfo::TrackType aType, + size_t aTrackNumber) const { + switch (aType) { + case TrackInfo::kAudioTrack: + return mInfo.mAudio.Clone(); + case TrackInfo::kVideoTrack: + return mInfo.mVideo.Clone(); + default: + return nullptr; + } +} + +already_AddRefed<MediaTrackDemuxer> WebMDemuxer::GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) { + if (GetNumberTracks(aType) <= aTrackNumber) { + return nullptr; + } + RefPtr<WebMTrackDemuxer> e = new WebMTrackDemuxer(this, aType, aTrackNumber); + DDLINKCHILD("track demuxer", e.get()); + mDemuxers.AppendElement(e); + + return e.forget(); +} + +void WebMDemuxer::Reset(TrackInfo::TrackType aType) { + mProcessedDiscardPadding = false; + if (aType == TrackInfo::kVideoTrack) { + mVideoPackets.Reset(); + } else { + mAudioPackets.Reset(); + } +} + +nsresult WebMDemuxer::ReadMetadata() { + int r = mVideoContext.Init(); + if (r == -1) { + WEBM_DEBUG("mVideoContext::Init failure"); + return NS_ERROR_FAILURE; + } + if (mAudioContext.Init() == -1) { + WEBM_DEBUG("mAudioContext::Init failure"); + return NS_ERROR_FAILURE; + } + + // For reading the metadata we can only use the video resource/context. + MediaResourceIndex& resource = Resource(TrackInfo::kVideoTrack); + nestegg* context = Context(TrackInfo::kVideoTrack); + + { + // Check how much data nestegg read and force feed it to BufferedState. 
+ RefPtr<MediaByteBuffer> buffer = resource.MediaReadAt(0, resource.Tell()); + if (!buffer) { + WEBM_DEBUG("resource.MediaReadAt error"); + return NS_ERROR_FAILURE; + } + mBufferedState->NotifyDataArrived(buffer->Elements(), buffer->Length(), 0); + if (mBufferedState->GetInitEndOffset() < 0) { + WEBM_DEBUG("Couldn't find init end"); + return NS_ERROR_FAILURE; + } + MOZ_ASSERT(mBufferedState->GetInitEndOffset() <= resource.Tell()); + } + mInitData = resource.MediaReadAt(0, mBufferedState->GetInitEndOffset()); + if (!mInitData || + mInitData->Length() != size_t(mBufferedState->GetInitEndOffset())) { + WEBM_DEBUG("Couldn't read init data"); + return NS_ERROR_FAILURE; + } + + unsigned int ntracks = 0; + r = nestegg_track_count(context, &ntracks); + if (r == -1) { + WEBM_DEBUG("nestegg_track_count error"); + return NS_ERROR_FAILURE; + } + + for (unsigned int track = 0; track < ntracks; ++track) { + int id = nestegg_track_codec_id(context, track); + if (id == -1) { + WEBM_DEBUG("nestegg_track_codec_id error"); + return NS_ERROR_FAILURE; + } + int type = nestegg_track_type(context, track); + if (type == NESTEGG_TRACK_VIDEO && !mHasVideo) { + nestegg_video_params params; + r = nestegg_track_video_params(context, track, ¶ms); + if (r == -1) { + WEBM_DEBUG("nestegg_track_video_params error"); + return NS_ERROR_FAILURE; + } + mVideoCodec = nestegg_track_codec_id(context, track); + switch (mVideoCodec) { + case NESTEGG_CODEC_VP8: + mInfo.mVideo.mMimeType = "video/vp8"; + break; + case NESTEGG_CODEC_VP9: + mInfo.mVideo.mMimeType = "video/vp9"; + break; + case NESTEGG_CODEC_AV1: + mInfo.mVideo.mMimeType = "video/av1"; + break; + default: + NS_WARNING("Unknown WebM video codec"); + return NS_ERROR_FAILURE; + } + + mInfo.mVideo.mColorPrimaries = gfxUtils::CicpToColorPrimaries( + static_cast<gfx::CICP::ColourPrimaries>(params.primaries), + gMediaDemuxerLog); + + // For VPX, this is our only chance to capture the transfer + // characteristics, which we can't get from a VPX bitstream 
later. + // We only need this value if the video is using the BT2020 + // colorspace, which will be determined on a per-frame basis later. + mInfo.mVideo.mTransferFunction = gfxUtils::CicpToTransferFunction( + static_cast<gfx::CICP::TransferCharacteristics>( + params.transfer_characteristics)); + + // Picture region, taking into account cropping, before scaling + // to the display size. + unsigned int cropH = params.crop_right + params.crop_left; + unsigned int cropV = params.crop_bottom + params.crop_top; + gfx::IntRect pictureRect(params.crop_left, params.crop_top, + params.width - cropH, params.height - cropV); + + // If the cropping data appears invalid then use the frame data + if (pictureRect.width <= 0 || pictureRect.height <= 0 || + pictureRect.x < 0 || pictureRect.y < 0) { + pictureRect.x = 0; + pictureRect.y = 0; + pictureRect.width = params.width; + pictureRect.height = params.height; + } + + // Validate the container-reported frame and pictureRect sizes. This + // ensures that our video frame creation code doesn't overflow. + gfx::IntSize displaySize(params.display_width, params.display_height); + gfx::IntSize frameSize(params.width, params.height); + if (!IsValidVideoRegion(frameSize, pictureRect, displaySize)) { + // Video track's frame sizes will overflow. Ignore the video track. 
+ continue; + } + + mVideoTrack = track; + mHasVideo = true; + + mInfo.mVideo.mDisplay = displaySize; + mInfo.mVideo.mImage = frameSize; + mInfo.mVideo.SetImageRect(pictureRect); + mInfo.mVideo.SetAlpha(params.alpha_mode); + + switch (params.stereo_mode) { + case NESTEGG_VIDEO_MONO: + mInfo.mVideo.mStereoMode = StereoMode::MONO; + break; + case NESTEGG_VIDEO_STEREO_LEFT_RIGHT: + mInfo.mVideo.mStereoMode = StereoMode::LEFT_RIGHT; + break; + case NESTEGG_VIDEO_STEREO_BOTTOM_TOP: + mInfo.mVideo.mStereoMode = StereoMode::BOTTOM_TOP; + break; + case NESTEGG_VIDEO_STEREO_TOP_BOTTOM: + mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM; + break; + case NESTEGG_VIDEO_STEREO_RIGHT_LEFT: + mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT; + break; + } + uint64_t duration = 0; + r = nestegg_duration(context, &duration); + if (!r) { + mInfo.mVideo.mDuration = TimeUnit::FromNanoseconds(duration); + } + WEBM_DEBUG("stream duration: %lf\n", mInfo.mVideo.mDuration.ToSeconds()); + mInfo.mVideo.mCrypto = GetTrackCrypto(TrackInfo::kVideoTrack, track); + if (mInfo.mVideo.mCrypto.IsEncrypted()) { + MOZ_ASSERT(mInfo.mVideo.mCrypto.mCryptoScheme == CryptoScheme::Cenc, + "WebM should only use cenc scheme"); + mCrypto.AddInitData(u"webm"_ns, mInfo.mVideo.mCrypto.mKeyId); + } + } else if (type == NESTEGG_TRACK_AUDIO && !mHasAudio) { + nestegg_audio_params params; + r = nestegg_track_audio_params(context, track, ¶ms); + if (r == -1) { + WEBM_DEBUG("nestegg_track_audio_params error"); + return NS_ERROR_FAILURE; + } + if (params.rate > + static_cast<decltype(params.rate)>(AudioInfo::MAX_RATE) || + params.rate <= static_cast<decltype(params.rate)>(0) || + params.channels > AudioConfig::ChannelLayout::MAX_CHANNELS) { + WEBM_DEBUG("Invalid audio param rate: %lf channel count: %d", + params.rate, params.channels); + return NS_ERROR_DOM_MEDIA_METADATA_ERR; + } + + mAudioTrack = track; + mHasAudio = true; + mAudioCodec = nestegg_track_codec_id(context, track); + if (mAudioCodec == 
NESTEGG_CODEC_VORBIS) { + mInfo.mAudio.mCodecSpecificConfig = + AudioCodecSpecificVariant{VorbisCodecSpecificData{}}; + mInfo.mAudio.mMimeType = "audio/vorbis"; + } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { + uint64_t codecDelayUs = params.codec_delay / 1000; + mInfo.mAudio.mMimeType = "audio/opus"; + OpusCodecSpecificData opusCodecSpecificData; + opusCodecSpecificData.mContainerCodecDelayFrames = + AssertedCast<int64_t>(USECS_PER_S * codecDelayUs / 48000); + WEBM_DEBUG("Preroll for Opus: %" PRIu64 " frames", + opusCodecSpecificData.mContainerCodecDelayFrames); + mInfo.mAudio.mCodecSpecificConfig = + AudioCodecSpecificVariant{std::move(opusCodecSpecificData)}; + } + mSeekPreroll = params.seek_preroll; + mInfo.mAudio.mRate = AssertedCast<uint32_t>(params.rate); + mInfo.mAudio.mChannels = params.channels; + + unsigned int nheaders = 0; + r = nestegg_track_codec_data_count(context, track, &nheaders); + if (r == -1) { + WEBM_DEBUG("nestegg_track_codec_data_count error"); + return NS_ERROR_FAILURE; + } + + AutoTArray<const unsigned char*, 4> headers; + AutoTArray<size_t, 4> headerLens; + for (uint32_t header = 0; header < nheaders; ++header) { + unsigned char* data = 0; + size_t length = 0; + r = nestegg_track_codec_data(context, track, header, &data, &length); + if (r == -1) { + WEBM_DEBUG("nestegg_track_codec_data error"); + return NS_ERROR_FAILURE; + } + headers.AppendElement(data); + headerLens.AppendElement(length); + } + + // Vorbis has 3 headers, convert to Xiph extradata format to send them to + // the demuxer. + // TODO: This is already the format WebM stores them in. Would be nice + // to avoid having libnestegg split them only for us to pack them again, + // but libnestegg does not give us an API to access this data directly. 
+ RefPtr<MediaByteBuffer> audioCodecSpecificBlob = + GetAudioCodecSpecificBlob(mInfo.mAudio.mCodecSpecificConfig); + if (nheaders > 1) { + if (!XiphHeadersToExtradata(audioCodecSpecificBlob, headers, + headerLens)) { + WEBM_DEBUG("Couldn't parse Xiph headers"); + return NS_ERROR_FAILURE; + } + } else { + audioCodecSpecificBlob->AppendElements(headers[0], headerLens[0]); + } + uint64_t duration = 0; + r = nestegg_duration(context, &duration); + if (!r) { + mInfo.mAudio.mDuration = TimeUnit::FromNanoseconds(duration); + WEBM_DEBUG("audio track duration: %lf", + mInfo.mAudio.mDuration.ToSeconds()); + } + mInfo.mAudio.mCrypto = GetTrackCrypto(TrackInfo::kAudioTrack, track); + if (mInfo.mAudio.mCrypto.IsEncrypted()) { + MOZ_ASSERT(mInfo.mAudio.mCrypto.mCryptoScheme == CryptoScheme::Cenc, + "WebM should only use cenc scheme"); + mCrypto.AddInitData(u"webm"_ns, mInfo.mAudio.mCrypto.mKeyId); + } + } + } + WEBM_DEBUG("Read metadata OK"); + return NS_OK; +} + +bool WebMDemuxer::IsSeekable() const { + return Context(TrackInfo::kVideoTrack) && + nestegg_has_cues(Context(TrackInfo::kVideoTrack)); +} + +bool WebMDemuxer::IsSeekableOnlyInBufferedRanges() const { + return Context(TrackInfo::kVideoTrack) && + !nestegg_has_cues(Context(TrackInfo::kVideoTrack)); +} + +void WebMDemuxer::EnsureUpToDateIndex() { + if (!mNeedReIndex || !mInitData) { + return; + } + AutoPinned<MediaResource> resource( + Resource(TrackInfo::kVideoTrack).GetResource()); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + if (NS_FAILED(rv) || byteRanges.IsEmpty()) { + return; + } + mBufferedState->UpdateIndex(byteRanges, resource); + + mNeedReIndex = false; + + if (!mIsMediaSource) { + return; + } + mLastWebMBlockOffset = mBufferedState->GetLastBlockOffset(); + MOZ_ASSERT(mLastWebMBlockOffset <= resource->GetLength()); +} + +void WebMDemuxer::NotifyDataArrived() { + WEBM_DEBUG(""); + mNeedReIndex = true; +} + +void WebMDemuxer::NotifyDataRemoved() { + 
mBufferedState->Reset(); + if (mInitData) { + mBufferedState->NotifyDataArrived(mInitData->Elements(), + mInitData->Length(), 0); + } + mNeedReIndex = true; +} + +UniquePtr<EncryptionInfo> WebMDemuxer::GetCrypto() { + return mCrypto.IsEncrypted() ? MakeUnique<EncryptionInfo>(mCrypto) : nullptr; +} + +CryptoTrack WebMDemuxer::GetTrackCrypto(TrackInfo::TrackType aType, + size_t aTrackNumber) { + const int WEBM_IV_SIZE = 16; + const unsigned char* contentEncKeyId; + size_t contentEncKeyIdLength; + CryptoTrack crypto; + nestegg* context = Context(aType); + + int r = nestegg_track_content_enc_key_id( + context, aTrackNumber, &contentEncKeyId, &contentEncKeyIdLength); + + if (r == -1) { + WEBM_DEBUG("nestegg_track_content_enc_key_id failed r=%d", r); + return crypto; + } + + uint32_t i; + nsTArray<uint8_t> initData; + for (i = 0; i < contentEncKeyIdLength; i++) { + initData.AppendElement(contentEncKeyId[i]); + } + + if (!initData.IsEmpty()) { + // Webm only uses a cenc style scheme. + crypto.mCryptoScheme = CryptoScheme::Cenc; + crypto.mIVSize = WEBM_IV_SIZE; + crypto.mKeyId = std::move(initData); + } + + return crypto; +} + +nsresult WebMDemuxer::GetNextPacket(TrackInfo::TrackType aType, + MediaRawDataQueue* aSamples) { + if (mIsMediaSource) { + // To ensure mLastWebMBlockOffset is properly up to date. + EnsureUpToDateIndex(); + } + + RefPtr<NesteggPacketHolder> holder; + nsresult rv = NextPacket(aType, holder); + + if (NS_FAILED(rv)) { + return rv; + } + + int r = 0; + unsigned int count = 0; + r = nestegg_packet_count(holder->Packet(), &count); + if (r == -1) { + WEBM_DEBUG("nestegg_packet_count: error"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + int64_t tstamp = holder->Timestamp(); + int64_t duration = holder->Duration(); + + // The end time of this frame is the start time of the next frame. Fetch + // the timestamp of the next packet for this track. If we've reached the + // end of the resource, use the file's duration as the end time of this + // video frame. 
+  RefPtr<NesteggPacketHolder> next_holder;
+  rv = NextPacket(aType, next_holder);
+  if (NS_FAILED(rv) && rv != NS_ERROR_DOM_MEDIA_END_OF_STREAM) {
+    WEBM_DEBUG("NextPacket: error");
+    return rv;
+  }
+
+  // INT64_MIN doubles as the "could not be computed" marker that is checked
+  // once the lambda below has run.
+  int64_t next_tstamp = INT64_MIN;
+  // Picks the end time of the current packet, in order of preference: the
+  // timestamp of the following packet (which is pushed back onto its queue),
+  // the packet's own duration, the spacing to the previous frame, and finally
+  // the track end time. For MediaSource with none of the first three
+  // available, the current packet is pushed back and next_tstamp stays at
+  // INT64_MIN.
+  auto calculateNextTimestamp = [&](auto&& pushPacket, auto&& lastFrameTime,
+                                    int64_t trackEndTime) {
+    if (next_holder) {
+      next_tstamp = next_holder->Timestamp();
+      (this->*pushPacket)(next_holder);
+    } else if (duration >= 0) {
+      next_tstamp = tstamp + duration;
+    } else if (lastFrameTime.isSome()) {
+      next_tstamp = tstamp + (tstamp - lastFrameTime.ref());
+    } else if (mIsMediaSource) {
+      (this->*pushPacket)(holder);
+    } else {
+      // If we can't get frame's duration, it means either we need to wait for
+      // more data for MSE case or this is the last frame for file resource
+      // case.
+      if (tstamp > trackEndTime) {
+        // This shouldn't happen, but some muxers give incorrect durations to
+        // segments, then have samples appear beyond those durations.
+        WEBM_DEBUG("Found tstamp=%" PRIi64 " > trackEndTime=%" PRIi64
+                   " while calculating next timestamp! Indicates a bad mux! "
+                   "Will use tstamp value.",
+                   tstamp, trackEndTime);
+      }
+      next_tstamp = std::max<int64_t>(tstamp, trackEndTime);
+    }
+    // Note: this also runs on the MediaSource push-back path above.
+    lastFrameTime = Some(tstamp);
+  };
+
+  if (aType == TrackInfo::kAudioTrack) {
+    calculateNextTimestamp(&WebMDemuxer::PushAudioPacket, mLastAudioFrameTime,
+                           mInfo.mAudio.mDuration.ToMicroseconds());
+  } else {
+    calculateNextTimestamp(&WebMDemuxer::PushVideoPacket, mLastVideoFrameTime,
+                           mInfo.mVideo.mDuration.ToMicroseconds());
+  }
+
+  if (mIsMediaSource && next_tstamp == INT64_MIN) {
+    WEBM_DEBUG("WebM is a media source, and next timestamp computation filed.");
+    return NS_ERROR_DOM_MEDIA_END_OF_STREAM;
+  }
+
+  // Discard padding is only queried for audio packets; the return value of
+  // the nestegg call is deliberately ignored and 0 is used when it fails to
+  // write the out-param.
+  int64_t discardPadding = 0;
+  if (aType == TrackInfo::kAudioTrack) {
+    (void)nestegg_packet_discard_padding(holder->Packet(), &discardPadding);
+  }
+
+  int packetEncryption = nestegg_packet_encryption(holder->Packet());
+
+  // One MediaRawData is produced per data chunk of the packet.
+  for (uint32_t i = 0; i < count; ++i) {
+    unsigned char* data = nullptr;
+    size_t length;
+    r = nestegg_packet_data(holder->Packet(), i, &data, &length);
+    if (r == -1) {
+      WEBM_DEBUG("nestegg_packet_data failed r=%d", r);
+      return NS_ERROR_DOM_MEDIA_DEMUXER_ERR;
+    }
+    unsigned char* alphaData = nullptr;
+    size_t alphaLength = 0;
+    // Check packets for alpha information if file has declared alpha frames
+    // may be present.
+    if (mInfo.mVideo.HasAlpha()) {
+      r = nestegg_packet_additional_data(holder->Packet(), 1, &alphaData,
+                                         &alphaLength);
+      if (r == -1) {
+        // Not fatal: the sample is then built without alpha data.
+        WEBM_DEBUG(
+            "nestegg_packet_additional_data failed to retrieve alpha data r=%d",
+            r);
+      }
+    }
+    bool isKeyframe = false;
+    if (aType == TrackInfo::kAudioTrack) {
+      // Every audio sample is flagged as a keyframe.
+      isKeyframe = true;
+    } else if (aType == TrackInfo::kVideoTrack) {
+      if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED ||
+          packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
+        // Packet is encrypted, can't peek, use packet info
+        isKeyframe = nestegg_packet_has_keyframe(holder->Packet()) ==
+                     NESTEGG_PACKET_HAS_KEYFRAME_TRUE;
+      } else {
+        MOZ_ASSERT(
+            packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_UNENCRYPTED ||
+                packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_FALSE,
+            "Unencrypted packet expected");
+        auto sample = Span(data, length);
+        auto alphaSample = Span(alphaData, alphaLength);
+
+        // When alpha data is present, the frame only counts as a keyframe if
+        // both the colour and the alpha bitstreams are keyframes.
+        switch (mVideoCodec) {
+          case NESTEGG_CODEC_VP8:
+            isKeyframe = VPXDecoder::IsKeyframe(sample, VPXDecoder::Codec::VP8);
+            if (isKeyframe && alphaLength) {
+              isKeyframe =
+                  VPXDecoder::IsKeyframe(alphaSample, VPXDecoder::Codec::VP8);
+            }
+            break;
+          case NESTEGG_CODEC_VP9:
+            isKeyframe = VPXDecoder::IsKeyframe(sample, VPXDecoder::Codec::VP9);
+            if (isKeyframe && alphaLength) {
+              isKeyframe =
+                  VPXDecoder::IsKeyframe(alphaSample, VPXDecoder::Codec::VP9);
+            }
+            break;
+#ifdef MOZ_AV1
+          case NESTEGG_CODEC_AV1:
+            isKeyframe = AOMDecoder::IsKeyframe(sample);
+            if (isKeyframe && alphaLength) {
+              isKeyframe = AOMDecoder::IsKeyframe(alphaSample);
+            }
+            break;
+#endif
+          default:
+            NS_WARNING("Cannot detect keyframes in unknown WebM video codec");
+            return NS_ERROR_FAILURE;
+        }
+      }
+    }
+
+    WEBM_DEBUG("push sample tstamp: %" PRId64 " next_tstamp: %" PRId64
+               " length: %zu kf: %d",
+               tstamp, next_tstamp, length, isKeyframe);
+    RefPtr<MediaRawData> sample;
+    if (mInfo.mVideo.HasAlpha() && alphaLength != 0) {
+      sample = new MediaRawData(data, length, alphaData, alphaLength);
+      if ((length && !sample->Data()) ||
+          (alphaLength && !sample->AlphaData())) {
+        WEBM_DEBUG("Couldn't allocate MediaRawData: OOM");
+        return NS_ERROR_OUT_OF_MEMORY;
+      }
+    } else {
+      sample = new MediaRawData(data, length);
+      if (length && !sample->Data()) {
+        WEBM_DEBUG("Couldn't allocate MediaRawData: OOM");
+        return NS_ERROR_OUT_OF_MEMORY;
+      }
+    }
+    sample->mTimecode = TimeUnit::FromMicroseconds(tstamp);
+    sample->mTime = TimeUnit::FromMicroseconds(tstamp);
+    // mDuration is left at its default when next_tstamp is not later than
+    // tstamp.
+    if (next_tstamp > tstamp) {
+      sample->mDuration = TimeUnit::FromMicroseconds(next_tstamp - tstamp);
+    }
+    sample->mOffset = holder->Offset();
+    sample->mKeyframe = isKeyframe;
+    // Discard padding is applied to the last chunk of the packet only.
+    if (discardPadding && i == count - 1) {
+      sample->mOriginalPresentationWindow =
+          Some(media::TimeInterval{sample->mTime, sample->GetEndTime()});
+      if (discardPadding < 0) {
+        // This will ensure decoding will error out, and the file is rejected.
+        sample->mDuration = TimeUnit::Invalid();
+      } else {
+        TimeUnit padding = TimeUnit::FromNanoseconds(discardPadding);
+        if (padding > sample->mDuration || mProcessedDiscardPadding) {
+          WEBM_DEBUG(
+              "Padding frames larger than packet size, flagging the packet for "
+              "error (padding: %s, duration: %s, already processed: %s)",
+              padding.ToString().get(), sample->mDuration.ToString().get(),
+              mProcessedDiscardPadding ? "true" : "false");
+          sample->mDuration = TimeUnit::Invalid();
+        } else {
+          sample->mDuration -= padding;
+        }
+      }
+      mProcessedDiscardPadding = true;
+    }
+
+    if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED ||
+        packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
+      UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
+      unsigned char const* iv;
+      size_t ivLength;
+      // NOTE(review): the return value of nestegg_packet_iv is not checked
+      // and iv/ivLength are uninitialised before the call — confirm it always
+      // writes both out-params.
+      nestegg_packet_iv(holder->Packet(), &iv, &ivLength);
+      writer->mCrypto.mCryptoScheme = CryptoScheme::Cenc;
+      writer->mCrypto.mIVSize = ivLength;
+      if (ivLength == 0) {
+        // Frame is not encrypted. This shouldn't happen as it means the
+        // encryption bit is set on a frame with no IV, but we gracefully
+        // handle it just in case.
+        MOZ_ASSERT_UNREACHABLE(
+            "Unencrypted packets should not have the encryption bit set!");
+        WEBM_DEBUG("Unencrypted packet with encryption bit set");
+        writer->mCrypto.mPlainSizes.AppendElement(length);
+        writer->mCrypto.mEncryptedSizes.AppendElement(0);
+      } else {
+        // Frame is encrypted
+        // NOTE(review): 8 bytes are copied whatever non-zero ivLength was
+        // reported — confirm nestegg only ever reports 8 here.
+        writer->mCrypto.mIV.AppendElements(iv, 8);
+        // Iv from a sample is 64 bits, must be padded with 64 bits more 0s
+        // in compliance with spec
+        // (this `i` shadows the chunk index of the enclosing loop)
+        for (uint32_t i = 0; i < 8; i++) {
+          writer->mCrypto.mIV.AppendElement(0);
+        }
+
+        if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED) {
+          writer->mCrypto.mPlainSizes.AppendElement(0);
+          writer->mCrypto.mEncryptedSizes.AppendElement(length);
+        } else if (packetEncryption ==
+                   NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
+          uint8_t numPartitions = 0;
+          const uint32_t* partitions = NULL;
+          nestegg_packet_offsets(holder->Packet(), &partitions, &numPartitions);
+
+          // WebM stores a list of 'partitions' in the data, which alternate
+          // clear, encrypted. The data in the first partition is always clear.
+          // So, a sample might look as follows:
+          // 00|XXXX|000|XX, where | represents a partition, 0 a clear byte and
+          // X an encrypted byte. If the first bytes in sample are unencrypted,
+          // the first partition will be at zero |XXXX|000|XX.
+          //
+          // As GMP expects the lengths of the clear and encrypted chunks of
+          // data, we calculate these from the difference between the last two
+          // partitions.
+          uint32_t lastOffset = 0;
+          bool encrypted = false;
+
+          for (uint8_t i = 0; i < numPartitions; i++) {
+            uint32_t partition = partitions[i];
+            uint32_t currentLength = partition - lastOffset;
+
+            if (encrypted) {
+              writer->mCrypto.mEncryptedSizes.AppendElement(currentLength);
+            } else {
+              writer->mCrypto.mPlainSizes.AppendElement(currentLength);
+            }
+
+            encrypted = !encrypted;
+            lastOffset = partition;
+
+            // NOTE(review): this is the only bounds check on the partition
+            // offsets in this function; if MOZ_ASSERT compiles out in release
+            // builds (as the name suggests — confirm), the unsigned
+            // subtractions here and below would wrap on out-of-order or
+            // out-of-range offsets. Verify nestegg validates them.
+            MOZ_ASSERT(lastOffset <= length);
+          }
+
+          // Add the data between the last offset and the end of the data.
+          // 000|XXX|000
+          //        ^---^
+          if (encrypted) {
+            writer->mCrypto.mEncryptedSizes.AppendElement(length - lastOffset);
+          } else {
+            writer->mCrypto.mPlainSizes.AppendElement(length - lastOffset);
+          }
+
+          // Make sure we have an equal number of encrypted and plain sizes (GMP
+          // expects this). This simple check is sufficient as there are two
+          // possible cases at this point:
+          // 1. The number of samples are even (so we don't need to do anything)
+          // 2. There is one more clear sample than encrypted samples, so add a
+          // zero length encrypted chunk.
+          // There can never be more encrypted partitions than clear partitions
+          // due to the alternating structure of the WebM samples and the
+          // restriction that the first chunk is always clear.
+          if (numPartitions % 2 == 0) {
+            writer->mCrypto.mEncryptedSizes.AppendElement(0);
+          }
+
+          // Assert that the lengths of the encrypted and plain samples add to
+          // the length of the data.
+          MOZ_ASSERT(
+              ((size_t)(std::accumulate(writer->mCrypto.mPlainSizes.begin(),
+                                        writer->mCrypto.mPlainSizes.end(), 0) +
+                        std::accumulate(writer->mCrypto.mEncryptedSizes.begin(),
+                                        writer->mCrypto.mEncryptedSizes.end(),
+                                        0)) == length));
+        }
+      }
+    }
+    aSamples->Push(sample);
+  }
+  return NS_OK;
+}
+
+nsresult WebMDemuxer::NextPacket(TrackInfo::TrackType aType,
+                                 RefPtr<NesteggPacketHolder>& aPacket) {
+  bool isVideo = aType == TrackInfo::kVideoTrack;
+
+  // Flag to indicate that we do need to playback these types of
+  // packets.
+ bool hasType = isVideo ? mHasVideo : mHasAudio; + + if (!hasType) { + WEBM_DEBUG("No media type found"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + + // The packet queue for the type that we are interested in. + WebMPacketQueue& packets = isVideo ? mVideoPackets : mAudioPackets; + + if (packets.GetSize() > 0) { + aPacket = packets.PopFront(); + return NS_OK; + } + + // Track we are interested in + uint32_t ourTrack = isVideo ? mVideoTrack : mAudioTrack; + + do { + RefPtr<NesteggPacketHolder> holder; + nsresult rv = DemuxPacket(aType, holder); + if (NS_FAILED(rv)) { + return rv; + } + if (!holder) { + WEBM_DEBUG("Couldn't demux packet"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + + if (ourTrack == holder->Track()) { + aPacket = holder; + return NS_OK; + } + } while (true); +} + +nsresult WebMDemuxer::DemuxPacket(TrackInfo::TrackType aType, + RefPtr<NesteggPacketHolder>& aPacket) { + nestegg_packet* packet; + int r = nestegg_read_packet(Context(aType), &packet); + if (r == 0) { + nestegg_read_reset(Context(aType)); + WEBM_DEBUG("EOS"); + return NS_ERROR_DOM_MEDIA_END_OF_STREAM; + } else if (r < 0) { + WEBM_DEBUG("nestegg_read_packet: error"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + + unsigned int track = 0; + r = nestegg_packet_track(packet, &track); + if (r == -1) { + WEBM_DEBUG("nestegg_packet_track: error"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + + int64_t offset = Resource(aType).Tell(); + RefPtr<NesteggPacketHolder> holder = new NesteggPacketHolder(); + if (!holder->Init(packet, offset, track, false)) { + WEBM_DEBUG("NesteggPacketHolder::Init: error"); + return NS_ERROR_DOM_MEDIA_DEMUXER_ERR; + } + + aPacket = holder; + return NS_OK; +} + +void WebMDemuxer::PushAudioPacket(NesteggPacketHolder* aItem) { + mAudioPackets.PushFront(aItem); +} + +void WebMDemuxer::PushVideoPacket(NesteggPacketHolder* aItem) { + mVideoPackets.PushFront(aItem); +} + +nsresult WebMDemuxer::SeekInternal(TrackInfo::TrackType aType, + const TimeUnit& aTarget) { + 
EnsureUpToDateIndex(); + uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack; + MOZ_ASSERT(aTarget.ToNanoseconds() >= 0, "Seek time can't be negative"); + uint64_t target = static_cast<uint64_t>(aTarget.ToNanoseconds()); + WEBM_DEBUG("Seeking to %lf", aTarget.ToSeconds()); + + Reset(aType); + + if (mSeekPreroll) { + uint64_t startTime = 0; + if (!mBufferedState->GetStartTime(&startTime)) { + startTime = 0; + } + WEBM_DEBUG("Seek Target: %f", + TimeUnit::FromNanoseconds(target).ToSeconds()); + if (target < mSeekPreroll || target - mSeekPreroll < startTime) { + target = startTime; + } else { + target -= mSeekPreroll; + } + WEBM_DEBUG("SeekPreroll: %f StartTime: %f Adjusted Target: %f", + TimeUnit::FromNanoseconds(mSeekPreroll).ToSeconds(), + TimeUnit::FromNanoseconds(startTime).ToSeconds(), + TimeUnit::FromNanoseconds(target).ToSeconds()); + } + int r = nestegg_track_seek(Context(aType), trackToSeek, target); + if (r == -1) { + WEBM_DEBUG("track_seek for track %u to %f failed, r=%d", trackToSeek, + TimeUnit::FromNanoseconds(target).ToSeconds(), r); + // Try seeking directly based on cluster information in memory. 
+ int64_t offset = 0; + bool rv = mBufferedState->GetOffsetForTime(target, &offset); + if (!rv) { + WEBM_DEBUG("mBufferedState->GetOffsetForTime failed too"); + return NS_ERROR_FAILURE; + } + + if (offset < 0) { + WEBM_DEBUG("Unknow byte offset time for seek target %" PRIu64 "ns", + target); + return NS_ERROR_FAILURE; + } + + r = nestegg_offset_seek(Context(aType), static_cast<uint64_t>(offset)); + if (r == -1) { + WEBM_DEBUG("and nestegg_offset_seek to %" PRIu64 " failed", offset); + return NS_ERROR_FAILURE; + } + WEBM_DEBUG("got offset from buffered state: %" PRIu64 "", offset); + } + + if (aType == TrackInfo::kAudioTrack) { + mLastAudioFrameTime.reset(); + } else { + mLastVideoFrameTime.reset(); + } + + return NS_OK; +} + +bool WebMDemuxer::IsBufferedIntervalValid(uint64_t start, uint64_t end) { + if (start > end) { + // Buffered ranges are clamped to the media's start time and duration. Any + // frames with timestamps outside that range are ignored, see bug 1697641 + // for more info. + WEBM_DEBUG("Ignoring range %" PRIu64 "-%" PRIu64 + ", due to invalid interval (start > end).", + start, end); + return false; + } + + auto startTime = TimeUnit::FromNanoseconds(start); + auto endTime = TimeUnit::FromNanoseconds(end); + + if (startTime.IsNegative() || endTime.IsNegative()) { + // We can get timestamps that are conceptually valid, but become + // negative due to uint64 -> int64 conversion from TimeUnit. We should + // not get negative timestamps, so guard against them. 
+    WEBM_DEBUG(
+        "Invalid range %f-%f, likely result of uint64 -> int64 conversion.",
+        startTime.ToSeconds(), endTime.ToSeconds());
+    return false;
+  }
+
+  return true;
+}
+
+// Translates the resource's cached byte ranges into buffered time intervals.
+// Each range end is clamped to the stream duration (plus the buffered start
+// time, when known). The walk stops at the first interval rejected by
+// IsBufferedIntervalValid(); intervals collected up to that point are still
+// returned. An empty set is returned when the cached ranges can't be read.
+media::TimeIntervals WebMDemuxer::GetBuffered() {
+  EnsureUpToDateIndex();
+  AutoPinned<MediaResource> resource(
+      Resource(TrackInfo::kVideoTrack).GetResource());
+
+  media::TimeIntervals buffered;
+
+  MediaByteRangeSet ranges;
+  nsresult rv = resource->GetCachedRanges(ranges);
+  if (NS_FAILED(rv)) {
+    return media::TimeIntervals();
+  }
+  uint64_t duration = 0;
+  uint64_t startOffset = 0;
+  if (!nestegg_duration(Context(TrackInfo::kVideoTrack), &duration)) {
+    if (mBufferedState->GetStartTime(&startOffset)) {
+      duration += startOffset;
+    }
+    WEBM_DEBUG("Duration: %f StartTime: %f",
+               TimeUnit::FromNanoseconds(duration).ToSeconds(),
+               TimeUnit::FromNanoseconds(startOffset).ToSeconds());
+  }
+  for (uint32_t index = 0; index < ranges.Length(); index++) {
+    uint64_t start = 0;
+    uint64_t end = 0;
+    // Named distinctly so it no longer shadows the nsresult `rv` above.
+    const bool haveTimes = mBufferedState->CalculateBufferedForRange(
+        ranges[index].mStart, ranges[index].mEnd, &start, &end);
+    if (!haveTimes) {
+      continue;
+    }
+    NS_ASSERTION(startOffset <= start,
+                 "startOffset negative or larger than start time");
+
+    // A duration of 0 means "unknown" here, so no clamping is applied.
+    if (duration && end > duration) {
+      WEBM_DEBUG("limit range to duration, end: %f duration: %f",
+                 TimeUnit::FromNanoseconds(end).ToSeconds(),
+                 TimeUnit::FromNanoseconds(duration).ToSeconds());
+      end = duration;
+    }
+
+    if (!IsBufferedIntervalValid(start, end)) {
+      WEBM_DEBUG("Invalid interval, bailing");
+      break;
+    }
+
+    auto startTime = TimeUnit::FromNanoseconds(start);
+    auto endTime = TimeUnit::FromNanoseconds(end);
+
+    WEBM_DEBUG("add range %f-%f", startTime.ToSeconds(), endTime.ToSeconds());
+    buffered += media::TimeInterval(startTime, endTime);
+  }
+  return buffered;
+}
+
+// Refreshes the index, then forwards to mBufferedState. Returns false when
+// there is no buffered state or it has no offset for aTime.
+bool WebMDemuxer::GetOffsetForTime(uint64_t aTime, int64_t* aOffset) {
+  EnsureUpToDateIndex();
+  return mBufferedState && mBufferedState->GetOffsetForTime(aTime, aOffset);
+}
+
+// WebMTrackDemuxer
+// Binds the track demuxer to its parent and caches the track info the parent
+// reports for (aType, aTrackNumber). Starts out requiring a keyframe.
+WebMTrackDemuxer::WebMTrackDemuxer(WebMDemuxer* aParent,
+                                   TrackInfo::TrackType aType,
+                                   uint32_t aTrackNumber)
+    : mParent(aParent), mType(aType), mNeedKeyframe(true) {
+  mInfo = mParent->GetTrackInfo(aType, aTrackNumber);
+  MOZ_ASSERT(mInfo);
+}
+
+// Drops any samples still queued.
+WebMTrackDemuxer::~WebMTrackDemuxer() { mSamples.Reset(); }
+
+// Returns a copy of the cached track info.
+UniquePtr<TrackInfo> WebMTrackDemuxer::GetInfo() const {
+  return mInfo->Clone();
+}
+
+RefPtr<WebMTrackDemuxer::SeekPromise> WebMTrackDemuxer::Seek(
+    const TimeUnit& aTime) {
+  // Seeks to aTime. Upon success, SeekPromise will be resolved with the
+  // actual time seeked to. Typically the random access point time
+
+  auto seekTime = aTime;
+  bool keyframe = false;
+
+  mNeedKeyframe = true;
+
+  do {
+    mSamples.Reset();
+    // NOTE(review): the nsresult of SeekInternal is not checked; a failed
+    // seek is only noticed indirectly through the samples read next.
+    mParent->SeekInternal(mType, seekTime);
+    nsresult rv = mParent->GetNextPacket(mType, &mSamples);
+    if (NS_FAILED(rv)) {
+      if (rv == NS_ERROR_DOM_MEDIA_END_OF_STREAM) {
+        // Ignore the error for now, the next GetSample will be rejected with
+        // EOS.
+        return SeekPromise::CreateAndResolve(TimeUnit::Zero(), __func__);
+      }
+      return SeekPromise::CreateAndReject(rv, __func__);
+    }
+
+    // Check what time we actually seeked to.
+    if (mSamples.GetSize() == 0) {
+      // We can't determine if the seek succeeded at this stage, so break the
+      // loop.
+      break;
+    }
+
+    // seekTime ends up as the time of the first keyframe found, or of the
+    // last sample when there is none.
+    for (const auto& sample : mSamples) {
+      seekTime = sample->mTime;
+      keyframe = sample->mKeyframe;
+      if (keyframe) {
+        break;
+      }
+    }
+    if (mType == TrackInfo::kVideoTrack &&
+        !mInfo->GetAsVideoInfo()->HasAlpha()) {
+      // We only perform a search for a keyframe on videos with alpha layer to
+      // prevent potential regression for normal video (even though invalid)
+      break;
+    }
+    if (!keyframe) {
+      // We didn't find any keyframe, attempt to seek to the previous cluster.
+ seekTime = mSamples.First()->mTime - TimeUnit::FromMicroseconds(1); + } + } while (!keyframe && seekTime >= TimeUnit::Zero()); + + SetNextKeyFrameTime(); + + return SeekPromise::CreateAndResolve(seekTime, __func__); +} + +nsresult WebMTrackDemuxer::NextSample(RefPtr<MediaRawData>& aData) { + nsresult rv = NS_ERROR_DOM_MEDIA_END_OF_STREAM; + while (mSamples.GetSize() < 1 && + NS_SUCCEEDED((rv = mParent->GetNextPacket(mType, &mSamples)))) { + } + if (mSamples.GetSize()) { + aData = mSamples.PopFront(); + return NS_OK; + } + WEBM_DEBUG("WebMTrackDemuxer::NextSample: error"); + return rv; +} + +RefPtr<WebMTrackDemuxer::SamplesPromise> WebMTrackDemuxer::GetSamples( + int32_t aNumSamples) { + RefPtr<SamplesHolder> samples = new SamplesHolder; + MOZ_ASSERT(aNumSamples); + + nsresult rv = NS_ERROR_DOM_MEDIA_END_OF_STREAM; + + while (aNumSamples) { + RefPtr<MediaRawData> sample; + rv = NextSample(sample); + if (NS_FAILED(rv)) { + break; + } + // Ignore empty samples. + if (sample->Size() == 0) { + WEBM_DEBUG( + "0 sized sample encountered while getting samples, skipping it"); + continue; + } + if (mNeedKeyframe && !sample->mKeyframe) { + continue; + } + if (!sample->HasValidTime()) { + return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, + __func__); + } + mNeedKeyframe = false; + samples->AppendSample(sample); + aNumSamples--; + } + + if (samples->GetSamples().IsEmpty()) { + return SamplesPromise::CreateAndReject(rv, __func__); + } else { + UpdateSamples(samples->GetSamples()); + return SamplesPromise::CreateAndResolve(samples, __func__); + } +} + +void WebMTrackDemuxer::SetNextKeyFrameTime() { + if (mType != TrackInfo::kVideoTrack || mParent->IsMediaSource()) { + return; + } + + auto frameTime = TimeUnit::Invalid(); + + mNextKeyframeTime.reset(); + + MediaRawDataQueue skipSamplesQueue; + bool foundKeyframe = false; + while (!foundKeyframe && mSamples.GetSize()) { + RefPtr<MediaRawData> sample = mSamples.PopFront(); + if (sample->mKeyframe) { + 
frameTime = sample->mTime; + foundKeyframe = true; + } + skipSamplesQueue.Push(sample.forget()); + } + Maybe<int64_t> startTime; + if (skipSamplesQueue.GetSize()) { + const RefPtr<MediaRawData>& sample = skipSamplesQueue.First(); + startTime.emplace(sample->mTimecode.ToMicroseconds()); + } + // Demux and buffer frames until we find a keyframe. + RefPtr<MediaRawData> sample; + nsresult rv = NS_OK; + while (!foundKeyframe && NS_SUCCEEDED((rv = NextSample(sample)))) { + if (sample->mKeyframe) { + frameTime = sample->mTime; + foundKeyframe = true; + } + int64_t sampleTimecode = sample->mTimecode.ToMicroseconds(); + skipSamplesQueue.Push(sample.forget()); + if (!startTime) { + startTime.emplace(sampleTimecode); + } else if (!foundKeyframe && + sampleTimecode > startTime.ref() + MAX_LOOK_AHEAD) { + WEBM_DEBUG("Couldn't find keyframe in a reasonable time, aborting"); + break; + } + } + // We may have demuxed more than intended, so ensure that all frames are kept + // in the right order. + mSamples.PushFront(std::move(skipSamplesQueue)); + + if (frameTime.IsValid()) { + mNextKeyframeTime.emplace(frameTime); + WEBM_DEBUG( + "Next Keyframe %f (%u queued %.02fs)", + mNextKeyframeTime.value().ToSeconds(), uint32_t(mSamples.GetSize()), + (mSamples.Last()->mTimecode - mSamples.First()->mTimecode).ToSeconds()); + } else { + WEBM_DEBUG("Couldn't determine next keyframe time (%u queued)", + uint32_t(mSamples.GetSize())); + } +} + +void WebMTrackDemuxer::Reset() { + mSamples.Reset(); + media::TimeIntervals buffered = GetBuffered(); + mNeedKeyframe = true; + if (!buffered.IsEmpty()) { + WEBM_DEBUG("Seek to start point: %f", buffered.Start(0).ToSeconds()); + mParent->SeekInternal(mType, buffered.Start(0)); + SetNextKeyFrameTime(); + } else { + mNextKeyframeTime.reset(); + } +} + +void WebMTrackDemuxer::UpdateSamples( + const nsTArray<RefPtr<MediaRawData>>& aSamples) { + for (const auto& sample : aSamples) { + if (sample->mCrypto.IsEncrypted()) { + UniquePtr<MediaRawDataWriter> 
writer(sample->CreateWriter()); + writer->mCrypto.mIVSize = mInfo->mCrypto.mIVSize; + writer->mCrypto.mKeyId.AppendElements(mInfo->mCrypto.mKeyId); + } + } + if (mNextKeyframeTime.isNothing() || + aSamples.LastElement()->mTime >= mNextKeyframeTime.value()) { + SetNextKeyFrameTime(); + } +} + +nsresult WebMTrackDemuxer::GetNextRandomAccessPoint(TimeUnit* aTime) { + if (mNextKeyframeTime.isNothing()) { + // There's no next key frame. + *aTime = TimeUnit::FromInfinity(); + } else { + *aTime = mNextKeyframeTime.ref(); + } + return NS_OK; +} + +RefPtr<WebMTrackDemuxer::SkipAccessPointPromise> +WebMTrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) { + uint32_t parsed = 0; + bool found = false; + RefPtr<MediaRawData> sample; + nsresult rv = NS_OK; + + WEBM_DEBUG("TimeThreshold: %f", aTimeThreshold.ToSeconds()); + while (!found && NS_SUCCEEDED((rv = NextSample(sample)))) { + parsed++; + if (sample->mKeyframe && sample->mTime >= aTimeThreshold) { + WEBM_DEBUG("next sample: %f (parsed: %d)", sample->mTime.ToSeconds(), + parsed); + found = true; + mSamples.Reset(); + mSamples.PushFront(sample.forget()); + } + } + if (NS_SUCCEEDED(rv)) { + SetNextKeyFrameTime(); + } + if (found) { + return SkipAccessPointPromise::CreateAndResolve(parsed, __func__); + } else { + SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed); + return SkipAccessPointPromise::CreateAndReject(std::move(failure), + __func__); + } +} + +media::TimeIntervals WebMTrackDemuxer::GetBuffered() { + return mParent->GetBuffered(); +} + +void WebMTrackDemuxer::BreakCycles() { mParent = nullptr; } + +int64_t WebMTrackDemuxer::GetEvictionOffset(const TimeUnit& aTime) { + int64_t offset; + int64_t nanos = aTime.ToNanoseconds(); + if (nanos < 0 || + !mParent->GetOffsetForTime(static_cast<uint64_t>(nanos), &offset)) { + return 0; + } + + return offset; +} +} // namespace mozilla + +#undef WEBM_DEBUG diff --git a/dom/media/webm/WebMDemuxer.h b/dom/media/webm/WebMDemuxer.h new file mode 
100644 index 0000000000..3b3bdc21e2 --- /dev/null +++ b/dom/media/webm/WebMDemuxer.h @@ -0,0 +1,293 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef WebMDemuxer_h_ +#define WebMDemuxer_h_ + +#include "nsTArray.h" +#include "MediaDataDemuxer.h" +#include "MediaResource.h" +#include "NesteggPacketHolder.h" + +#include <deque> +#include <stdint.h> +#include <utility> + +typedef struct nestegg nestegg; + +namespace mozilla { + +class WebMBufferedState; + +// Queue for holding MediaRawData samples +class MediaRawDataQueue { + typedef std::deque<RefPtr<MediaRawData>> ContainerType; + + public: + uint32_t GetSize() { return mQueue.size(); } + + void Push(MediaRawData* aItem) { mQueue.push_back(aItem); } + + void Push(already_AddRefed<MediaRawData>&& aItem) { + mQueue.push_back(std::move(aItem)); + } + + void PushFront(MediaRawData* aItem) { mQueue.push_front(aItem); } + + void PushFront(already_AddRefed<MediaRawData>&& aItem) { + mQueue.push_front(std::move(aItem)); + } + + void PushFront(MediaRawDataQueue&& aOther) { + while (!aOther.mQueue.empty()) { + PushFront(aOther.Pop()); + } + } + + already_AddRefed<MediaRawData> PopFront() { + RefPtr<MediaRawData> result = std::move(mQueue.front()); + mQueue.pop_front(); + return result.forget(); + } + + already_AddRefed<MediaRawData> Pop() { + RefPtr<MediaRawData> result = std::move(mQueue.back()); + mQueue.pop_back(); + return result.forget(); + } + + void Reset() { + while (!mQueue.empty()) { + mQueue.pop_front(); + } + } + + MediaRawDataQueue& operator=(const MediaRawDataQueue& aOther) = delete; + + const RefPtr<MediaRawData>& First() const { return mQueue.front(); } + + const RefPtr<MediaRawData>& Last() const { return mQueue.back(); } 
+ + // Methods for range-based for loops. + ContainerType::iterator begin() { return mQueue.begin(); } + + ContainerType::const_iterator begin() const { return mQueue.begin(); } + + ContainerType::iterator end() { return mQueue.end(); } + + ContainerType::const_iterator end() const { return mQueue.end(); } + + private: + ContainerType mQueue; +}; + +class WebMTrackDemuxer; + +DDLoggedTypeDeclNameAndBase(WebMDemuxer, MediaDataDemuxer); +DDLoggedTypeNameAndBase(WebMTrackDemuxer, MediaTrackDemuxer); + +class WebMDemuxer : public MediaDataDemuxer, + public DecoderDoctorLifeLogger<WebMDemuxer> { + public: + explicit WebMDemuxer(MediaResource* aResource); + // Indicate if the WebMDemuxer is to be used with MediaSource. In which + // case the demuxer will stop reads to the last known complete block. + WebMDemuxer(MediaResource* aResource, bool aIsMediaSource); + + RefPtr<InitPromise> Init() override; + + uint32_t GetNumberTracks(TrackInfo::TrackType aType) const override; + + UniquePtr<TrackInfo> GetTrackInfo(TrackInfo::TrackType aType, + size_t aTrackNumber) const; + + already_AddRefed<MediaTrackDemuxer> GetTrackDemuxer( + TrackInfo::TrackType aType, uint32_t aTrackNumber) override; + + bool IsSeekable() const override; + + bool IsSeekableOnlyInBufferedRanges() const override; + + UniquePtr<EncryptionInfo> GetCrypto() override; + + bool GetOffsetForTime(uint64_t aTime, int64_t* aOffset); + + // Demux next WebM packet and append samples to MediaRawDataQueue + nsresult GetNextPacket(TrackInfo::TrackType aType, + MediaRawDataQueue* aSamples); + + void Reset(TrackInfo::TrackType aType); + + // Pushes a packet to the front of the audio packet queue. + void PushAudioPacket(NesteggPacketHolder* aItem); + + // Pushes a packet to the front of the video packet queue. 
+ void PushVideoPacket(NesteggPacketHolder* aItem); + + // Public accessor for nestegg callbacks + bool IsMediaSource() const { return mIsMediaSource; } + + int64_t LastWebMBlockOffset() const { return mLastWebMBlockOffset; } + + struct NestEggContext { + NestEggContext(WebMDemuxer* aParent, MediaResource* aResource) + : mParent(aParent), mResource(aResource), mContext(nullptr) {} + + ~NestEggContext(); + + int Init(); + + // Public accessor for nestegg callbacks + + bool IsMediaSource() const { return mParent->IsMediaSource(); } + MediaResourceIndex* GetResource() { return &mResource; } + + int64_t GetEndDataOffset() const { + return (!mParent->IsMediaSource() || mParent->LastWebMBlockOffset() < 0) + ? mResource.GetLength() + : mParent->LastWebMBlockOffset(); + } + + WebMDemuxer* mParent; + MediaResourceIndex mResource; + nestegg* mContext; + }; + + private: + friend class WebMTrackDemuxer; + + ~WebMDemuxer(); + void InitBufferedState(); + nsresult ReadMetadata(); + void NotifyDataArrived() override; + void NotifyDataRemoved() override; + void EnsureUpToDateIndex(); + + // A helper to catch bad intervals during `GetBuffered`. + // Verifies if the interval given by start and end is valid, returning true if + // it is, or false if not. Logs failure reason if the interval is invalid. + bool IsBufferedIntervalValid(uint64_t start, uint64_t end); + + media::TimeIntervals GetBuffered(); + nsresult SeekInternal(TrackInfo::TrackType aType, + const media::TimeUnit& aTarget); + CryptoTrack GetTrackCrypto(TrackInfo::TrackType aType, size_t aTrackNumber); + + // Read a packet from the nestegg file. Returns nullptr if all packets for + // the particular track have been read. Pass TrackInfo::kAudioTrack or + // TrackInfo::kVideoTrack to indicate the type of the packet we want to read. + nsresult NextPacket(TrackInfo::TrackType aType, + RefPtr<NesteggPacketHolder>& aPacket); + + // Internal method that demuxes the next packet from the stream.
The caller + // is responsible for making sure it doesn't get lost. + nsresult DemuxPacket(TrackInfo::TrackType aType, + RefPtr<NesteggPacketHolder>& aPacket); + + // libnestegg audio and video contexts for webm container. + // Access on reader's thread only. + NestEggContext mVideoContext; + NestEggContext mAudioContext; + MediaResourceIndex& Resource(TrackInfo::TrackType aType) { + return aType == TrackInfo::kVideoTrack ? mVideoContext.mResource + : mAudioContext.mResource; + } + nestegg* Context(TrackInfo::TrackType aType) const { + return aType == TrackInfo::kVideoTrack ? mVideoContext.mContext + : mAudioContext.mContext; + } + + MediaInfo mInfo; + nsTArray<RefPtr<WebMTrackDemuxer>> mDemuxers; + + // Parser state and computed offset-time mappings. Shared by multiple + // readers when decoder has been cloned. Main thread only. + RefPtr<WebMBufferedState> mBufferedState; + RefPtr<MediaByteBuffer> mInitData; + + // Queue of video and audio packets that have been read but not decoded. + WebMPacketQueue mVideoPackets; + WebMPacketQueue mAudioPackets; + + // Index of video and audio track to play + uint32_t mVideoTrack; + uint32_t mAudioTrack; + + // Nanoseconds to discard after seeking. + uint64_t mSeekPreroll; + + // Calculate the frame duration from the last decodable frame using the + // previous frame's timestamp. In nanoseconds. + Maybe<int64_t> mLastAudioFrameTime; + Maybe<int64_t> mLastVideoFrameTime; + + // Codec ID of audio track + int mAudioCodec; + // Codec ID of video track + int mVideoCodec; + + // Booleans to indicate if we have audio and/or video data + bool mHasVideo; + bool mHasAudio; + bool mNeedReIndex; + + // The last complete block parsed by the WebMBufferedState. -1 if not set. + // We cache those values rather than retrieving them for performance reasons + // as nestegg only performs 1-byte reads at a time. + int64_t mLastWebMBlockOffset; + const bool mIsMediaSource; + // Discard padding in WebM cannot occur more than once.
This is set to true if + // a discard padding element has been found and processed, and the decoding is + // expected to error out if another discard padding element is found + // subsequently in the byte stream. + bool mProcessedDiscardPadding = false; + + EncryptionInfo mCrypto; +}; + +class WebMTrackDemuxer : public MediaTrackDemuxer, + public DecoderDoctorLifeLogger<WebMTrackDemuxer> { + public: + WebMTrackDemuxer(WebMDemuxer* aParent, TrackInfo::TrackType aType, + uint32_t aTrackNumber); + + UniquePtr<TrackInfo> GetInfo() const override; + + RefPtr<SeekPromise> Seek(const media::TimeUnit& aTime) override; + + RefPtr<SamplesPromise> GetSamples(int32_t aNumSamples = 1) override; + + void Reset() override; + + nsresult GetNextRandomAccessPoint(media::TimeUnit* aTime) override; + + RefPtr<SkipAccessPointPromise> SkipToNextRandomAccessPoint( + const media::TimeUnit& aTimeThreshold) override; + + media::TimeIntervals GetBuffered() override; + + int64_t GetEvictionOffset(const media::TimeUnit& aTime) override; + + void BreakCycles() override; + + private: + friend class WebMDemuxer; + ~WebMTrackDemuxer(); + void UpdateSamples(const nsTArray<RefPtr<MediaRawData>>& aSamples); + void SetNextKeyFrameTime(); + nsresult NextSample(RefPtr<MediaRawData>& aData); + RefPtr<WebMDemuxer> mParent; + TrackInfo::TrackType mType; + UniquePtr<TrackInfo> mInfo; + Maybe<media::TimeUnit> mNextKeyframeTime; + bool mNeedKeyframe; + + // Queued samples extracted by the demuxer, but not yet returned. + MediaRawDataQueue mSamples; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMWriter.cpp b/dom/media/webm/WebMWriter.cpp new file mode 100644 index 0000000000..568c9b8e4d --- /dev/null +++ b/dom/media/webm/WebMWriter.cpp @@ -0,0 +1,111 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "WebMWriter.h" +#include "EbmlComposer.h" +#include "mozilla/ProfilerLabels.h" +#include "OpusTrackEncoder.h" + +namespace mozilla { + +WebMWriter::WebMWriter() : mEbmlComposer(new EbmlComposer()) {} + +WebMWriter::~WebMWriter() { + // Out-of-line dtor so mEbmlComposer UniquePtr can delete a complete type. +} + +nsresult WebMWriter::WriteEncodedTrack( + const nsTArray<RefPtr<EncodedFrame>>& aData, uint32_t aFlags) { + AUTO_PROFILER_LABEL("WebMWriter::WriteEncodedTrack", OTHER); + for (uint32_t i = 0; i < aData.Length(); i++) { + nsresult rv = mEbmlComposer->WriteSimpleBlock(aData.ElementAt(i).get()); + NS_ENSURE_SUCCESS(rv, rv); + } + return NS_OK; +} + +nsresult WebMWriter::GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags) { + AUTO_PROFILER_LABEL("WebMWriter::GetContainerData", OTHER); + mEbmlComposer->ExtractBuffer(aOutputBufs, aFlags); + if (aFlags & ContainerWriter::FLUSH_NEEDED) { + mIsWritingComplete = true; + } + return NS_OK; +} + +nsresult WebMWriter::SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) { + AUTO_PROFILER_LABEL("WebMWriter::SetMetadata", OTHER); + MOZ_DIAGNOSTIC_ASSERT(!aMetadata.IsEmpty()); + + // Integrity checks + bool bad = false; + for (const RefPtr<TrackMetadataBase>& metadata : aMetadata) { + MOZ_ASSERT(metadata); + + if (metadata->GetKind() == TrackMetadataBase::METADATA_VP8) { + VP8Metadata* meta = static_cast<VP8Metadata*>(metadata.get()); + if (meta->mWidth == 0 || meta->mHeight == 0 || meta->mDisplayWidth == 0 || + meta->mDisplayHeight == 0) { + bad = true; + } + } + + if (metadata->GetKind() == TrackMetadataBase::METADATA_VORBIS) { + VorbisMetadata* meta = static_cast<VorbisMetadata*>(metadata.get()); + if (meta->mSamplingFrequency == 0 || meta->mChannels == 0 || + meta->mData.IsEmpty()) { + bad = true; + } + } + + if (metadata->GetKind() == 
TrackMetadataBase::METADATA_OPUS) { + OpusMetadata* meta = static_cast<OpusMetadata*>(metadata.get()); + if (meta->mSamplingFrequency == 0 || meta->mChannels == 0 || + meta->mIdHeader.IsEmpty()) { + bad = true; + } + } + } + if (bad) { + return NS_ERROR_FAILURE; + } + + // Storing + DebugOnly<bool> hasAudio = false; + DebugOnly<bool> hasVideo = false; + for (const RefPtr<TrackMetadataBase>& metadata : aMetadata) { + MOZ_ASSERT(metadata); + + if (metadata->GetKind() == TrackMetadataBase::METADATA_VP8) { + MOZ_ASSERT(!hasVideo); + VP8Metadata* meta = static_cast<VP8Metadata*>(metadata.get()); + mEbmlComposer->SetVideoConfig(meta->mWidth, meta->mHeight, + meta->mDisplayWidth, meta->mDisplayHeight); + hasVideo = true; + } + + if (metadata->GetKind() == TrackMetadataBase::METADATA_VORBIS) { + MOZ_ASSERT(!hasAudio); + VorbisMetadata* meta = static_cast<VorbisMetadata*>(metadata.get()); + mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels); + mEbmlComposer->SetAudioCodecPrivateData(meta->mData); + hasAudio = true; + } + + if (metadata->GetKind() == TrackMetadataBase::METADATA_OPUS) { + MOZ_ASSERT(!hasAudio); + OpusMetadata* meta = static_cast<OpusMetadata*>(metadata.get()); + mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels); + mEbmlComposer->SetAudioCodecPrivateData(meta->mIdHeader); + hasAudio = true; + } + } + mEbmlComposer->GenerateHeader(); + return NS_OK; +} + +} // namespace mozilla diff --git a/dom/media/webm/WebMWriter.h b/dom/media/webm/WebMWriter.h new file mode 100644 index 0000000000..71d2e18311 --- /dev/null +++ b/dom/media/webm/WebMWriter.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef WebMWriter_h_ +#define WebMWriter_h_ + +#include "ContainerWriter.h" + +namespace mozilla { + +class EbmlComposer; + +// Vorbis metadata structure +class VorbisMetadata : public TrackMetadataBase { + public: + nsTArray<uint8_t> mData; + int32_t mChannels; + float mSamplingFrequency; + MetadataKind GetKind() const override { return METADATA_VORBIS; } +}; + +// VP8 metadata structure +class VP8Metadata : public TrackMetadataBase { + public: + int32_t mWidth; + int32_t mHeight; + int32_t mDisplayWidth; + int32_t mDisplayHeight; + MetadataKind GetKind() const override { return METADATA_VP8; } +}; + +/** + * WebM writer helper + * This class accepts audio or video metadata and encoded data from the + * encoder, passes them to the EbmlComposer, and returns the muxed data through + * GetContainerData. The ctor/dtor run in the MediaRecorder thread, others run + * in the MediaEncoder thread. + */ +class WebMWriter : public ContainerWriter { + public: + // Run in MediaRecorder thread + WebMWriter(); + virtual ~WebMWriter(); + + // WriteEncodedTrack inserts raw packets into the WebM stream. Does not accept + // any flags: any specified will be ignored. Writing is finalized by + // flushing via GetContainerData(). + nsresult WriteEncodedTrack(const nsTArray<RefPtr<EncodedFrame>>& aData, + uint32_t aFlags = 0) override; + + // GetContainerData outputs the multiplexed data. + // aFlags indicates whether the muxer should enter the finished stage and flush + // out queued data. 
+ nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags = 0) override; + + // Assign metadata into muxer + nsresult SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) override; + + private: + UniquePtr<EbmlComposer> mEbmlComposer; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/moz.build b/dom/media/webm/moz.build new file mode 100644 index 0000000000..f65fe5bc6b --- /dev/null +++ b/dom/media/webm/moz.build @@ -0,0 +1,28 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "NesteggPacketHolder.h", + "WebMBufferedParser.h", + "WebMDecoder.h", + "WebMDemuxer.h", + "WebMWriter.h", +] + +UNIFIED_SOURCES += [ + "EbmlComposer.cpp", + "WebMBufferedParser.cpp", + "WebMDecoder.cpp", + "WebMDemuxer.cpp", + "WebMWriter.cpp", +] + +CXXFLAGS += CONFIG["MOZ_LIBVPX_CFLAGS"] + +FINAL_LIBRARY = "xul" + +# Add libFuzzer configuration directives +include("/tools/fuzzing/libfuzzer-config.mozbuild") |