diff options
Diffstat (limited to 'dom/media/encoder')
-rw-r--r-- | dom/media/encoder/ContainerWriter.h | 75 | ||||
-rw-r--r-- | dom/media/encoder/EncodedFrame.h | 64 | ||||
-rw-r--r-- | dom/media/encoder/MediaEncoder.cpp | 942 | ||||
-rw-r--r-- | dom/media/encoder/MediaEncoder.h | 297 | ||||
-rw-r--r-- | dom/media/encoder/Muxer.cpp | 218 | ||||
-rw-r--r-- | dom/media/encoder/Muxer.h | 73 | ||||
-rw-r--r-- | dom/media/encoder/OpusTrackEncoder.cpp | 454 | ||||
-rw-r--r-- | dom/media/encoder/OpusTrackEncoder.h | 121 | ||||
-rw-r--r-- | dom/media/encoder/TrackEncoder.cpp | 764 | ||||
-rw-r--r-- | dom/media/encoder/TrackEncoder.h | 520 | ||||
-rw-r--r-- | dom/media/encoder/TrackMetadataBase.h | 76 | ||||
-rw-r--r-- | dom/media/encoder/VP8TrackEncoder.cpp | 583 | ||||
-rw-r--r-- | dom/media/encoder/VP8TrackEncoder.h | 108 | ||||
-rw-r--r-- | dom/media/encoder/moz.build | 50 |
14 files changed, 4345 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h new file mode 100644 index 0000000000..724c8b90c9 --- /dev/null +++ b/dom/media/encoder/ContainerWriter.h @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ContainerWriter_h_ +#define ContainerWriter_h_ + +#include "nsTArray.h" +#include "EncodedFrame.h" +#include "TrackMetadataBase.h" + +namespace mozilla { +/** + * ContainerWriter packs encoded track data into a specific media container. + */ +class ContainerWriter { + public: + ContainerWriter() : mInitialized(false), mIsWritingComplete(false) {} + virtual ~ContainerWriter() {} + // Mapping to DOMMediaStream::TrackTypeHints + enum { + CREATE_AUDIO_TRACK = 1 << 0, + CREATE_VIDEO_TRACK = 1 << 1, + }; + enum { END_OF_STREAM = 1 << 0 }; + + /** + * Writes encoded track data from aData into the internal stream of container + * writer. aFlags is used to signal the impl of different conditions + * such as END_OF_STREAM. Each impl may handle different flags, and should be + * documented accordingly. Currently, WriteEncodedTrack doesn't support + * explicit track specification, though each impl may provide logic to + * allocate frames into different tracks. + */ + virtual nsresult WriteEncodedTrack( + const nsTArray<RefPtr<EncodedFrame>>& aData, uint32_t aFlags = 0) = 0; + + /** + * Stores the metadata for all given tracks to the muxer. + * + * This method checks the integrity of aMetadata. + * If the metadata isn't well formatted, this method returns NS_ERROR_FAILURE. + * If the metadata is well formatted, it stores the metadata and returns + * NS_OK. 
+ */ + virtual nsresult SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) = 0; + + /** + * Indicate if the writer has finished to output data + */ + virtual bool IsWritingComplete() { return mIsWritingComplete; } + + enum { FLUSH_NEEDED = 1 << 0, GET_HEADER = 1 << 1 }; + + /** + * Copies the final container data to a buffer if it has accumulated enough + * packets from WriteEncodedTrack. This buffer of data is appended to + * aOutputBufs, and existing elements of aOutputBufs should not be modified. + * aFlags is true with FLUSH_NEEDED will force OggWriter to flush an ogg page + * even it is not full, and copy these container data to a buffer for + * aOutputBufs to append. + */ + virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags = 0) = 0; + + protected: + bool mInitialized; + bool mIsWritingComplete; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/EncodedFrame.h b/dom/media/encoder/EncodedFrame.h new file mode 100644 index 0000000000..3c75e0fde0 --- /dev/null +++ b/dom/media/encoder/EncodedFrame.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef EncodedFrame_h_ +#define EncodedFrame_h_ + +#include "nsISupportsImpl.h" +#include "mozilla/media/MediaUtils.h" +#include "TimeUnits.h" +#include "VideoUtils.h" + +namespace mozilla { + +// Represent an encoded frame emitted by an encoder +class EncodedFrame final { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame) + public: + enum FrameType { + VP8_I_FRAME, // VP8 intraframe + VP8_P_FRAME, // VP8 predicted frame + OPUS_AUDIO_FRAME, // Opus audio frame + UNKNOWN // FrameType not set + }; + using ConstFrameData = const media::Refcountable<nsTArray<uint8_t>>; + using FrameData = media::Refcountable<nsTArray<uint8_t>>; + EncodedFrame(const media::TimeUnit& aTime, uint64_t aDuration, + uint64_t aDurationBase, FrameType aFrameType, + RefPtr<ConstFrameData> aData) + : mTime(aTime), + mDuration(aDuration), + mDurationBase(aDurationBase), + mFrameType(aFrameType), + mFrameData(std::move(aData)) { + MOZ_ASSERT(mFrameData); + MOZ_ASSERT_IF(mFrameType == VP8_I_FRAME, mDurationBase == PR_USEC_PER_SEC); + MOZ_ASSERT_IF(mFrameType == VP8_P_FRAME, mDurationBase == PR_USEC_PER_SEC); + MOZ_ASSERT_IF(mFrameType == OPUS_AUDIO_FRAME, mDurationBase == 48000); + } + // Timestamp in microseconds + const media::TimeUnit mTime; + // The playback duration of this packet in mDurationBase. + const uint64_t mDuration; + // The time base of mDuration. + const uint64_t mDurationBase; + // Represent what is in the FrameData + const FrameType mFrameType; + // Encoded data + const RefPtr<ConstFrameData> mFrameData; + + // The end time of the frame in microseconds. 
+ media::TimeUnit GetEndTime() const { + return mTime + FramesToTimeUnit(mDuration, mDurationBase); + } + + private: + // Private destructor, to discourage deletion outside of Release(): + ~EncodedFrame() = default; +}; + +} // namespace mozilla + +#endif // EncodedFrame_h_ diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp new file mode 100644 index 0000000000..1b96468e21 --- /dev/null +++ b/dom/media/encoder/MediaEncoder.cpp @@ -0,0 +1,942 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MediaEncoder.h" + +#include <algorithm> +#include "AudioNodeEngine.h" +#include "AudioNodeTrack.h" +#include "DriftCompensation.h" +#include "GeckoProfiler.h" +#include "MediaDecoder.h" +#include "MediaTrackGraphImpl.h" +#include "MediaTrackListener.h" +#include "mozilla/dom/AudioNode.h" +#include "mozilla/dom/AudioStreamTrack.h" +#include "mozilla/dom/MediaStreamTrack.h" +#include "mozilla/dom/VideoStreamTrack.h" +#include "mozilla/gfx/Point.h" // IntSize +#include "mozilla/Logging.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/TaskQueue.h" +#include "mozilla/Unused.h" +#include "Muxer.h" +#include "nsMimeTypes.h" +#include "nsThreadUtils.h" +#include "OggWriter.h" +#include "OpusTrackEncoder.h" +#include "TimeUnits.h" +#include "Tracing.h" + +#ifdef MOZ_WEBM_ENCODER +# include "VP8TrackEncoder.h" +# include "WebMWriter.h" +#endif + +mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder"); +#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg) + +namespace mozilla { + +using namespace dom; +using namespace media; + +class MediaEncoder::AudioTrackListener : public DirectMediaTrackListener { + public: + 
AudioTrackListener(DriftCompensator* aDriftCompensator, + AudioTrackEncoder* aEncoder, TaskQueue* aEncoderThread) + : mDirectConnected(false), + mInitialized(false), + mRemoved(false), + mDriftCompensator(aDriftCompensator), + mEncoder(aEncoder), + mEncoderThread(aEncoderThread), + mShutdownPromise(mShutdownHolder.Ensure(__func__)) { + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + } + + void NotifyDirectListenerInstalled(InstallationResult aResult) override { + if (aResult == InstallationResult::SUCCESS) { + LOG(LogLevel::Info, ("Audio track direct listener installed")); + mDirectConnected = true; + } else { + LOG(LogLevel::Info, ("Audio track failed to install direct listener")); + MOZ_ASSERT(!mDirectConnected); + } + } + + void NotifyDirectListenerUninstalled() override { + mDirectConnected = false; + + if (mRemoved) { + mEncoder = nullptr; + mEncoderThread = nullptr; + } + } + + void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aQueuedMedia) override { + TRACE_COMMENT("Encoder %p", mEncoder.get()); + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + + if (!mInitialized) { + mDriftCompensator->NotifyAudioStart(TimeStamp::Now()); + mInitialized = true; + } + + mDriftCompensator->NotifyAudio(aQueuedMedia.GetDuration()); + + const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia); + + AudioSegment copy; + copy.AppendSlice(audio, 0, audio.GetDuration()); + + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod<StoreCopyPassByRRef<AudioSegment>>( + "mozilla::AudioTrackEncoder::AppendAudioSegment", mEncoder, + &AudioTrackEncoder::AppendAudioSegment, std::move(copy))); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnded(MediaTrackGraph* aGraph) override { + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod("mozilla::AudioTrackEncoder::NotifyEndOfStream", + mEncoder, 
&AudioTrackEncoder::NotifyEndOfStream)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyRemoved(MediaTrackGraph* aGraph) override { + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod("mozilla::AudioTrackEncoder::NotifyEndOfStream", + mEncoder, &AudioTrackEncoder::NotifyEndOfStream)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mRemoved = true; + + if (!mDirectConnected) { + mEncoder = nullptr; + mEncoderThread = nullptr; + } + + mShutdownHolder.Resolve(true, __func__); + } + + const RefPtr<GenericNonExclusivePromise>& OnShutdown() const { + return mShutdownPromise; + } + + private: + bool mDirectConnected; + bool mInitialized; + bool mRemoved; + const RefPtr<DriftCompensator> mDriftCompensator; + RefPtr<AudioTrackEncoder> mEncoder; + RefPtr<TaskQueue> mEncoderThread; + MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder; + const RefPtr<GenericNonExclusivePromise> mShutdownPromise; +}; + +class MediaEncoder::VideoTrackListener : public DirectMediaTrackListener { + public: + VideoTrackListener(VideoTrackEncoder* aEncoder, TaskQueue* aEncoderThread) + : mDirectConnected(false), + mInitialized(false), + mRemoved(false), + mEncoder(aEncoder), + mEncoderThread(aEncoderThread), + mShutdownPromise(mShutdownHolder.Ensure(__func__)) { + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + } + + void NotifyDirectListenerInstalled(InstallationResult aResult) override { + if (aResult == InstallationResult::SUCCESS) { + LOG(LogLevel::Info, ("Video track direct listener installed")); + mDirectConnected = true; + } else { + LOG(LogLevel::Info, ("Video track failed to install direct listener")); + MOZ_ASSERT(!mDirectConnected); + return; + } + } + + void NotifyDirectListenerUninstalled() override { + mDirectConnected = false; + + if (mRemoved) { + mEncoder = nullptr; + mEncoderThread = nullptr; + } + } + + void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& 
aQueuedMedia) override { + TRACE_COMMENT("Encoder %p", mEncoder.get()); + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + + const TimeStamp now = TimeStamp::Now(); + if (!mInitialized) { + nsresult rv = mEncoderThread->Dispatch(NewRunnableMethod<TimeStamp>( + "mozilla::VideoTrackEncoder::SetStartOffset", mEncoder, + &VideoTrackEncoder::SetStartOffset, now)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + mInitialized = true; + } + + nsresult rv = mEncoderThread->Dispatch(NewRunnableMethod<TimeStamp>( + "mozilla::VideoTrackEncoder::AdvanceCurrentTime", mEncoder, + &VideoTrackEncoder::AdvanceCurrentTime, now)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyRealtimeTrackData(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aMedia) override { + TRACE_COMMENT("Encoder %p", mEncoder.get()); + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + MOZ_ASSERT(aMedia.GetType() == MediaSegment::VIDEO); + + const VideoSegment& video = static_cast<const VideoSegment&>(aMedia); + VideoSegment copy; + for (VideoSegment::ConstChunkIterator iter(video); !iter.IsEnded(); + iter.Next()) { + copy.AppendFrame(do_AddRef(iter->mFrame.GetImage()), + iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack(), iter->mTimeStamp); + } + + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod<StoreCopyPassByRRef<VideoSegment>>( + "mozilla::VideoTrackEncoder::AppendVideoSegment", mEncoder, + &VideoTrackEncoder::AppendVideoSegment, std::move(copy))); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnabledStateChanged(MediaTrackGraph* aGraph, + bool aEnabled) override { + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv; + if (aEnabled) { + rv = mEncoderThread->Dispatch(NewRunnableMethod<TimeStamp>( + "mozilla::VideoTrackEncoder::Enable", mEncoder, + &VideoTrackEncoder::Enable, TimeStamp::Now())); + } else { + rv = 
mEncoderThread->Dispatch(NewRunnableMethod<TimeStamp>( + "mozilla::VideoTrackEncoder::Disable", mEncoder, + &VideoTrackEncoder::Disable, TimeStamp::Now())); + } + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnded(MediaTrackGraph* aGraph) override { + MOZ_ASSERT(mEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod("mozilla::VideoTrackEncoder::NotifyEndOfStream", + mEncoder, &VideoTrackEncoder::NotifyEndOfStream)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyRemoved(MediaTrackGraph* aGraph) override { + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod("mozilla::VideoTrackEncoder::NotifyEndOfStream", + mEncoder, &VideoTrackEncoder::NotifyEndOfStream)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mRemoved = true; + + if (!mDirectConnected) { + mEncoder = nullptr; + mEncoderThread = nullptr; + } + + mShutdownHolder.Resolve(true, __func__); + } + + const RefPtr<GenericNonExclusivePromise>& OnShutdown() const { + return mShutdownPromise; + } + + private: + bool mDirectConnected; + bool mInitialized; + bool mRemoved; + RefPtr<VideoTrackEncoder> mEncoder; + RefPtr<TaskQueue> mEncoderThread; + MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder; + const RefPtr<GenericNonExclusivePromise> mShutdownPromise; +}; + +class MediaEncoder::EncoderListener : public TrackEncoderListener { + public: + EncoderListener(TaskQueue* aEncoderThread, MediaEncoder* aEncoder) + : mEncoderThread(aEncoderThread), + mEncoder(aEncoder), + mPendingDataAvailable(false) {} + + void Forget() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + mEncoder = nullptr; + } + + void Initialized(TrackEncoder* aTrackEncoder) override { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(aTrackEncoder->IsInitialized()); + + if (!mEncoder) { + return; + } + + nsresult rv = mEncoderThread->Dispatch( + 
NewRunnableMethod("mozilla::MediaEncoder::NotifyInitialized", mEncoder, + &MediaEncoder::NotifyInitialized)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void DataAvailable(TrackEncoder* aTrackEncoder) override { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(aTrackEncoder->IsInitialized()); + + if (!mEncoder) { + return; + } + + if (mPendingDataAvailable) { + return; + } + + nsresult rv = mEncoderThread->Dispatch(NewRunnableMethod( + "mozilla::MediaEncoder::EncoderListener::DataAvailableImpl", this, + &EncoderListener::DataAvailableImpl)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mPendingDataAvailable = true; + } + + void DataAvailableImpl() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (!mEncoder) { + return; + } + + mEncoder->NotifyDataAvailable(); + mPendingDataAvailable = false; + } + + void Error(TrackEncoder* aTrackEncoder) override { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (!mEncoder) { + return; + } + + nsresult rv = mEncoderThread->Dispatch(NewRunnableMethod( + "mozilla::MediaEncoder::SetError", mEncoder, &MediaEncoder::SetError)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + protected: + RefPtr<TaskQueue> mEncoderThread; + RefPtr<MediaEncoder> mEncoder; + bool mPendingDataAvailable; +}; + +MediaEncoder::MediaEncoder(TaskQueue* aEncoderThread, + RefPtr<DriftCompensator> aDriftCompensator, + UniquePtr<ContainerWriter> aWriter, + AudioTrackEncoder* aAudioEncoder, + VideoTrackEncoder* aVideoEncoder, + TrackRate aTrackRate, const nsAString& aMIMEType) + : mEncoderThread(aEncoderThread), + mMuxer(MakeUnique<Muxer>(std::move(aWriter))), + mAudioEncoder(aAudioEncoder), + mVideoEncoder(aVideoEncoder), + mEncoderListener(MakeAndAddRef<EncoderListener>(mEncoderThread, this)), + mStartTime(TimeStamp::Now()), + mMIMEType(aMIMEType), + mInitialized(false), + mCompleted(false), + mError(false) { + if (mAudioEncoder) { + mAudioListener = 
MakeAndAddRef<AudioTrackListener>( + aDriftCompensator, mAudioEncoder, mEncoderThread); + nsresult rv = + mEncoderThread->Dispatch(NewRunnableMethod<RefPtr<EncoderListener>>( + "mozilla::AudioTrackEncoder::RegisterListener", mAudioEncoder, + &AudioTrackEncoder::RegisterListener, mEncoderListener)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + if (mVideoEncoder) { + mVideoListener = + MakeAndAddRef<VideoTrackListener>(mVideoEncoder, mEncoderThread); + nsresult rv = + mEncoderThread->Dispatch(NewRunnableMethod<RefPtr<EncoderListener>>( + "mozilla::VideoTrackEncoder::RegisterListener", mVideoEncoder, + &VideoTrackEncoder::RegisterListener, mEncoderListener)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } +} + +MediaEncoder::~MediaEncoder() { + MOZ_ASSERT(mListeners.IsEmpty()); + MOZ_ASSERT(!mAudioTrack); + MOZ_ASSERT(!mVideoTrack); + MOZ_ASSERT(!mAudioNode); + MOZ_ASSERT(!mInputPort); + MOZ_ASSERT(!mPipeTrack); +} + +void MediaEncoder::EnsureGraphTrackFrom(MediaTrack* aTrack) { + if (mGraphTrack) { + return; + } + MOZ_DIAGNOSTIC_ASSERT(!aTrack->IsDestroyed()); + mGraphTrack = MakeAndAddRef<SharedDummyTrack>( + aTrack->GraphImpl()->CreateSourceTrack(MediaSegment::VIDEO)); +} + +void MediaEncoder::RunOnGraph(already_AddRefed<Runnable> aRunnable) { + MOZ_ASSERT(mGraphTrack); + class Message : public ControlMessage { + public: + explicit Message(already_AddRefed<Runnable> aRunnable) + : ControlMessage(nullptr), mRunnable(aRunnable) {} + void Run() override { mRunnable->Run(); } + const RefPtr<Runnable> mRunnable; + }; + mGraphTrack->mTrack->GraphImpl()->AppendMessage( + MakeUnique<Message>(std::move(aRunnable))); +} + +void MediaEncoder::Suspend() { + RunOnGraph(NS_NewRunnableFunction( + "MediaEncoder::Suspend (graph)", + [thread = mEncoderThread, audio = mAudioEncoder, video = mVideoEncoder] { + if (NS_FAILED(thread->Dispatch( + NS_NewRunnableFunction("MediaEncoder::Suspend (encoder)", + [audio, video, now = TimeStamp::Now()] { + if 
(audio) { + audio->Suspend(); + } + if (video) { + video->Suspend(now); + } + })))) { + // RunOnGraph added an extra async step, and now `thread` has shut + // down. + return; + } + })); +} + +void MediaEncoder::Resume() { + RunOnGraph(NS_NewRunnableFunction( + "MediaEncoder::Resume (graph)", + [thread = mEncoderThread, audio = mAudioEncoder, video = mVideoEncoder] { + if (NS_FAILED(thread->Dispatch( + NS_NewRunnableFunction("MediaEncoder::Resume (encoder)", + [audio, video, now = TimeStamp::Now()] { + if (audio) { + audio->Resume(); + } + if (video) { + video->Resume(now); + } + })))) { + // RunOnGraph added an extra async step, and now `thread` has shut + // down. + return; + } + })); +} + +void MediaEncoder::ConnectAudioNode(AudioNode* aNode, uint32_t aOutput) { + MOZ_ASSERT(NS_IsMainThread()); + + if (mAudioNode) { + MOZ_ASSERT(false, "Only one audio node supported"); + return; + } + + // Only AudioNodeTrack of kind EXTERNAL_OUTPUT stores output audio data in + // the track (see AudioNodeTrack::AdvanceOutputSegment()). That means + // forwarding input track in recorder session won't be able to copy data from + // the track of non-destination node. Create a pipe track in this case. 
+ if (aNode->NumberOfOutputs() > 0) { + AudioContext* ctx = aNode->Context(); + AudioNodeEngine* engine = new AudioNodeEngine(nullptr); + AudioNodeTrack::Flags flags = AudioNodeTrack::EXTERNAL_OUTPUT | + AudioNodeTrack::NEED_MAIN_THREAD_ENDED; + mPipeTrack = AudioNodeTrack::Create(ctx, engine, flags, ctx->Graph()); + AudioNodeTrack* ns = aNode->GetTrack(); + if (ns) { + mInputPort = mPipeTrack->AllocateInputPort(aNode->GetTrack(), 0, aOutput); + } + } + + mAudioNode = aNode; + + if (mPipeTrack) { + mPipeTrack->AddListener(mAudioListener); + EnsureGraphTrackFrom(mPipeTrack); + } else { + mAudioNode->GetTrack()->AddListener(mAudioListener); + EnsureGraphTrackFrom(mAudioNode->GetTrack()); + } +} + +void MediaEncoder::ConnectMediaStreamTrack(MediaStreamTrack* aTrack) { + MOZ_ASSERT(NS_IsMainThread()); + + if (aTrack->Ended()) { + MOZ_ASSERT_UNREACHABLE("Cannot connect ended track"); + return; + } + + EnsureGraphTrackFrom(aTrack->GetTrack()); + + if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) { + if (!mAudioEncoder) { + // No audio encoder for this audio track. It could be disabled. + LOG(LogLevel::Warning, ("Cannot connect to audio track - no encoder")); + return; + } + + MOZ_ASSERT(!mAudioTrack, "Only one audio track supported."); + MOZ_ASSERT(mAudioListener, "No audio listener for this audio track"); + + LOG(LogLevel::Info, ("Connected to audio track %p", aTrack)); + + mAudioTrack = audio; + audio->AddListener(mAudioListener); + } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) { + if (!mVideoEncoder) { + // No video encoder for this video track. It could be disabled. 
+ LOG(LogLevel::Warning, ("Cannot connect to video track - no encoder")); + return; + } + + MOZ_ASSERT(!mVideoTrack, "Only one video track supported."); + MOZ_ASSERT(mVideoListener, "No video listener for this video track"); + + LOG(LogLevel::Info, ("Connected to video track %p", aTrack)); + + mVideoTrack = video; + video->AddDirectListener(mVideoListener); + video->AddListener(mVideoListener); + } else { + MOZ_ASSERT(false, "Unknown track type"); + } +} + +void MediaEncoder::RemoveMediaStreamTrack(MediaStreamTrack* aTrack) { + if (!aTrack) { + MOZ_ASSERT(false); + return; + } + + if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) { + if (audio != mAudioTrack) { + MOZ_ASSERT(false, "Not connected to this audio track"); + return; + } + + if (mAudioListener) { + audio->RemoveDirectListener(mAudioListener); + audio->RemoveListener(mAudioListener); + } + mAudioTrack = nullptr; + } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) { + if (video != mVideoTrack) { + MOZ_ASSERT(false, "Not connected to this video track"); + return; + } + + if (mVideoListener) { + video->RemoveDirectListener(mVideoListener); + video->RemoveListener(mVideoListener); + } + mVideoTrack = nullptr; + } +} + +/* static */ +already_AddRefed<MediaEncoder> MediaEncoder::CreateEncoder( + TaskQueue* aEncoderThread, const nsAString& aMIMEType, + uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes, + TrackRate aTrackRate) { + AUTO_PROFILER_LABEL("MediaEncoder::CreateEncoder", OTHER); + + UniquePtr<ContainerWriter> writer; + RefPtr<AudioTrackEncoder> audioEncoder; + RefPtr<VideoTrackEncoder> videoEncoder; + auto driftCompensator = + MakeRefPtr<DriftCompensator>(aEncoderThread, aTrackRate); + + Maybe<MediaContainerType> mimeType = MakeMediaContainerType(aMIMEType); + if (!mimeType) { + return nullptr; + } + + for (const auto& codec : mimeType->ExtendedType().Codecs().Range()) { + if (codec.EqualsLiteral("opus")) { + MOZ_ASSERT(!audioEncoder); + audioEncoder = 
MakeAndAddRef<OpusTrackEncoder>(aTrackRate); + } else if (codec.EqualsLiteral("vp8") || codec.EqualsLiteral("vp8.0")) { + MOZ_ASSERT(!videoEncoder); + if (Preferences::GetBool("media.recorder.video.frame_drops", true)) { + videoEncoder = MakeAndAddRef<VP8TrackEncoder>( + driftCompensator, aTrackRate, FrameDroppingMode::ALLOW); + } else { + videoEncoder = MakeAndAddRef<VP8TrackEncoder>( + driftCompensator, aTrackRate, FrameDroppingMode::DISALLOW); + } + } else { + MOZ_CRASH("Unknown codec"); + } + } + + if (mimeType->Type() == MEDIAMIMETYPE(VIDEO_WEBM) || + mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM)) { +#ifdef MOZ_WEBM_ENCODER + MOZ_ASSERT_IF(mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM), !videoEncoder); + writer = MakeUnique<WebMWriter>(); +#else + MOZ_CRASH("Webm cannot be selected if not supported"); +#endif // MOZ_WEBM_ENCODER + } else if (mimeType->Type() == MEDIAMIMETYPE(AUDIO_OGG)) { + MOZ_ASSERT(audioEncoder); + MOZ_ASSERT(!videoEncoder); + writer = MakeUnique<OggWriter>(); + } + NS_ENSURE_TRUE(writer, nullptr); + + LOG(LogLevel::Info, + ("Create encoder result:a[%p](%u bps) v[%p](%u bps) w[%p] mimeType = " + "%s.", + audioEncoder.get(), aAudioBitrate, videoEncoder.get(), aVideoBitrate, + writer.get(), NS_ConvertUTF16toUTF8(aMIMEType).get())); + + if (audioEncoder) { + audioEncoder->SetWorkerThread(aEncoderThread); + if (aAudioBitrate != 0) { + audioEncoder->SetBitrate(aAudioBitrate); + } + } + if (videoEncoder) { + videoEncoder->SetWorkerThread(aEncoderThread); + if (aVideoBitrate != 0) { + videoEncoder->SetBitrate(aVideoBitrate); + } + } + return MakeAndAddRef<MediaEncoder>( + aEncoderThread, std::move(driftCompensator), std::move(writer), + audioEncoder, videoEncoder, aTrackRate, aMIMEType); +} + +nsresult MediaEncoder::GetEncodedData( + nsTArray<nsTArray<uint8_t>>* aOutputBufs) { + AUTO_PROFILER_LABEL("MediaEncoder::GetEncodedData", OTHER); + + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(mInitialized); + MOZ_ASSERT_IF(mAudioEncoder, 
mAudioEncoder->IsInitialized()); + MOZ_ASSERT_IF(mVideoEncoder, mVideoEncoder->IsInitialized()); + + nsresult rv; + LOG(LogLevel::Verbose, + ("GetEncodedData TimeStamp = %f", GetEncodeTimeStamp())); + + if (mMuxer->NeedsMetadata()) { + nsTArray<RefPtr<TrackMetadataBase>> meta; + if (mAudioEncoder && !*meta.AppendElement(mAudioEncoder->GetMetadata())) { + LOG(LogLevel::Error, ("Audio metadata is null")); + SetError(); + return NS_ERROR_ABORT; + } + if (mVideoEncoder && !*meta.AppendElement(mVideoEncoder->GetMetadata())) { + LOG(LogLevel::Error, ("Video metadata is null")); + SetError(); + return NS_ERROR_ABORT; + } + + rv = mMuxer->SetMetadata(meta); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, ("SetMetadata failed")); + SetError(); + return rv; + } + } + + // First, feed encoded data from encoders to muxer. + + if (mVideoEncoder && !mVideoEncoder->IsEncodingComplete()) { + nsTArray<RefPtr<EncodedFrame>> videoFrames; + rv = mVideoEncoder->GetEncodedTrack(videoFrames); + if (NS_FAILED(rv)) { + // Encoding might be canceled. + LOG(LogLevel::Error, ("Failed to get encoded data from video encoder.")); + return rv; + } + for (const RefPtr<EncodedFrame>& frame : videoFrames) { + mMuxer->AddEncodedVideoFrame(frame); + } + if (mVideoEncoder->IsEncodingComplete()) { + mMuxer->VideoEndOfStream(); + } + } + + if (mAudioEncoder && !mAudioEncoder->IsEncodingComplete()) { + nsTArray<RefPtr<EncodedFrame>> audioFrames; + rv = mAudioEncoder->GetEncodedTrack(audioFrames); + if (NS_FAILED(rv)) { + // Encoding might be canceled. + LOG(LogLevel::Error, ("Failed to get encoded data from audio encoder.")); + return rv; + } + for (const RefPtr<EncodedFrame>& frame : audioFrames) { + mMuxer->AddEncodedAudioFrame(frame); + } + if (mAudioEncoder->IsEncodingComplete()) { + mMuxer->AudioEndOfStream(); + } + } + + // Second, get data from muxer. This will do the actual muxing. 
+ + rv = mMuxer->GetData(aOutputBufs); + if (mMuxer->IsFinished()) { + mCompleted = true; + Shutdown(); + } + + LOG(LogLevel::Verbose, + ("END GetEncodedData TimeStamp=%f " + "mCompleted=%d, aComplete=%d, vComplete=%d", + GetEncodeTimeStamp(), mCompleted, + !mAudioEncoder || mAudioEncoder->IsEncodingComplete(), + !mVideoEncoder || mVideoEncoder->IsEncodingComplete())); + + return rv; +} + +RefPtr<GenericNonExclusivePromise::AllPromiseType> MediaEncoder::Shutdown() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + if (mShutdownPromise) { + return mShutdownPromise; + } + + LOG(LogLevel::Info, ("MediaEncoder is shutting down.")); + if (mAudioEncoder) { + mAudioEncoder->UnregisterListener(mEncoderListener); + } + if (mVideoEncoder) { + mVideoEncoder->UnregisterListener(mEncoderListener); + } + mEncoderListener->Forget(); + + for (auto& l : mListeners.Clone()) { + // We dispatch here since this method is typically called from + // a DataAvailable() handler. + nsresult rv = mEncoderThread->Dispatch( + NewRunnableMethod("mozilla::MediaEncoderListener::Shutdown", l, + &MediaEncoderListener::Shutdown)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + AutoTArray<RefPtr<GenericNonExclusivePromise>, 2> shutdownPromises; + if (mAudioListener) { + shutdownPromises.AppendElement(mAudioListener->OnShutdown()); + } + if (mVideoListener) { + shutdownPromises.AppendElement(mVideoListener->OnShutdown()); + } + + return mShutdownPromise = + GenericNonExclusivePromise::All(mEncoderThread, shutdownPromises); +} + +RefPtr<GenericNonExclusivePromise::AllPromiseType> MediaEncoder::Cancel() { + MOZ_ASSERT(NS_IsMainThread()); + + Stop(); + + return InvokeAsync(mEncoderThread, __func__, + [self = RefPtr<MediaEncoder>(this), this]() { + if (mAudioEncoder) { + mAudioEncoder->Cancel(); + } + if (mVideoEncoder) { + mVideoEncoder->Cancel(); + } + return Shutdown(); + }); +} + +bool MediaEncoder::HasError() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + return mError; 
+} + +void MediaEncoder::SetError() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (mError) { + return; + } + + mError = true; + for (auto& l : mListeners.Clone()) { + l->Error(); + } +} + +void MediaEncoder::Stop() { + MOZ_ASSERT(NS_IsMainThread()); + + if (mAudioNode) { + mAudioNode->GetTrack()->RemoveListener(mAudioListener); + if (mInputPort) { + mInputPort->Destroy(); + mInputPort = nullptr; + } + if (mPipeTrack) { + mPipeTrack->RemoveListener(mAudioListener); + mPipeTrack->Destroy(); + mPipeTrack = nullptr; + } + mAudioNode = nullptr; + } + + if (mAudioTrack) { + RemoveMediaStreamTrack(mAudioTrack); + } + + if (mVideoTrack) { + RemoveMediaStreamTrack(mVideoTrack); + } +} + +bool MediaEncoder::IsWebMEncoderEnabled() { +#ifdef MOZ_WEBM_ENCODER + return StaticPrefs::media_encoder_webm_enabled(); +#else + return false; +#endif +} + +const nsString& MediaEncoder::MimeType() const { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + return mMIMEType; +} + +void MediaEncoder::NotifyInitialized() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (mInitialized) { + // This could happen if an encoder re-inits due to a resolution change. 
+ return; + } + + if (mAudioEncoder && !mAudioEncoder->IsInitialized()) { + return; + } + + if (mVideoEncoder && !mVideoEncoder->IsInitialized()) { + return; + } + + mInitialized = true; + + for (auto& l : mListeners.Clone()) { + l->Initialized(); + } +} + +void MediaEncoder::NotifyDataAvailable() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (!mInitialized) { + return; + } + + for (auto& l : mListeners.Clone()) { + l->DataAvailable(); + } +} + +void MediaEncoder::RegisterListener(MediaEncoderListener* aListener) { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(!mListeners.Contains(aListener)); + mListeners.AppendElement(aListener); +} + +bool MediaEncoder::UnregisterListener(MediaEncoderListener* aListener) { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + return mListeners.RemoveElement(aListener); +} + +/* + * SizeOfExcludingThis measures memory being used by the Media Encoder. + * Currently it measures the size of the Encoder buffer and memory occupied + * by mAudioEncoder and mVideoEncoder. 
+ */ +size_t MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + size_t size = 0; + if (mAudioEncoder) { + size += mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf); + } + if (mVideoEncoder) { + size += mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf); + } + return size; +} + +void MediaEncoder::SetVideoKeyFrameInterval(uint32_t aVideoKeyFrameInterval) { + if (!mVideoEncoder) { + return; + } + + MOZ_ASSERT(mEncoderThread); + nsresult rv = mEncoderThread->Dispatch(NewRunnableMethod<uint32_t>( + "mozilla::VideoTrackEncoder::SetKeyFrameInterval", mVideoEncoder, + &VideoTrackEncoder::SetKeyFrameInterval, aVideoKeyFrameInterval)); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h new file mode 100644 index 0000000000..1ac3c0f449 --- /dev/null +++ b/dom/media/encoder/MediaEncoder.h @@ -0,0 +1,297 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MediaEncoder_h_ +#define MediaEncoder_h_ + +#include "ContainerWriter.h" +#include "CubebUtils.h" +#include "MediaQueue.h" +#include "MediaTrackGraph.h" +#include "MediaTrackListener.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/UniquePtr.h" +#include "nsIMemoryReporter.h" +#include "TrackEncoder.h" + +namespace mozilla { + +class DriftCompensator; +class Muxer; +class Runnable; +class TaskQueue; + +namespace dom { +class AudioNode; +class AudioStreamTrack; +class MediaStreamTrack; +class VideoStreamTrack; +} // namespace dom + +class DriftCompensator; +class MediaEncoder; + +class MediaEncoderListener { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaEncoderListener) + virtual void Initialized() = 0; + virtual void DataAvailable() = 0; + virtual void Error() = 0; + virtual void Shutdown() = 0; + + protected: + virtual ~MediaEncoderListener() = default; +}; + +/** + * MediaEncoder is the framework of encoding module, it controls and manages + * procedures between ContainerWriter and TrackEncoder. ContainerWriter packs + * the encoded track data with a specific container (e.g. ogg, webm). + * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and + * are responsible for encoding raw data coming from MediaTrackGraph. + * + * MediaEncoder solves threading issues by doing message passing to a TaskQueue + * (the "encoder thread") as passed in to the constructor. Each + * MediaStreamTrack to be recorded is set up with a MediaTrackListener. + * Typically there are a non-direct track listeners for audio, direct listeners + * for video, and there is always a non-direct listener on each track for + * time-keeping. The listeners forward data to their corresponding TrackEncoders + * on the encoder thread. + * + * The MediaEncoder listens to events from all TrackEncoders, and in turn + * signals events to interested parties. Typically a MediaRecorder::Session. 
+ * The event that there's data available in the TrackEncoders is what typically + * drives the extraction and muxing of data. + * + * MediaEncoder is designed to be a passive component, neither does it own or is + * in charge of managing threads. Instead this is done by its owner. + * + * For example, usage from MediaRecorder of this component would be: + * 1) Create an encoder with a valid MIME type. + * => encoder = MediaEncoder::CreateEncoder(aMIMEType); + * It then creates a ContainerWriter according to the MIME type + * + * 2) Connect a MediaEncoderListener to be notified when the MediaEncoder has + * been initialized and when there's data available. + * => encoder->RegisterListener(listener); + * + * 3) Connect the sources to be recorded. Either through: + * => encoder->ConnectAudioNode(node); + * or + * => encoder->ConnectMediaStreamTrack(track); + * These should not be mixed. When connecting MediaStreamTracks there is + * support for at most one of each kind. + * + * 4) When the MediaEncoderListener is notified that the MediaEncoder has + * data available, we can encode data. This also encodes metadata on its + * first invocation. + * => encoder->GetEncodedData(...); + * + * 5) To stop encoding, there are multiple options: + * + * 5.1) Stop() for a graceful stop. + * => encoder->Stop(); + * + * 5.2) Cancel() for an immediate stop, if you don't need the data currently + * buffered. + * => encoder->Cancel(); + * + * 5.3) When all input tracks end, the MediaEncoder will automatically stop + * and shut down. 
+ */ +class MediaEncoder { + private: + class AudioTrackListener; + class VideoTrackListener; + class EncoderListener; + + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaEncoder) + + MediaEncoder(TaskQueue* aEncoderThread, + RefPtr<DriftCompensator> aDriftCompensator, + UniquePtr<ContainerWriter> aWriter, + AudioTrackEncoder* aAudioEncoder, + VideoTrackEncoder* aVideoEncoder, TrackRate aTrackRate, + const nsAString& aMIMEType); + + /** + * Called on main thread from MediaRecorder::Pause. + */ + void Suspend(); + + /** + * Called on main thread from MediaRecorder::Resume. + */ + void Resume(); + + /** + * Stops the current encoding, and disconnects the input tracks. + */ + void Stop(); + + /** + * Connects an AudioNode with the appropriate encoder. + */ + void ConnectAudioNode(dom::AudioNode* aNode, uint32_t aOutput); + + /** + * Connects a MediaStreamTrack with the appropriate encoder. + */ + void ConnectMediaStreamTrack(dom::MediaStreamTrack* aTrack); + + /** + * Removes a connected MediaStreamTrack. + */ + void RemoveMediaStreamTrack(dom::MediaStreamTrack* aTrack); + + /** + * Creates an encoder with a given MIME type. Returns null if we are unable + * to create the encoder. For now, default aMIMEType to "audio/ogg" and use + * Ogg+Opus if it is empty. + */ + static already_AddRefed<MediaEncoder> CreateEncoder( + TaskQueue* aEncoderThread, const nsAString& aMIMEType, + uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes, + TrackRate aTrackRate); + + /** + * Encodes raw data for all tracks to aOutputBufs. The buffer of container + * data is allocated in ContainerWriter::GetContainerData(). + * + * On its first call, metadata is also encoded. TrackEncoders must have been + * initialized before this is called. + */ + nsresult GetEncodedData(nsTArray<nsTArray<uint8_t>>* aOutputBufs); + + /** + * Asserts that Shutdown() has been called. Reasons are encoding + * complete, encounter an error, or being canceled by its caller. 
+ */ + void AssertShutdownCalled() { MOZ_ASSERT(mShutdownPromise); } + + /** + * Cancels the encoding and shuts down the encoder using Shutdown(). + */ + RefPtr<GenericNonExclusivePromise::AllPromiseType> Cancel(); + + bool HasError(); + + static bool IsWebMEncoderEnabled(); + + const nsString& MimeType() const; + + /** + * Notifies listeners that this MediaEncoder has been initialized. + */ + void NotifyInitialized(); + + /** + * Notifies listeners that this MediaEncoder has data available in some + * TrackEncoders. + */ + void NotifyDataAvailable(); + + /** + * Registers a listener to events from this MediaEncoder. + * We hold a strong reference to the listener. + */ + void RegisterListener(MediaEncoderListener* aListener); + + /** + * Unregisters a listener from events from this MediaEncoder. + * The listener will stop receiving events synchronously. + */ + bool UnregisterListener(MediaEncoderListener* aListener); + + MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf) + /* + * Measure the size of the buffer, and heap memory in bytes occupied by + * mAudioEncoder and mVideoEncoder. + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf); + + /** + * Set desired video keyframe interval defined in milliseconds. + */ + void SetVideoKeyFrameInterval(uint32_t aVideoKeyFrameInterval); + + protected: + ~MediaEncoder(); + + private: + /** + * Sets mGraphTrack if not already set, using a new stream from aTrack's + * graph. + */ + void EnsureGraphTrackFrom(MediaTrack* aTrack); + + /** + * Takes a regular runnable and dispatches it to the graph wrapped in a + * ControlMessage. + */ + void RunOnGraph(already_AddRefed<Runnable> aRunnable); + + /** + * Shuts down the MediaEncoder and cleans up track encoders. + * Listeners will be notified of the shutdown unless we were Cancel()ed first. + */ + RefPtr<GenericNonExclusivePromise::AllPromiseType> Shutdown(); + + /** + * Sets mError to true, notifies listeners of the error if mError changed, + * and stops encoding. 
+ */ + void SetError(); + + const RefPtr<TaskQueue> mEncoderThread; + const RefPtr<DriftCompensator> mDriftCompensator; + + UniquePtr<Muxer> mMuxer; + RefPtr<AudioTrackEncoder> mAudioEncoder; + RefPtr<AudioTrackListener> mAudioListener; + RefPtr<VideoTrackEncoder> mVideoEncoder; + RefPtr<VideoTrackListener> mVideoListener; + RefPtr<EncoderListener> mEncoderListener; + nsTArray<RefPtr<MediaEncoderListener>> mListeners; + + // The AudioNode we are encoding. + // Will be null when input is media stream or destination node. + RefPtr<dom::AudioNode> mAudioNode; + // Pipe-track for allowing a track listener on a non-destination AudioNode. + // Will be null when input is media stream or destination node. + RefPtr<AudioNodeTrack> mPipeTrack; + // Input port that connect mAudioNode to mPipeTrack. + // Will be null when input is media stream or destination node. + RefPtr<MediaInputPort> mInputPort; + // An audio track that we are encoding. Will be null if the input stream + // doesn't contain audio on start() or if the input is an AudioNode. + RefPtr<dom::AudioStreamTrack> mAudioTrack; + // A video track that we are encoding. Will be null if the input stream + // doesn't contain video on start() or if the input is an AudioNode. + RefPtr<dom::VideoStreamTrack> mVideoTrack; + + // A stream to keep the MediaTrackGraph alive while we're recording. + RefPtr<SharedDummyTrack> mGraphTrack; + + TimeStamp mStartTime; + const nsString mMIMEType; + bool mInitialized; + bool mCompleted; + bool mError; + // Set when shutdown starts. 
+ RefPtr<GenericNonExclusivePromise::AllPromiseType> mShutdownPromise; + // Get duration from create encoder, for logging purpose + double GetEncodeTimeStamp() { + TimeDuration decodeTime; + decodeTime = TimeStamp::Now() - mStartTime; + return decodeTime.ToMilliseconds(); + } +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/Muxer.cpp b/dom/media/encoder/Muxer.cpp new file mode 100644 index 0000000000..d434e2a4fd --- /dev/null +++ b/dom/media/encoder/Muxer.cpp @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Muxer.h" + +#include "ContainerWriter.h" + +namespace mozilla { + +LazyLogModule gMuxerLog("Muxer"); +#define LOG(type, ...) MOZ_LOG(gMuxerLog, type, (__VA_ARGS__)) + +Muxer::Muxer(UniquePtr<ContainerWriter> aWriter) + : mWriter(std::move(aWriter)) {} + +bool Muxer::IsFinished() { return mWriter->IsWritingComplete(); } + +nsresult Muxer::SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) { + MOZ_DIAGNOSTIC_ASSERT(!mMetadataSet); + MOZ_DIAGNOSTIC_ASSERT(!mHasAudio); + MOZ_DIAGNOSTIC_ASSERT(!mHasVideo); + nsresult rv = mWriter->SetMetadata(aMetadata); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "%p Setting metadata failed, tracks=%zu", this, + aMetadata.Length()); + return rv; + } + + for (const auto& track : aMetadata) { + switch (track->GetKind()) { + case TrackMetadataBase::METADATA_OPUS: + case TrackMetadataBase::METADATA_VORBIS: + case TrackMetadataBase::METADATA_AAC: + case TrackMetadataBase::METADATA_AMR: + case TrackMetadataBase::METADATA_EVRC: + MOZ_ASSERT(!mHasAudio, "Only one audio track supported"); + mHasAudio = true; + break; + case TrackMetadataBase::METADATA_VP8: + MOZ_ASSERT(!mHasVideo, "Only one video track supported"); + mHasVideo = true; 
+        break;
+      default:
+        MOZ_CRASH("Unknown codec metadata");
+    }
+  }
+  mMetadataSet = true;
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+  if (!mHasAudio) {
+    mEncodedAudioFrames.Finish();
+    MOZ_ASSERT(mEncodedAudioFrames.AtEndOfStream());
+  }
+  if (!mHasVideo) {
+    mEncodedVideoFrames.Finish();
+    MOZ_ASSERT(mEncodedVideoFrames.AtEndOfStream());
+  }
+  LOG(LogLevel::Info, "%p Metadata set; audio=%d, video=%d", this, mHasAudio,
+      mHasVideo);
+  return rv;
+}
+
+void Muxer::AddEncodedAudioFrame(EncodedFrame* aFrame) {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasAudio);
+  mEncodedAudioFrames.Push(aFrame);
+  LOG(LogLevel::Verbose,
+      "%p Added audio frame of type %u, [start %.2f, end %.2f)", this,
+      aFrame->mFrameType, aFrame->mTime.ToSeconds(),
+      aFrame->GetEndTime().ToSeconds());
+}
+
+void Muxer::AddEncodedVideoFrame(EncodedFrame* aFrame) {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasVideo);
+  mEncodedVideoFrames.Push(aFrame);
+  LOG(LogLevel::Verbose,
+      "%p Added video frame of type %u, [start %.2f, end %.2f)", this,
+      aFrame->mFrameType, aFrame->mTime.ToSeconds(),
+      aFrame->GetEndTime().ToSeconds());
+}
+
+void Muxer::AudioEndOfStream() {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasAudio);
+  LOG(LogLevel::Info, "%p Reached audio EOS", this);
+  mEncodedAudioFrames.Finish();
+}
+
+void Muxer::VideoEndOfStream() {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasVideo);
+  LOG(LogLevel::Info, "%p Reached video EOS", this);
+  mEncodedVideoFrames.Finish();
+}
+
+nsresult Muxer::GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers) {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+
+  nsresult rv;
+  if (!mMetadataEncoded) {
+    rv = mWriter->GetContainerData(aOutputBuffers, ContainerWriter::GET_HEADER);
+    if (NS_FAILED(rv)) {
+      LOG(LogLevel::Error, "%p Failed getting metadata from writer", this);
+      return rv;
+    }
+    mMetadataEncoded = true;
+  }
+
+  if (mEncodedAudioFrames.GetSize() == 0 && !mEncodedAudioFrames.IsFinished() &&
+      mEncodedVideoFrames.GetSize() == 0 &&
+      !mEncodedVideoFrames.IsFinished()) {
+    // Nothing to mux.
+    return NS_OK;
+  }
+
+  rv = Mux();
+  if (NS_FAILED(rv)) {
+    LOG(LogLevel::Error, "%p Failed muxing data into writer", this);
+    return rv;
+  }
+
+  MOZ_ASSERT_IF(
+      mEncodedAudioFrames.IsFinished() && mEncodedVideoFrames.IsFinished(),
+      mEncodedAudioFrames.AtEndOfStream());
+  MOZ_ASSERT_IF(
+      mEncodedAudioFrames.IsFinished() && mEncodedVideoFrames.IsFinished(),
+      mEncodedVideoFrames.AtEndOfStream());
+  uint32_t flags =
+      mEncodedAudioFrames.AtEndOfStream() && mEncodedVideoFrames.AtEndOfStream()
+          ? ContainerWriter::FLUSH_NEEDED
+          : 0;
+
+  if (mEncodedAudioFrames.AtEndOfStream() &&
+      mEncodedVideoFrames.AtEndOfStream()) {
+    LOG(LogLevel::Info, "%p All data written", this);
+  }
+
+  return mWriter->GetContainerData(aOutputBuffers, flags);
+}
+
+nsresult Muxer::Mux() {
+  MOZ_ASSERT(mMetadataSet);
+  MOZ_ASSERT(mHasAudio || mHasVideo);
+
+  nsTArray<RefPtr<EncodedFrame>> frames;
+  // The times at which we expect our next video and audio frames. These are
+  // based on the time + duration (GetEndTime()) of the last seen frames.
+  // Assumes that the encoders write the correct duration for frames.
+  media::TimeUnit expectedNextVideoTime;
+  media::TimeUnit expectedNextAudioTime;
+  // Interleave frames until we're out of audio or video
+  while (mEncodedVideoFrames.GetSize() > 0 &&
+         mEncodedAudioFrames.GetSize() > 0) {
+    RefPtr<EncodedFrame> videoFrame = mEncodedVideoFrames.PeekFront();
+    RefPtr<EncodedFrame> audioFrame = mEncodedAudioFrames.PeekFront();
+    // Each frame must start at or after the expected time derived from the
+    // end time of the previous frame on the same track.
+ MOZ_ASSERT(videoFrame->mTime >= expectedNextVideoTime); + MOZ_ASSERT(audioFrame->mTime >= expectedNextAudioTime); + if (videoFrame->mTime <= audioFrame->mTime) { + expectedNextVideoTime = videoFrame->GetEndTime(); + RefPtr<EncodedFrame> frame = mEncodedVideoFrames.PopFront(); + frames.AppendElement(frame); + } else { + expectedNextAudioTime = audioFrame->GetEndTime(); + RefPtr<EncodedFrame> frame = mEncodedAudioFrames.PopFront(); + frames.AppendElement(frame); + } + } + + // If we're out of audio we still may be able to add more video... + if (mEncodedAudioFrames.GetSize() == 0) { + while (mEncodedVideoFrames.GetSize() > 0) { + if (!mEncodedAudioFrames.AtEndOfStream() && + mEncodedVideoFrames.PeekFront()->mTime > expectedNextAudioTime) { + // Audio encoding is not complete and since the video frame comes + // after our next audio frame we cannot safely add it. + break; + } + frames.AppendElement(mEncodedVideoFrames.PopFront()); + } + } + + // If we're out of video we still may be able to add more audio... + if (mEncodedVideoFrames.GetSize() == 0) { + while (mEncodedAudioFrames.GetSize() > 0) { + if (!mEncodedVideoFrames.AtEndOfStream() && + mEncodedAudioFrames.PeekFront()->mTime > expectedNextVideoTime) { + // Video encoding is not complete and since the audio frame comes + // after our next video frame we cannot safely add it. + break; + } + frames.AppendElement(mEncodedAudioFrames.PopFront()); + } + } + + LOG(LogLevel::Debug, + "%p Muxed data, remaining-audio=%zu, remaining-video=%zu", this, + mEncodedAudioFrames.GetSize(), mEncodedVideoFrames.GetSize()); + + // If encoding is complete for both encoders we should signal end of stream, + // otherwise we keep going. + uint32_t flags = + mEncodedVideoFrames.AtEndOfStream() && mEncodedAudioFrames.AtEndOfStream() + ? ContainerWriter::END_OF_STREAM + : 0; + nsresult rv = mWriter->WriteEncodedTrack(frames, flags); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "Error! 
Failed to write muxed data to the container"); + } + return rv; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/Muxer.h b/dom/media/encoder/Muxer.h new file mode 100644 index 0000000000..ae1a2bb348 --- /dev/null +++ b/dom/media/encoder/Muxer.h @@ -0,0 +1,73 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_ENCODER_MUXER_H_ +#define DOM_MEDIA_ENCODER_MUXER_H_ + +#include "MediaQueue.h" + +namespace mozilla { + +class ContainerWriter; +class EncodedFrame; +class TrackMetadataBase; + +// Generic Muxer class that helps pace the output from track encoders to the +// ContainerWriter, so time never appears to go backwards. +// Note that the entire class is written for single threaded access. +class Muxer { + public: + explicit Muxer(UniquePtr<ContainerWriter> aWriter); + ~Muxer() = default; + + // Returns true when all tracks have ended, and all data has been muxed and + // fetched. + bool IsFinished(); + + // Returns true if this muxer has not been given metadata yet. + bool NeedsMetadata() const { return !mMetadataSet; } + + // Sets metadata for all tracks. This may only be called once. + nsresult SetMetadata(const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata); + + // Adds an encoded audio frame for muxing + void AddEncodedAudioFrame(EncodedFrame* aFrame); + + // Adds an encoded video frame for muxing + void AddEncodedVideoFrame(EncodedFrame* aFrame); + + // Marks the audio track as ended. Once all tracks for which we have metadata + // have ended, GetData() will drain and the muxer will be marked as finished. + void AudioEndOfStream(); + + // Marks the video track as ended. 
Once all tracks for which we have metadata + // have ended, GetData() will drain and the muxer will be marked as finished. + void VideoEndOfStream(); + + // Gets the data that has been muxed and written into the container so far. + nsresult GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers); + + private: + // Writes data in MediaQueues to the ContainerWriter. + nsresult Mux(); + + // Audio frames that have been encoded and are pending write to the muxer. + MediaQueue<EncodedFrame> mEncodedAudioFrames; + // Video frames that have been encoded and are pending write to the muxer. + MediaQueue<EncodedFrame> mEncodedVideoFrames; + // The writer for the specific container we're recording into. + UniquePtr<ContainerWriter> mWriter; + // True once metadata has been set in the muxer. + bool mMetadataSet = false; + // True once metadata has been written to file. + bool mMetadataEncoded = false; + // True if metadata is set and contains an audio track. + bool mHasAudio = false; + // True if metadata is set and contains a video track. + bool mHasVideo = false; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp new file mode 100644 index 0000000000..f637ae51d3 --- /dev/null +++ b/dom/media/encoder/OpusTrackEncoder.cpp @@ -0,0 +1,454 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "OpusTrackEncoder.h" +#include "nsString.h" +#include "GeckoProfiler.h" +#include "mozilla/CheckedInt.h" +#include "VideoUtils.h" + +#include <opus/opus.h> + +#define LOG(args, ...) 
+ +namespace mozilla { + +// The Opus format supports up to 8 channels, and supports multitrack audio up +// to 255 channels, but the current implementation supports only mono and +// stereo, and downmixes any more than that. +static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8; + +// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html +// In section "opus_encoder_init", channels must be 1 or 2 of input signal. +static const int MAX_CHANNELS = 2; + +// A maximum data bytes for Opus to encode. +static const int MAX_DATA_BYTES = 4096; + +// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4 +// Second paragraph, " The granule position of an audio data page is in units +// of PCM audio samples at a fixed rate of 48 kHz." +static const int kOpusSamplingRate = 48000; + +// The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms. +static const int kFrameDurationMs = 20; + +// The supported sampling rate of input signal (Hz), +// must be one of the following. Will resampled to 48kHz otherwise. +static const int kOpusSupportedInputSamplingRates[] = {8000, 12000, 16000, + 24000, 48000}; + +namespace { + +// An endian-neutral serialization of integers. Serializing T in little endian +// format to aOutput, where T is a 16 bits or 32 bits integer. +template <typename T> +static void SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput) { + for (uint32_t i = 0; i < sizeof(T); i++) { + aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8)))); + } +} + +static inline void SerializeToBuffer(const nsCString& aComment, + nsTArray<uint8_t>* aOutput) { + // Format of serializing a string to buffer is, the length of string (32 bits, + // little endian), and the string. 
+ SerializeToBuffer((uint32_t)(aComment.Length()), aOutput); + aOutput->AppendElements(aComment.get(), aComment.Length()); +} + +static void SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip, + uint32_t aInputSampleRate, + nsTArray<uint8_t>* aOutput) { + // The magic signature, null terminator has to be stripped off from strings. + static const uint8_t magic[] = "OpusHead"; + aOutput->AppendElements(magic, sizeof(magic) - 1); + + // The version must always be 1 (8 bits, unsigned). + aOutput->AppendElement(1); + + // Number of output channels (8 bits, unsigned). + aOutput->AppendElement(aChannelCount); + + // Number of samples (at 48 kHz) to discard from the decoder output when + // starting playback (16 bits, unsigned, little endian). + SerializeToBuffer(aPreskip, aOutput); + + // The sampling rate of input source (32 bits, unsigned, little endian). + SerializeToBuffer(aInputSampleRate, aOutput); + + // Output gain, an encoder should set this field to zero (16 bits, signed, + // little endian). + SerializeToBuffer((int16_t)0, aOutput); + + // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits, + // unsigned). + aOutput->AppendElement(0); +} + +static void SerializeOpusCommentHeader(const nsCString& aVendor, + const nsTArray<nsCString>& aComments, + nsTArray<uint8_t>* aOutput) { + // The magic signature, null terminator has to be stripped off. + static const uint8_t magic[] = "OpusTags"; + aOutput->AppendElements(magic, sizeof(magic) - 1); + + // The vendor; Should append in the following order: + // vendor string length (32 bits, unsigned, little endian) + // vendor string. + SerializeToBuffer(aVendor, aOutput); + + // Add comments; Should append in the following order: + // comment list length (32 bits, unsigned, little endian) + // comment #0 string length (32 bits, unsigned, little endian) + // comment #0 string + // comment #1 string length (32 bits, unsigned, little endian) + // comment #1 string ... 
+ SerializeToBuffer((uint32_t)aComments.Length(), aOutput); + for (uint32_t i = 0; i < aComments.Length(); ++i) { + SerializeToBuffer(aComments[i], aOutput); + } +} + +bool IsSampleRateSupported(TrackRate aSampleRate) { + // According to www.opus-codec.org, creating an opus encoder requires the + // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or + // 48000. If this constraint is not satisfied, we resample the input to 48kHz. + AutoTArray<int, 5> supportedSamplingRates; + supportedSamplingRates.AppendElements( + kOpusSupportedInputSamplingRates, + ArrayLength(kOpusSupportedInputSamplingRates)); + return supportedSamplingRates.Contains(aSampleRate); +} + +} // Anonymous namespace. + +OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate) + : AudioTrackEncoder(aTrackRate), + mOutputSampleRate(IsSampleRateSupported(aTrackRate) ? aTrackRate + : kOpusSamplingRate), + mEncoder(nullptr), + mLookahead(0), + mLookaheadWritten(0), + mResampler(nullptr), + mNumOutputFrames(0) {} + +OpusTrackEncoder::~OpusTrackEncoder() { + if (mEncoder) { + opus_encoder_destroy(mEncoder); + } + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } +} + +nsresult OpusTrackEncoder::Init(int aChannels) { + NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0), + NS_ERROR_FAILURE); + + // This version of encoder API only support 1 or 2 channels, + // So set the mChannels less or equal 2 and + // let InterleaveTrackData downmix pcm data. + mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels; + + // Reject non-audio sample rates. 
+ NS_ENSURE_TRUE(mTrackRate >= 8000, NS_ERROR_INVALID_ARG); + NS_ENSURE_TRUE(mTrackRate <= 192000, NS_ERROR_INVALID_ARG); + + if (NeedsResampler()) { + int error; + mResampler = speex_resampler_init(mChannels, mTrackRate, kOpusSamplingRate, + SPEEX_RESAMPLER_QUALITY_DEFAULT, &error); + + if (error != RESAMPLER_ERR_SUCCESS) { + return NS_ERROR_FAILURE; + } + } + + int error = 0; + mEncoder = opus_encoder_create(mOutputSampleRate, mChannels, + OPUS_APPLICATION_AUDIO, &error); + + if (error != OPUS_OK) { + return NS_ERROR_FAILURE; + } + + if (mAudioBitrate) { + error = opus_encoder_ctl(mEncoder, + OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate))); + if (error != OPUS_OK) { + return NS_ERROR_FAILURE; + } + } + + // In the case of Opus we need to calculate the codec delay based on the + // pre-skip. For more information see: + // https://tools.ietf.org/html/rfc7845#section-4.2 + error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)); + if (error != OPUS_OK) { + mLookahead = 0; + return NS_ERROR_FAILURE; + } + + SetInitialized(); + + return NS_OK; +} + +int OpusTrackEncoder::GetLookahead() const { + return mLookahead * kOpusSamplingRate / mOutputSampleRate; +} + +int OpusTrackEncoder::NumInputFramesPerPacket() const { + return mTrackRate * kFrameDurationMs / 1000; +} + +int OpusTrackEncoder::NumOutputFramesPerPacket() const { + return mOutputSampleRate * kFrameDurationMs / 1000; +} + +bool OpusTrackEncoder::NeedsResampler() const { + // A resampler is needed when mTrackRate is not supported by the opus encoder. + // This is equivalent to !IsSampleRateSupported(mTrackRate) but less cycles. 
+ return mTrackRate != mOutputSampleRate && + mOutputSampleRate == kOpusSamplingRate; +} + +already_AddRefed<TrackMetadataBase> OpusTrackEncoder::GetMetadata() { + AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER); + + MOZ_ASSERT(mInitialized || mCanceled); + + if (mCanceled || mEncodingComplete) { + return nullptr; + } + + if (!mInitialized) { + return nullptr; + } + + RefPtr<OpusMetadata> meta = new OpusMetadata(); + meta->mChannels = mChannels; + meta->mSamplingFrequency = mTrackRate; + + // Ogg and Webm timestamps are always sampled at 48k for Opus. + SerializeOpusIdHeader(mChannels, + mLookahead * (kOpusSamplingRate / mOutputSampleRate), + mTrackRate, &meta->mIdHeader); + + nsCString vendor; + vendor.AppendASCII(opus_get_version_string()); + + nsTArray<nsCString> comments; + comments.AppendElement( + nsLiteralCString("ENCODER=Mozilla" MOZ_APP_UA_VERSION)); + + SerializeOpusCommentHeader(vendor, comments, &meta->mCommentHeader); + + return meta.forget(); +} + +nsresult OpusTrackEncoder::GetEncodedTrack( + nsTArray<RefPtr<EncodedFrame>>& aData) { + AUTO_PROFILER_LABEL("OpusTrackEncoder::GetEncodedTrack", OTHER); + + MOZ_ASSERT(mInitialized || mCanceled); + + if (mCanceled || mEncodingComplete) { + return NS_ERROR_FAILURE; + } + + if (!mInitialized) { + // calculation below depends on the truth that mInitialized is true. + return NS_ERROR_FAILURE; + } + + TakeTrackData(mSourceSegment); + + int result = 0; + // Loop until we run out of packets of input data + while (result >= 0 && !mEncodingComplete) { + // re-sampled frames left last time which didn't fit into an Opus packet + // duration. + const int framesLeft = mResampledLeftover.Length() / mChannels; + MOZ_ASSERT(NumOutputFramesPerPacket() >= framesLeft); + // Fetch input frames such that there will be n frames where (n + + // framesLeft) >= NumOutputFramesPerPacket() after re-sampling. 
+ const int framesToFetch = NumInputFramesPerPacket() - + (framesLeft * mTrackRate / kOpusSamplingRate) + + (NeedsResampler() ? 1 : 0); + + if (!mEndOfStream && mSourceSegment.GetDuration() < framesToFetch) { + // Not enough raw data + return NS_OK; + } + + // Start encoding data. + AutoTArray<AudioDataValue, 9600> pcm; + pcm.SetLength(NumOutputFramesPerPacket() * mChannels); + + int frameCopied = 0; + + for (AudioSegment::ChunkIterator iter(mSourceSegment); + !iter.IsEnded() && frameCopied < framesToFetch; iter.Next()) { + AudioChunk chunk = *iter; + + // Chunk to the required frame size. + TrackTime frameToCopy = + std::min(chunk.GetDuration(), + static_cast<TrackTime>(framesToFetch - frameCopied)); + + // Possible greatest value of framesToFetch = 3844: see + // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy + // should not be able to exceed this value. + MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range"); + + if (!chunk.IsNull()) { + // Append the interleaved data to the end of pcm buffer. + AudioTrackEncoder::InterleaveTrackData( + chunk, frameToCopy, mChannels, + pcm.Elements() + frameCopied * mChannels); + } else { + CheckedInt<int> memsetLength = + CheckedInt<int>(frameToCopy) * mChannels * sizeof(AudioDataValue); + if (!memsetLength.isValid()) { + // This should never happen, but we use a defensive check because + // we really don't want a bad memset + MOZ_ASSERT_UNREACHABLE("memsetLength invalid!"); + return NS_ERROR_FAILURE; + } + memset(pcm.Elements() + frameCopied * mChannels, 0, + memsetLength.value()); + } + + frameCopied += frameToCopy; + } + + // Possible greatest value of framesToFetch = 3844: see + // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied + // should not be able to exceed this value. 
+ MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range"); + + int framesInPCM = frameCopied; + if (mResampler) { + AutoTArray<AudioDataValue, 9600> resamplingDest; + uint32_t inframes = frameCopied; + uint32_t outframes = inframes * kOpusSamplingRate / mTrackRate + 1; + + // We want to consume all the input data, so we slightly oversize the + // resampled data buffer so we can fit the output data in. We cannot + // really predict the output frame count at each call. + resamplingDest.SetLength(outframes * mChannels); + +#if MOZ_SAMPLE_TYPE_S16 + short* in = reinterpret_cast<short*>(pcm.Elements()); + short* out = reinterpret_cast<short*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_int(mResampler, in, &inframes, out, + &outframes); +#else + float* in = reinterpret_cast<float*>(pcm.Elements()); + float* out = reinterpret_cast<float*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_float(mResampler, in, &inframes, out, + &outframes); +#endif + + MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length()); + PodCopy(pcm.Elements(), mResampledLeftover.Elements(), + mResampledLeftover.Length()); + + uint32_t outframesToCopy = std::min( + outframes, + static_cast<uint32_t>(NumOutputFramesPerPacket() - framesLeft)); + + MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >= + outframesToCopy * mChannels); + PodCopy(pcm.Elements() + mResampledLeftover.Length(), + resamplingDest.Elements(), outframesToCopy * mChannels); + int frameLeftover = outframes - outframesToCopy; + mResampledLeftover.SetLength(frameLeftover * mChannels); + PodCopy(mResampledLeftover.Elements(), + resamplingDest.Elements() + outframesToCopy * mChannels, + mResampledLeftover.Length()); + // This is always at 48000Hz. + framesInPCM = framesLeft + outframesToCopy; + } + + // Remove the raw data which has been pulled to pcm buffer. + // The value of frameCopied should be equal to (or smaller than, if eos) + // NumOutputFramesPerPacket(). 
+ mSourceSegment.RemoveLeading(frameCopied); + + // Has reached the end of input stream and all queued data has pulled for + // encoding. + if (mSourceSegment.GetDuration() == 0 && mEndOfStream && + framesInPCM < NumOutputFramesPerPacket()) { + // Pad |mLookahead| samples to the end of the track to prevent loss of + // original data. + const int toWrite = std::min(mLookahead - mLookaheadWritten, + NumOutputFramesPerPacket() - framesInPCM); + PodZero(pcm.Elements() + framesInPCM * mChannels, toWrite * mChannels); + mLookaheadWritten += toWrite; + framesInPCM += toWrite; + if (mLookaheadWritten == mLookahead) { + mEncodingComplete = true; + LOG("[Opus] Done encoding."); + } + } + + MOZ_ASSERT_IF(!mEncodingComplete, + framesInPCM == NumOutputFramesPerPacket()); + + // Append null data to pcm buffer if the leftover data is not enough for + // opus encoder. + if (framesInPCM < NumOutputFramesPerPacket() && mEncodingComplete) { + PodZero(pcm.Elements() + framesInPCM * mChannels, + (NumOutputFramesPerPacket() - framesInPCM) * mChannels); + } + auto frameData = MakeRefPtr<EncodedFrame::FrameData>(); + // Encode the data with Opus Encoder. + frameData->SetLength(MAX_DATA_BYTES); + // result is returned as opus error code if it is negative. + result = 0; +#ifdef MOZ_SAMPLE_TYPE_S16 + const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements()); + result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(), + frameData->Elements(), MAX_DATA_BYTES); +#else + const float* pcmBuf = static_cast<float*>(pcm.Elements()); + result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(), + frameData->Elements(), MAX_DATA_BYTES); +#endif + frameData->SetLength(result >= 0 ? result : 0); + + if (result < 0) { + LOG("[Opus] Fail to encode data! 
Result: %s.", opus_strerror(result)); + } + if (mEncodingComplete) { + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } + mResampledLeftover.SetLength(0); + } + + // timestamp should be the time of the first sample + aData.AppendElement(MakeRefPtr<EncodedFrame>( + FramesToTimeUnit(mNumOutputFrames + mLookahead, mOutputSampleRate), + static_cast<uint64_t>(framesInPCM) * kOpusSamplingRate / + mOutputSampleRate, + kOpusSamplingRate, EncodedFrame::OPUS_AUDIO_FRAME, + std::move(frameData))); + + mNumOutputFrames += NumOutputFramesPerPacket(); + LOG("[Opus] mOutputTimeStamp %.3f.", + FramesToTimeUnit(mNumOutputFrames, mOutputSampleRate).ToSeconds()); + } + + return result >= 0 ? NS_OK : NS_ERROR_FAILURE; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h new file mode 100644 index 0000000000..9d8f9c6277 --- /dev/null +++ b/dom/media/encoder/OpusTrackEncoder.h @@ -0,0 +1,121 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OpusTrackEncoder_h_ +#define OpusTrackEncoder_h_ + +#include <stdint.h> +#include <speex/speex_resampler.h> +#include "TimeUnits.h" +#include "TrackEncoder.h" + +struct OpusEncoder; + +namespace mozilla { + +// Opus meta data structure +class OpusMetadata : public TrackMetadataBase { + public: + // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus. + nsTArray<uint8_t> mIdHeader; + // The Comment Header of OggOpus. 
+ nsTArray<uint8_t> mCommentHeader; + int32_t mChannels; + float mSamplingFrequency; + MetadataKind GetKind() const override { return METADATA_OPUS; } +}; + +class OpusTrackEncoder : public AudioTrackEncoder { + public: + explicit OpusTrackEncoder(TrackRate aTrackRate); + virtual ~OpusTrackEncoder(); + + already_AddRefed<TrackMetadataBase> GetMetadata() override; + + nsresult GetEncodedTrack(nsTArray<RefPtr<EncodedFrame>>& aData) override; + + /** + * The encoder lookahead at 48k rate. + */ + int GetLookahead() const; + + protected: + /** + * The number of frames, in the input rate mTrackRate, needed to fill an + * encoded opus packet. A frame is a sample per channel. + */ + int NumInputFramesPerPacket() const override; + + nsresult Init(int aChannels) override; + + /** + * The number of frames, in the output rate (see GetOutputSampleRate), needed + * to fill an encoded opus packet. A frame is a sample per channel. + */ + int NumOutputFramesPerPacket() const; + + /** + * True if the input needs to be resampled to be fed to the underlying opus + * encoder. + */ + bool NeedsResampler() const; + + public: + /** + * Get the samplerate of the data to be fed to the Opus encoder. This might be + * different from the input samplerate if resampling occurs. + */ + const TrackRate mOutputSampleRate; + + private: + /** + * The Opus encoder from libopus. + */ + OpusEncoder* mEncoder; + + /** + * A local segment queue which takes the raw data out from mRawSegment in the + * call of GetEncodedTrack(). Opus encoder only accepts GetPacketDuration() + * samples from mSourceSegment every encoding cycle, thus it needs to be + * global in order to store the leftover segments taken from mRawSegment. + */ + AudioSegment mSourceSegment; + + /** + * Total samples of delay added by codec (in rate mOutputSampleRate), can + * be queried by the encoder. 
From the perspective of decoding, real data + * begins this many samples late, so the encoder needs to append this many + * null samples to the end of stream, in order to align the time of input and + * output. + */ + int mLookahead; + + /** + * Number of mLookahead samples that has been written. When non-zero and equal + * to mLookahead, encoding is complete. + */ + int mLookaheadWritten; + + /** + * If the input sample rate does not divide 48kHz evenly, the input data are + * resampled. + */ + SpeexResamplerState* mResampler; + + /** + * Store the resampled frames that don't fit into an Opus packet duration. + * They will be prepended to the resampled frames next encoding cycle. + */ + nsTArray<AudioDataValue> mResampledLeftover; + + /** + * Number of audio frames encoded, in kOpusSamplingRate. + */ + uint64_t mNumOutputFrames; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp new file mode 100644 index 0000000000..bf02ba61c4 --- /dev/null +++ b/dom/media/encoder/TrackEncoder.cpp @@ -0,0 +1,764 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "TrackEncoder.h" + +#include "AudioChannelFormat.h" +#include "DriftCompensation.h" +#include "GeckoProfiler.h" +#include "MediaTrackGraph.h" +#include "MediaTrackListener.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/Logging.h" +#include "VideoUtils.h" +#include "mozilla/Logging.h" +#include "mozilla/Telemetry.h" + +namespace mozilla { + +LazyLogModule gTrackEncoderLog("TrackEncoder"); +#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg) + +static const int DEFAULT_CHANNELS = 1; +static const int DEFAULT_FRAME_WIDTH = 640; +static const int DEFAULT_FRAME_HEIGHT = 480; +// 10 second threshold if the audio encoder cannot be initialized. +static const int AUDIO_INIT_FAILED_DURATION = 10; +// 30 second threshold if the video encoder cannot be initialized. +static const int VIDEO_INIT_FAILED_DURATION = 30; +// A maximal key frame interval allowed to set. +// Longer values will be shorten to this value. +static const unsigned int DEFAULT_KEYFRAME_INTERVAL_MS = 1000; + +TrackEncoder::TrackEncoder(TrackRate aTrackRate) + : mEncodingComplete(false), + mInitialized(false), + mEndOfStream(false), + mCanceled(false), + mInitCounter(0), + mSuspended(false), + mTrackRate(aTrackRate) {} + +bool TrackEncoder::IsInitialized() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mInitialized; +} + +bool TrackEncoder::IsEncodingComplete() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mEncodingComplete; +} + +void TrackEncoder::SetInitialized() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mInitialized) { + return; + } + + mInitialized = true; + + for (auto& l : mListeners.Clone()) { + l->Initialized(this); + } +} + +void TrackEncoder::OnDataAvailable() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + for (auto& l : mListeners.Clone()) { + l->DataAvailable(this); + } +} + +void TrackEncoder::OnError() { + 
MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + Cancel(); + + for (auto& l : mListeners.Clone()) { + l->Error(this); + } +} + +void TrackEncoder::RegisterListener(TrackEncoderListener* aListener) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(!mListeners.Contains(aListener)); + mListeners.AppendElement(aListener); +} + +bool TrackEncoder::UnregisterListener(TrackEncoderListener* aListener) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mListeners.RemoveElement(aListener); +} + +void TrackEncoder::SetWorkerThread(AbstractThread* aWorkerThread) { + mWorkerThread = aWorkerThread; +} + +void AudioTrackEncoder::Suspend() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Suspend(), was %s", this, + mSuspended ? "suspended" : "live")); + + if (mSuspended) { + return; + } + + mSuspended = true; +} + +void AudioTrackEncoder::Resume() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Resume(), was %s", this, + mSuspended ? 
"suspended" : "live")); + + if (!mSuspended) { + return; + } + + mSuspended = false; +} + +void AudioTrackEncoder::AppendAudioSegment(AudioSegment&& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + AUTO_PROFILER_LABEL("AudioTrackEncoder::AppendAudioSegment", OTHER); + TRACK_LOG(LogLevel::Verbose, + ("[AudioTrackEncoder %p]: AppendAudioSegment() duration=%" PRIu64, + this, aSegment.GetDuration())); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + TryInit(mOutgoingBuffer, aSegment.GetDuration()); + + if (!mSuspended) { + mOutgoingBuffer.AppendFrom(&aSegment); + } + + if (mInitialized && + mOutgoingBuffer.GetDuration() >= NumInputFramesPerPacket()) { + OnDataAvailable(); + } +} + +void AudioTrackEncoder::TakeTrackData(AudioSegment& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mCanceled) { + return; + } + + aSegment.AppendFrom(&mOutgoingBuffer); +} + +void AudioTrackEncoder::TryInit(const AudioSegment& aSegment, + TrackTime aDuration) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mInitialized) { + return; + } + + mInitCounter++; + TRACK_LOG(LogLevel::Debug, + ("[AudioTrackEncoder %p]: Inited the audio encoder %d times", this, + mInitCounter)); + + for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + // The number of channels is determined by the first non-null chunk, and + // thus the audio encoder is initialized at this time. 
+ if (iter->IsNull()) { + continue; + } + + nsresult rv = Init(iter->mChannelData.Length()); + + if (NS_SUCCEEDED(rv)) { + TRACK_LOG(LogLevel::Info, + ("[AudioTrackEncoder %p]: Successfully initialized!", this)); + return; + } else { + TRACK_LOG( + LogLevel::Error, + ("[AudioTrackEncoder %p]: Failed to initialize the encoder!", this)); + OnError(); + return; + } + break; + } + + mNotInitDuration += aDuration; + if (!mInitialized && + ((mNotInitDuration - 1) / mTrackRate >= AUDIO_INIT_FAILED_DURATION) && + mInitCounter > 1) { + // Perform a best effort initialization since we haven't gotten any + // data yet. Motivated by issues like Bug 1336367 + TRACK_LOG(LogLevel::Warning, + ("[AudioTrackEncoder]: Initialize failed for %ds. Attempting to " + "init with %d (default) channels!", + AUDIO_INIT_FAILED_DURATION, DEFAULT_CHANNELS)); + nsresult rv = Init(DEFAULT_CHANNELS); + Telemetry::Accumulate( + Telemetry::MEDIA_RECORDER_TRACK_ENCODER_INIT_TIMEOUT_TYPE, 0); + if (NS_FAILED(rv)) { + TRACK_LOG(LogLevel::Error, + ("[AudioTrackEncoder %p]: Default-channel-init failed.", this)); + OnError(); + return; + } + } +} + +void AudioTrackEncoder::Cancel() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Cancel()", this)); + mCanceled = true; + mOutgoingBuffer.Clear(); +} + +void AudioTrackEncoder::NotifyEndOfStream() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, + ("[AudioTrackEncoder %p]: NotifyEndOfStream()", this)); + + if (!mCanceled && !mInitialized) { + // If source audio track is completely silent till the end of encoding, + // initialize the encoder with a default channel count. 
+ Init(DEFAULT_CHANNELS); + } + + mEndOfStream = true; + + if (mInitialized && !mCanceled) { + OnDataAvailable(); + } +} + +/*static*/ +void AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk, + int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput) { + uint32_t numChannelsToCopy = std::min( + aOutputChannels, static_cast<uint32_t>(aChunk.mChannelData.Length())); + switch (aChunk.mBufferFormat) { + case AUDIO_FORMAT_S16: { + AutoTArray<const int16_t*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const int16_t*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, + aChunk.mVolume); + break; + } + case AUDIO_FORMAT_FLOAT32: { + AutoTArray<const float*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const float*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, + aChunk.mVolume); + break; + } + case AUDIO_FORMAT_SILENCE: { + MOZ_ASSERT(false, "To implement."); + } + }; +} + +/*static*/ +void AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput, + int32_t aDuration, + int32_t aChannels, + AudioDataValue* aOutput) { + for (int32_t i = 0; i < aChannels; ++i) { + for (int32_t j = 0; j < aDuration; ++j) { + aOutput[i * aDuration + j] = aInput[i + j * aChannels]; + } + } +} + +size_t AudioTrackEncoder::SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf); +} + +VideoTrackEncoder::VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, + TrackRate aTrackRate, + FrameDroppingMode aFrameDroppingMode) + : TrackEncoder(aTrackRate), + mDriftCompensator(std::move(aDriftCompensator)), + mFrameWidth(0), + mFrameHeight(0), + mDisplayWidth(0), + mDisplayHeight(0), + 
mEncodedTicks(0), + mVideoBitrate(0), + mFrameDroppingMode(aFrameDroppingMode), + mKeyFrameInterval(DEFAULT_KEYFRAME_INTERVAL_MS), + mEnabled(true) { + mLastChunk.mDuration = 0; +} + +void VideoTrackEncoder::Suspend(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: Suspend() at %.3fs, was %s", this, + mStartTime.IsNull() ? 0.0 : (aTime - mStartTime).ToSeconds(), + mSuspended ? "suspended" : "live")); + + if (mSuspended) { + return; + } + + mSuspended = true; + mSuspendTime = aTime; +} + +void VideoTrackEncoder::Resume(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (!mSuspended) { + return; + } + + TRACK_LOG( + LogLevel::Info, + ("[VideoTrackEncoder %p]: Resume() after %.3fs, was %s", this, + (aTime - mSuspendTime).ToSeconds(), mSuspended ? "suspended" : "live")); + + mSuspended = false; + + TimeDuration suspendDuration = aTime - mSuspendTime; + if (!mLastChunk.mTimeStamp.IsNull()) { + VideoChunk* nextChunk = mIncomingBuffer.FindChunkContaining(aTime); + MOZ_ASSERT_IF(nextChunk, nextChunk->mTimeStamp <= aTime); + if (nextChunk) { + nextChunk->mTimeStamp = aTime; + } + mLastChunk.mTimeStamp += suspendDuration; + } + if (!mStartTime.IsNull()) { + mStartTime += suspendDuration; + } + + mSuspendTime = TimeStamp(); +} + +void VideoTrackEncoder::Disable(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Disable()", this)); + + if (mStartTime.IsNull()) { + // We haven't started yet. No need to touch future frames. + mEnabled = false; + return; + } + + // Advancing currentTime to process any frames in mIncomingBuffer between + // mCurrentTime and aTime. 
+ AdvanceCurrentTime(aTime); + if (!mLastChunk.mTimeStamp.IsNull()) { + // Insert a black frame at t=aTime into mIncomingBuffer, to trigger the + // shift to black at the right moment. + VideoSegment tempSegment; + tempSegment.AppendFrom(&mIncomingBuffer); + mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), + mLastChunk.mFrame.GetPrincipalHandle(), true, + aTime); + mIncomingBuffer.AppendFrom(&tempSegment); + } + mEnabled = false; +} + +void VideoTrackEncoder::Enable(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Enable()", this)); + + if (mStartTime.IsNull()) { + // We haven't started yet. No need to touch future frames. + mEnabled = true; + return; + } + + // Advancing currentTime to process any frames in mIncomingBuffer between + // mCurrentTime and aTime. + AdvanceCurrentTime(aTime); + if (!mLastChunk.mTimeStamp.IsNull()) { + // Insert a real frame at t=aTime into mIncomingBuffer, to trigger the + // shift from black at the right moment. + VideoSegment tempSegment; + tempSegment.AppendFrom(&mIncomingBuffer); + mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), + mLastChunk.mFrame.GetPrincipalHandle(), + mLastChunk.mFrame.GetForceBlack(), aTime); + mIncomingBuffer.AppendFrom(&tempSegment); + } + mEnabled = true; +} + +void VideoTrackEncoder::AppendVideoSegment(VideoSegment&& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AppendVideoSegment()", this)); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + if (iter->IsNull()) { + // A null image was sent. This is a signal from the source that we should + // clear any images buffered in the future. 
+ mIncomingBuffer.Clear(); + continue; // Don't append iter, as it is null. + } + if (VideoChunk* c = mIncomingBuffer.GetLastChunk()) { + if (iter->mTimeStamp < c->mTimeStamp) { + // Time went backwards. This can happen when a MediaDecoder seeks. + // We need to handle this by removing any frames buffered in the future + // and start over at iter->mTimeStamp. + mIncomingBuffer.Clear(); + } + } + mIncomingBuffer.AppendFrame(do_AddRef(iter->mFrame.GetImage()), + iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack(), iter->mTimeStamp); + } + aSegment.Clear(); +} + +void VideoTrackEncoder::TakeTrackData(VideoSegment& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mCanceled) { + return; + } + + aSegment.AppendFrom(&mOutgoingBuffer); + mOutgoingBuffer.Clear(); +} + +void VideoTrackEncoder::Init(const VideoSegment& aSegment, + const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mInitialized) { + return; + } + + mInitCounter++; + TRACK_LOG(LogLevel::Debug, + ("[VideoTrackEncoder %p]: Init the video encoder %d times", this, + mInitCounter)); + + for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + if (iter->IsNull()) { + continue; + } + + gfx::IntSize imgsize = iter->mFrame.GetImage()->GetSize(); + gfx::IntSize intrinsicSize = iter->mFrame.GetIntrinsicSize(); + nsresult rv = Init(imgsize.width, imgsize.height, intrinsicSize.width, + intrinsicSize.height); + + if (NS_SUCCEEDED(rv)) { + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: Successfully initialized!", this)); + return; + } else { + TRACK_LOG( + LogLevel::Error, + ("[VideoTrackEncoder %p]: Failed to initialize the encoder!", this)); + OnError(); + } + break; + } + + if (((aTime - mStartTime).ToSeconds() > VIDEO_INIT_FAILED_DURATION) && + mInitCounter > 1) { + TRACK_LOG(LogLevel::Warning, + ("[VideoTrackEncoder %p]: No successful init for 
%ds.", this, + VIDEO_INIT_FAILED_DURATION)); + Telemetry::Accumulate( + Telemetry::MEDIA_RECORDER_TRACK_ENCODER_INIT_TIMEOUT_TYPE, 1); + OnError(); + return; + } +} + +void VideoTrackEncoder::Cancel() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: Cancel()", this)); + mCanceled = true; + mIncomingBuffer.Clear(); + mOutgoingBuffer.Clear(); + mLastChunk.SetNull(0); +} + +void VideoTrackEncoder::NotifyEndOfStream() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (!mCanceled && !mInitialized) { + // If source video track is muted till the end of encoding, initialize the + // encoder with default frame width, frame height, and track rate. + Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_WIDTH, + DEFAULT_FRAME_HEIGHT); + } + + if (mEndOfStream) { + // We have already been notified. + return; + } + + mEndOfStream = true; + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: NotifyEndOfStream()", this)); + + if (!mLastChunk.IsNull()) { + RefPtr<layers::Image> lastImage = mLastChunk.mFrame.GetImage(); + const TimeStamp now = TimeStamp::Now(); + TimeStamp currentTime = mSuspended ? 
mSuspendTime : mCurrentTime; + currentTime = mDriftCompensator->GetVideoTime(now, currentTime); + TimeDuration absoluteEndTime = currentTime - mStartTime; + CheckedInt64 duration = + UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) - + mEncodedTicks; + if (duration.isValid() && duration.value() > 0) { + mEncodedTicks += duration.value(); + TRACK_LOG(LogLevel::Debug, + ("[VideoTrackEncoder %p]: Appending last video frame %p at pos " + "%.3fs, " + "track-end=%.3fs", + this, lastImage.get(), + (mLastChunk.mTimeStamp - mStartTime).ToSeconds(), + absoluteEndTime.ToSeconds())); + mOutgoingBuffer.AppendFrame( + lastImage.forget(), mLastChunk.mFrame.GetIntrinsicSize(), + PRINCIPAL_HANDLE_NONE, mLastChunk.mFrame.GetForceBlack() || !mEnabled, + mLastChunk.mTimeStamp); + mOutgoingBuffer.ExtendLastFrameBy(duration.value()); + } + } + + mIncomingBuffer.Clear(); + mLastChunk.SetNull(0); + + if (mInitialized && !mCanceled) { + OnDataAvailable(); + } +} + +void VideoTrackEncoder::SetStartOffset(const TimeStamp& aStartOffset) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(mCurrentTime.IsNull()); + TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: SetStartOffset()", this)); + mStartTime = aStartOffset; + mCurrentTime = aStartOffset; +} + +void VideoTrackEncoder::AdvanceCurrentTime(const TimeStamp& aTime) { + AUTO_PROFILER_LABEL("VideoTrackEncoder::AdvanceCurrentTime", OTHER); + + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(!mStartTime.IsNull()); + MOZ_ASSERT(!mCurrentTime.IsNull()); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + if (mSuspended) { + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AdvanceCurrentTime() suspended at %.3fs", + this, (mCurrentTime - mStartTime).ToSeconds())); + mCurrentTime = aTime; + mIncomingBuffer.ForgetUpToTime(mCurrentTime); + return; + } + + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AdvanceCurrentTime() to 
%.3fs", this, + (aTime - mStartTime).ToSeconds())); + + // Grab frames within the currentTime range from the incoming buffer. + VideoSegment tempSegment; + { + VideoChunk* previousChunk = &mLastChunk; + auto appendDupes = [&](const TimeStamp& aUpTo) { + while ((aUpTo - previousChunk->mTimeStamp).ToSeconds() > 1.0) { + // We encode at least one frame per second, even if there are none + // flowing. + previousChunk->mTimeStamp += TimeDuration::FromSeconds(1.0); + tempSegment.AppendFrame( + do_AddRef(previousChunk->mFrame.GetImage()), + previousChunk->mFrame.GetIntrinsicSize(), + previousChunk->mFrame.GetPrincipalHandle(), + previousChunk->mFrame.GetForceBlack() || !mEnabled, + previousChunk->mTimeStamp); + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Duplicating video frame (%p) at pos %.3f", + this, previousChunk->mFrame.GetImage(), + (previousChunk->mTimeStamp - mStartTime).ToSeconds())); + } + }; + for (VideoSegment::ChunkIterator iter(mIncomingBuffer); !iter.IsEnded(); + iter.Next()) { + MOZ_ASSERT(!iter->IsNull()); + if (!previousChunk->IsNull() && + iter->mTimeStamp <= previousChunk->mTimeStamp) { + // This frame starts earlier than previousChunk. Skip. + continue; + } + if (iter->mTimeStamp >= aTime) { + // This frame starts in the future. Stop. 
+ break; + } + if (!previousChunk->IsNull()) { + appendDupes(iter->mTimeStamp); + } + tempSegment.AppendFrame( + do_AddRef(iter->mFrame.GetImage()), iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack() || !mEnabled, iter->mTimeStamp); + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Taking video frame (%p) at pos %.3f", + this, iter->mFrame.GetImage(), + (iter->mTimeStamp - mStartTime).ToSeconds())); + previousChunk = &*iter; + } + if (!previousChunk->IsNull()) { + appendDupes(aTime); + } + } + mCurrentTime = aTime; + mIncomingBuffer.ForgetUpToTime(mCurrentTime); + + // Convert tempSegment timestamps to durations and add chunks with known + // duration to mOutgoingBuffer. + const TimeStamp now = TimeStamp::Now(); + bool chunkAppended = false; + for (VideoSegment::ConstChunkIterator iter(tempSegment); !iter.IsEnded(); + iter.Next()) { + VideoChunk chunk = *iter; + + if (mLastChunk.mTimeStamp.IsNull()) { + // This is the first real chunk in the track. Make it start at the + // beginning of the track. 
+ MOZ_ASSERT(!iter->mTimeStamp.IsNull()); + + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Got the first video frame (%p) at pos %.3f " + "(moving it to beginning)", + this, iter->mFrame.GetImage(), + (iter->mTimeStamp - mStartTime).ToSeconds())); + + mLastChunk = *iter; + mLastChunk.mTimeStamp = mStartTime; + continue; + } + + MOZ_ASSERT(!mLastChunk.IsNull()); + MOZ_ASSERT(!chunk.IsNull()); + + TimeDuration absoluteEndTime = + mDriftCompensator->GetVideoTime(now, chunk.mTimeStamp) - mStartTime; + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Appending video frame %p, at pos %.3fs " + "until %.3fs", + this, mLastChunk.mFrame.GetImage(), + (mDriftCompensator->GetVideoTime(now, mLastChunk.mTimeStamp) - + mStartTime) + .ToSeconds(), + absoluteEndTime.ToSeconds())); + CheckedInt64 duration = + UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) - + mEncodedTicks; + if (!duration.isValid()) { + NS_ERROR("Duration overflow"); + return; + } + + if (duration.value() <= 0) { + // A frame either started before the last frame (can happen when + // multiple frames are added before SetStartOffset), or + // two frames were so close together that they ended up at the same + // position. We handle both cases by ignoring the previous frame. + + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Duration from frame %p to frame %p " + "is %" PRId64 ". 
Ignoring %p", + this, mLastChunk.mFrame.GetImage(), iter->mFrame.GetImage(), + duration.value(), mLastChunk.mFrame.GetImage())); + + TimeStamp t = mLastChunk.mTimeStamp; + mLastChunk = *iter; + mLastChunk.mTimeStamp = t; + continue; + } + + mEncodedTicks += duration.value(); + mOutgoingBuffer.AppendFrame( + do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), PRINCIPAL_HANDLE_NONE, + mLastChunk.mFrame.GetForceBlack() || !mEnabled, mLastChunk.mTimeStamp); + mOutgoingBuffer.ExtendLastFrameBy(duration.value()); + chunkAppended = true; + mLastChunk = chunk; + } + + if (chunkAppended) { + Init(mOutgoingBuffer, mCurrentTime); + if (mInitialized) { + OnDataAvailable(); + } + } +} + +size_t VideoTrackEncoder::SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mIncomingBuffer.SizeOfExcludingThis(aMallocSizeOf) + + mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf); +} + +void VideoTrackEncoder::SetKeyFrameInterval(uint32_t aKeyFrameInterval) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + if (aKeyFrameInterval == 0) { + mKeyFrameInterval = DEFAULT_KEYFRAME_INTERVAL_MS; + return; + } + mKeyFrameInterval = std::min(aKeyFrameInterval, DEFAULT_KEYFRAME_INTERVAL_MS); +} + +} // namespace mozilla + +#undef TRACK_LOG diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h new file mode 100644 index 0000000000..942195c7dc --- /dev/null +++ b/dom/media/encoder/TrackEncoder.h @@ -0,0 +1,520 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef TrackEncoder_h_ +#define TrackEncoder_h_ + +#include "AudioSegment.h" +#include "EncodedFrame.h" +#include "MediaTrackGraph.h" +#include "TrackMetadataBase.h" +#include "VideoSegment.h" + +namespace mozilla { + +class AbstractThread; +class DriftCompensator; +class TrackEncoder; + +class TrackEncoderListener { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener) + + /** + * Called when the TrackEncoder's underlying encoder has been successfully + * initialized and there's non-null data ready to be encoded. + */ + virtual void Initialized(TrackEncoder* aEncoder) = 0; + + /** + * Called when there's new data ready to be encoded. + * Always called after Initialized(). + */ + virtual void DataAvailable(TrackEncoder* aEncoder) = 0; + + /** + * Called after the TrackEncoder hit an unexpected error, causing it to + * abort operation. + */ + virtual void Error(TrackEncoder* aEncoder) = 0; + + protected: + virtual ~TrackEncoderListener() = default; +}; + +/** + * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by + * MediaEncoder. All methods are to be called only on the worker thread. + * + * MediaTrackListeners will get store raw data in mIncomingBuffer, so + * mIncomingBuffer is protected by a lock. The control APIs are all called by + * MediaEncoder on its dedicated thread, where GetEncodedTrack is called + * periodically to swap out mIncomingBuffer, feed it to the encoder, and return + * the encoded data. + */ +class TrackEncoder { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoder); + + public: + explicit TrackEncoder(TrackRate aTrackRate); + + /** + * Called by MediaEncoder to cancel the encoding. + */ + virtual void Cancel() = 0; + + /** + * Notifies us that we have reached the end of the stream and no more data + * will be appended. + */ + virtual void NotifyEndOfStream() = 0; + + /** + * Creates and sets up meta data for a specific codec, called on the worker + * thread. 
+ */ + virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0; + + /** + * Encodes raw segments. Result data is returned in aData, and called on the + * worker thread. + */ + virtual nsresult GetEncodedTrack(nsTArray<RefPtr<EncodedFrame>>& aData) = 0; + + /** + * Returns true once this TrackEncoder is initialized. + */ + bool IsInitialized(); + + /** + * True if the track encoder has encoded all source segments coming from + * MediaTrackGraph. Call on the worker thread. + */ + bool IsEncodingComplete(); + + /** + * If this TrackEncoder was not already initialized, it is set to initialized + * and listeners are notified. + */ + void SetInitialized(); + + /** + * Notifies listeners that there is data available for encoding. + */ + void OnDataAvailable(); + + /** + * Called after an error. Cancels the encoding and notifies listeners. + */ + void OnError(); + + /** + * Registers a listener to events from this TrackEncoder. + * We hold a strong reference to the listener. + */ + void RegisterListener(TrackEncoderListener* aListener); + + /** + * Unregisters a listener from events from this TrackEncoder. + * The listener will stop receiving events synchronously. + */ + bool UnregisterListener(TrackEncoderListener* aListener); + + virtual void SetBitrate(const uint32_t aBitrate) = 0; + + /** + * It's optional to set the worker thread, but if you do we'll assert that + * we are in the worker thread in every method that gets called. + */ + void SetWorkerThread(AbstractThread* aWorkerThread); + + /** + * Measure size of internal buffers. + */ + virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0; + + protected: + virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); } + + /** + * True if the track encoder has encoded all source data. + */ + bool mEncodingComplete; + + /** + * True if the track encoder has been initialized successfully. 
+ */ + bool mInitialized; + + /** + * True once all data until the end of the input track has been received. + */ + bool mEndOfStream; + + /** + * True once this encoding has been cancelled. + */ + bool mCanceled; + + // How many times we have tried to initialize the encoder. + uint32_t mInitCounter; + + /** + * True if this TrackEncoder is currently suspended. + */ + bool mSuspended; + + /** + * The track rate of source media. + */ + const TrackRate mTrackRate; + + /** + * If set we assert that all methods are called on this thread. + */ + RefPtr<AbstractThread> mWorkerThread; + + nsTArray<RefPtr<TrackEncoderListener>> mListeners; +}; + +class AudioTrackEncoder : public TrackEncoder { + public: + explicit AudioTrackEncoder(TrackRate aTrackRate) + : TrackEncoder(aTrackRate), + mChannels(0), + mNotInitDuration(0), + mAudioBitrate(0) {} + + /** + * Suspends encoding from now, i.e., all future audio data received through + * AppendAudioSegment() until the next Resume() will be dropped. + */ + void Suspend(); + + /** + * Resumes encoding starting now, i.e., data from the next + * AppendAudioSegment() will get encoded. + */ + void Resume(); + + /** + * Appends and consumes track data from aSegment. + */ + void AppendAudioSegment(AudioSegment&& aSegment); + + /** + * Takes all track data that has been played out from the last time + * TakeTrackData ran and moves it to aSegment. + */ + void TakeTrackData(AudioSegment& aSegment); + + template <typename T> + static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput, float aVolume) { + if (aInput.Length() < aOutputChannels) { + // Up-mix. This might make the mChannelData have more than aChannels. 
+ AudioChannelsUpMix(&aInput, aOutputChannels, + SilentChannel::ZeroChannel<T>()); + } + + if (aInput.Length() > aOutputChannels) { + DownmixAndInterleave(aInput, aDuration, aVolume, aOutputChannels, + aOutput); + } else { + InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume, + aOutputChannels, aOutput); + } + } + + /** + * Interleaves the track data and stores the result into aOutput. Might need + * to up-mix or down-mix the channel data if the channels number of this chunk + * is different from aOutputChannels. The channel data from aChunk might be + * modified by up-mixing. + */ + static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput); + + /** + * De-interleaves the aInput data and stores the result into aOutput. + * No up-mix or down-mix operations inside. + */ + static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration, + int32_t aChannels, AudioDataValue* aOutput); + + /** + * Measure size of internal buffers. + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; + + void SetBitrate(const uint32_t aBitrate) override { + mAudioBitrate = aBitrate; + } + + /** + * Tries to initiate the AudioEncoder based on data in aSegment. + * This can be re-called often, as it will exit early should we already be + * initiated. mInitiated will only be set if there was enough data in + * aSegment to infer metadata. If mInitiated gets set, listeners are notified. + * + * Not having enough data in aSegment to initiate the encoder for an + * accumulated aDuration of one second will make us initiate with a default + * number of channels. + * + * If we attempt to initiate the underlying encoder but fail, we Cancel() and + * notify listeners. + */ + void TryInit(const AudioSegment& aSegment, TrackTime aDuration); + + void Cancel() override; + + /** + * Dispatched from MediaTrackGraph when we have finished feeding data to + * mIncomingBuffer. 
+ */ + void NotifyEndOfStream() override; + + protected: + /** + * Number of samples per channel in a pcm buffer. This is also the value of + * frame size required by audio encoder, and listeners will be notified when + * at least this much data has been added to mOutgoingBuffer. + */ + virtual int NumInputFramesPerPacket() const { return 0; } + + /** + * Initializes the audio encoder. The call of this method is delayed until we + * have received the first valid track from MediaTrackGraph. + */ + virtual nsresult Init(int aChannels) = 0; + + /** + * The number of channels are used for processing PCM data in the audio + * encoder. This value comes from the first valid audio chunk. If encoder + * can't support the channels in the chunk, downmix PCM stream can be + * performed. This value also be used to initialize the audio encoder. + */ + int mChannels; + + /** + * A segment queue of outgoing audio track data to the encoder. + * The contents of mOutgoingBuffer will always be what has been appended on + * the encoder thread but not yet consumed by the encoder sub class. + */ + AudioSegment mOutgoingBuffer; + + TrackTime mNotInitDuration; + + uint32_t mAudioBitrate; +}; + +enum class FrameDroppingMode { + ALLOW, // Allowed to drop frames to keep up under load + DISALLOW, // Must not drop any frames, even if it means we will OOM +}; + +class VideoTrackEncoder : public TrackEncoder { + public: + explicit VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, + TrackRate aTrackRate, + FrameDroppingMode aFrameDroppingMode); + + /** + * Suspends encoding from aTime, i.e., all video frame with a timestamp + * between aTime and the timestamp of the next Resume() will be dropped. + */ + void Suspend(const TimeStamp& aTime); + + /** + * Resumes encoding starting at aTime. + */ + void Resume(const TimeStamp& aTime); + + /** + * Makes the video black from aTime. + */ + void Disable(const TimeStamp& aTime); + + /** + * Makes the video non-black from aTime. 
+ * + * NB that it could still be forced black for other reasons, like principals. + */ + void Enable(const TimeStamp& aTime); + + /** + * Appends source video frames to mIncomingBuffer. We only append the source + * chunk if the image is different from mLastChunk's image. Called on the + * MediaTrackGraph thread. + */ + void AppendVideoSegment(VideoSegment&& aSegment); + + /** + * Takes track data from the last time TakeTrackData ran until mCurrentTime + * and moves it to aSegment. + */ + void TakeTrackData(VideoSegment& aSegment); + + /** + * Measure size of internal buffers. + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; + + void SetBitrate(const uint32_t aBitrate) override { + mVideoBitrate = aBitrate; + } + + /** + * Tries to initiate the VideoEncoder based on data in aSegment. + * This can be re-called often, as it will exit early should we already be + * initiated. mInitiated will only be set if there was enough data in + * aSegment to infer metadata. If mInitiated gets set, listeners are notified. + * + * Failing to initiate the encoder for an accumulated aDuration of 30 seconds + * is seen as an error and will cancel the current encoding. + */ + void Init(const VideoSegment& aSegment, const TimeStamp& aTime); + + TrackTime SecondsToMediaTime(double aS) const { + NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX, + "Bad seconds"); + return mTrackRate * aS; + } + + /** + * MediaTrackGraph notifies us about the time of the track's start. + * This gets called on the MediaEncoder thread after a dispatch. + */ + void SetStartOffset(const TimeStamp& aStartOffset); + + void Cancel() override; + + /** + * Notifies us that we have reached the end of the stream and no more data + * will be appended to mIncomingBuffer. + */ + void NotifyEndOfStream() override; + + /** + * Dispatched from MediaTrackGraph when it has run an iteration so we can + * hand more data to the encoder. 
+ */ + void AdvanceCurrentTime(const TimeStamp& aTime); + + /** + * Set desired keyframe interval defined in milliseconds. + */ + void SetKeyFrameInterval(uint32_t aKeyFrameInterval); + + protected: + /** + * Initialize the video encoder. In order to collect the value of width and + * height of source frames, this initialization is delayed until we have + * received the first valid video frame from MediaTrackGraph. + * Listeners will be notified after it has been successfully initialized. + */ + virtual nsresult Init(int aWidth, int aHeight, int aDisplayWidth, + int aDisplayHeight) = 0; + + /** + * Drift compensator for re-clocking incoming video frame wall-clock + * timestamps to audio time. + */ + const RefPtr<DriftCompensator> mDriftCompensator; + + /** + * The width of source video frame, ceiled if the source width is odd. + */ + int mFrameWidth; + + /** + * The height of source video frame, ceiled if the source height is odd. + */ + int mFrameHeight; + + /** + * The display width of source video frame. + */ + int mDisplayWidth; + + /** + * The display height of source video frame. + */ + int mDisplayHeight; + + /** + * The last unique frame and duration so far handled by + * NotifyAdvanceCurrentTime. When a new frame is detected, mLastChunk is added + * to mOutgoingBuffer. + */ + VideoChunk mLastChunk; + + /** + * A segment queue of incoming video track data, from listeners. + * The duration of mIncomingBuffer is irrelevant as we only look at TimeStamps + * of frames. Consumed data is replaced by null data. + */ + VideoSegment mIncomingBuffer; + + /** + * A segment queue of outgoing video track data to the encoder. + * The contents of mOutgoingBuffer will always be what has been consumed from + * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder + * sub class. There won't be any null data at the beginning of mOutgoingBuffer + * unless explicitly pushed by the producer. 
+ */ + VideoSegment mOutgoingBuffer; + + /** + * The number of mTrackRate ticks we have passed to mOutgoingBuffer. + */ + TrackTime mEncodedTicks; + + /** + * The time up to which we have forwarded data from mIncomingBuffer to + * mOutgoingBuffer. + */ + TimeStamp mCurrentTime; + + /** + * The time the video track started, so the start of the video track can be + * synced to the start of the audio track. + * + * Note that this time will progress during suspension, to make sure the + * incoming frames stay in sync with the output. + */ + TimeStamp mStartTime; + + /** + * The time Suspend was called on the MediaRecorder, so we can calculate the + * duration on the next Resume(). + */ + TimeStamp mSuspendTime; + + uint32_t mVideoBitrate; + + /** + * ALLOW to drop frames under load. + * DISALLOW to encode all frames, mainly for testing. + */ + FrameDroppingMode mFrameDroppingMode; + + /** + * The desired keyframe interval defined in milliseconds. + */ + uint32_t mKeyFrameInterval; + + /** + * True if the video MediaTrackTrack this VideoTrackEncoder is attached to is + * currently enabled. While false, we encode all frames as black. + */ + bool mEnabled; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h new file mode 100644 index 0000000000..503b52e5ec --- /dev/null +++ b/dom/media/encoder/TrackMetadataBase.h @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TrackMetadataBase_h_ +#define TrackMetadataBase_h_ + +#include "nsTArray.h" +#include "nsCOMPtr.h" +namespace mozilla { + +// A class represent meta data for various codec format. Only support one track +// information. 
+class TrackMetadataBase {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase)
+  // Identifies the concrete codec this metadata describes.
+  enum MetadataKind {
+    METADATA_OPUS,  // Represents the Opus metadata
+    METADATA_VP8,
+    METADATA_VORBIS,
+    METADATA_AVC,
+    METADATA_AAC,
+    METADATA_AMR,
+    METADATA_EVRC,
+    METADATA_UNKNOWN  // Metadata kind not set
+  };
+  // Returns the specific metadata kind.
+  virtual MetadataKind GetKind() const = 0;
+
+ protected:
+  // Protected destructor, to discourage deletion outside of Release():
+  virtual ~TrackMetadataBase() = default;
+};
+
+// The base class for audio metadata.
+class AudioTrackMetadata : public TrackMetadataBase {
+ public:
+  // The duration of each sample set generated by the encoder, counted in
+  // samples. If the duration is variable, this value should return 0.
+  virtual uint32_t GetAudioFrameDuration() = 0;
+
+  // The size of each sample set generated by the encoder, counted in bytes.
+  // If the size is variable, this value should return 0.
+  virtual uint32_t GetAudioFrameSize() = 0;
+
+  // AudioSampleRate is the number of audio samples per second.
+  virtual uint32_t GetAudioSampleRate() = 0;
+
+  virtual uint32_t GetAudioChannels() = 0;
+};
+
+// The base class for video metadata.
+class VideoTrackMetadata : public TrackMetadataBase {
+ public:
+  // VideoHeight and VideoWidth are the frame size of the elementary stream.
+  virtual uint32_t GetVideoHeight() = 0;
+  virtual uint32_t GetVideoWidth() = 0;
+
+  // VideoDisplayHeight and VideoDisplayWidth are the display frame size.
+  virtual uint32_t GetVideoDisplayHeight() = 0;
+  virtual uint32_t GetVideoDisplayWidth() = 0;
+
+  // VideoClockRate is the number of samples per second in video frame's
+  // timestamp.
+  // For example, if VideoClockRate is 90k Hz and VideoFrameRate is
+  // 30 fps, each frame's sample duration will be 3000 ticks.
+  virtual uint32_t GetVideoClockRate() = 0;
+
+  // VideoFrameRate is the number of frames per second.
+ virtual uint32_t GetVideoFrameRate() = 0; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp new file mode 100644 index 0000000000..6e169e781b --- /dev/null +++ b/dom/media/encoder/VP8TrackEncoder.cpp @@ -0,0 +1,583 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VP8TrackEncoder.h" + +#include "DriftCompensation.h" +#include "GeckoProfiler.h" +#include "ImageToI420.h" +#include "mozilla/gfx/2D.h" +#include "prsystem.h" +#include "VideoSegment.h" +#include "VideoUtils.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "WebMWriter.h" +#include "mozilla/media/MediaUtils.h" +#include "mozilla/dom/ImageUtils.h" +#include "mozilla/dom/ImageBitmapBinding.h" + +namespace mozilla { + +LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder"); +#define VP8LOG(level, msg, ...) 
\ + MOZ_LOG(gVP8TrackEncoderLog, level, (msg, ##__VA_ARGS__)) + +#define DEFAULT_BITRATE_BPS 2500000 +#define MAX_KEYFRAME_INTERVAL 600 + +using namespace mozilla::gfx; +using namespace mozilla::layers; +using namespace mozilla::media; +using namespace mozilla::dom; + +VP8TrackEncoder::VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, + TrackRate aTrackRate, + FrameDroppingMode aFrameDroppingMode) + : VideoTrackEncoder(std::move(aDriftCompensator), aTrackRate, + aFrameDroppingMode), + mVPXContext(new vpx_codec_ctx_t()), + mVPXImageWrapper(new vpx_image_t()) { + MOZ_COUNT_CTOR(VP8TrackEncoder); +} + +VP8TrackEncoder::~VP8TrackEncoder() { + Destroy(); + MOZ_COUNT_DTOR(VP8TrackEncoder); +} + +void VP8TrackEncoder::Destroy() { + if (mInitialized) { + vpx_codec_destroy(mVPXContext.get()); + } + + if (mVPXImageWrapper) { + vpx_img_free(mVPXImageWrapper.get()); + } + mInitialized = false; +} + +nsresult VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, + int32_t aDisplayWidth, int32_t aDisplayHeight) { + if (aWidth < 1 || aHeight < 1 || aDisplayWidth < 1 || aDisplayHeight < 1) { + return NS_ERROR_FAILURE; + } + + if (mInitialized) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + // Encoder configuration structure. + vpx_codec_enc_cfg_t config; + nsresult rv = SetConfigurationValues(aWidth, aHeight, aDisplayWidth, + aDisplayHeight, config); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + + // Creating a wrapper to the image - setting image data to NULL. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). 
+ vpx_img_wrap(mVPXImageWrapper.get(), VPX_IMG_FMT_I420, mFrameWidth, + mFrameHeight, 1, nullptr); + + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + if (vpx_codec_enc_init(mVPXContext.get(), vpx_codec_vp8_cx(), &config, + flags)) { + return NS_ERROR_FAILURE; + } + + vpx_codec_control(mVPXContext.get(), VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(mVPXContext.get(), VP8E_SET_CPUUSED, -6); + vpx_codec_control(mVPXContext.get(), VP8E_SET_TOKEN_PARTITIONS, + VP8_ONE_TOKENPARTITION); + + SetInitialized(); + + return NS_OK; +} + +nsresult VP8TrackEncoder::Reconfigure(int32_t aWidth, int32_t aHeight, + int32_t aDisplayWidth, + int32_t aDisplayHeight) { + if (aWidth <= 0 || aHeight <= 0 || aDisplayWidth <= 0 || + aDisplayHeight <= 0) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + if (!mInitialized) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + // Recreate image wrapper + vpx_img_free(mVPXImageWrapper.get()); + vpx_img_wrap(mVPXImageWrapper.get(), VPX_IMG_FMT_I420, aWidth, aHeight, 1, + nullptr); + // Encoder configuration structure. + vpx_codec_enc_cfg_t config; + nsresult rv = SetConfigurationValues(aWidth, aHeight, aDisplayWidth, + aDisplayHeight, config); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + // Set new configuration + if (vpx_codec_enc_config_set(mVPXContext.get(), &config) != VPX_CODEC_OK) { + VP8LOG(LogLevel::Error, "Failed to set new configuration"); + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +nsresult VP8TrackEncoder::SetConfigurationValues(int32_t aWidth, + int32_t aHeight, + int32_t aDisplayWidth, + int32_t aDisplayHeight, + vpx_codec_enc_cfg_t& config) { + mFrameWidth = aWidth; + mFrameHeight = aHeight; + mDisplayWidth = aDisplayWidth; + mDisplayHeight = aDisplayHeight; + + // Encoder configuration structure. 
+ memset(&config, 0, sizeof(vpx_codec_enc_cfg_t)); + if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0)) { + VP8LOG(LogLevel::Error, "Failed to get default configuration"); + return NS_ERROR_FAILURE; + } + + config.g_w = mFrameWidth; + config.g_h = mFrameHeight; + // TODO: Maybe we should have various aFrameRate bitrate pair for each + // devices? or for different platform + + // rc_target_bitrate needs kbit/s + config.rc_target_bitrate = + (mVideoBitrate != 0 ? mVideoBitrate : DEFAULT_BITRATE_BPS) / 1000; + + // Setting the time base of the codec + config.g_timebase.num = 1; + config.g_timebase.den = mTrackRate; + + config.g_error_resilient = 0; + + config.g_lag_in_frames = 0; // 0- no frame lagging + + int32_t number_of_cores = PR_GetNumberOfProcessors(); + if (mFrameWidth * mFrameHeight > 1280 * 960 && number_of_cores >= 6) { + config.g_threads = 3; // 3 threads for 1080p. + } else if (mFrameWidth * mFrameHeight > 640 * 480 && number_of_cores >= 3) { + config.g_threads = 2; // 2 threads for qHD/HD. + } else { + config.g_threads = 1; // 1 thread for VGA or less + } + + // rate control settings + config.rc_dropframe_thresh = 0; + config.rc_end_usage = VPX_VBR; + config.g_pass = VPX_RC_ONE_PASS; + // ffmpeg doesn't currently support streams that use resize. + // Therefore, for safety, we should turn it off until it does. 
+ config.rc_resize_allowed = 0; + config.rc_undershoot_pct = 100; + config.rc_overshoot_pct = 15; + config.rc_buf_initial_sz = 500; + config.rc_buf_optimal_sz = 600; + config.rc_buf_sz = 1000; + + // we set key frame interval to automatic and later manually + // force key frame by setting VPX_EFLAG_FORCE_KF when mKeyFrameInterval > 0 + config.kf_mode = VPX_KF_AUTO; + config.kf_max_dist = MAX_KEYFRAME_INTERVAL; + + return NS_OK; +} + +already_AddRefed<TrackMetadataBase> VP8TrackEncoder::GetMetadata() { + AUTO_PROFILER_LABEL("VP8TrackEncoder::GetMetadata", OTHER); + + MOZ_ASSERT(mInitialized || mCanceled); + + if (mCanceled || mEncodingComplete) { + return nullptr; + } + + if (!mInitialized) { + return nullptr; + } + + RefPtr<VP8Metadata> meta = new VP8Metadata(); + meta->mWidth = mFrameWidth; + meta->mHeight = mFrameHeight; + meta->mDisplayWidth = mDisplayWidth; + meta->mDisplayHeight = mDisplayHeight; + + VP8LOG(LogLevel::Info, + "GetMetadata() width=%d, height=%d, " + "displayWidht=%d, displayHeight=%d", + meta->mWidth, meta->mHeight, meta->mDisplayWidth, + meta->mDisplayHeight); + + return meta.forget(); +} + +nsresult VP8TrackEncoder::GetEncodedPartitions( + nsTArray<RefPtr<EncodedFrame>>& aData) { + vpx_codec_iter_t iter = nullptr; + EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME; + auto frameData = MakeRefPtr<EncodedFrame::FrameData>(); + const vpx_codec_cx_pkt_t* pkt = nullptr; + while ((pkt = vpx_codec_get_cx_data(mVPXContext.get(), &iter)) != nullptr) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + // Copy the encoded data from libvpx to frameData + frameData->AppendElements((uint8_t*)pkt->data.frame.buf, + pkt->data.frame.sz); + break; + } + default: { + break; + } + } + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + frameType = EncodedFrame::VP8_I_FRAME; + } + break; + } + } + + if (!frameData->IsEmpty()) { + // Convert the timestamp and duration 
// to Usecs.
+    media::TimeUnit timestamp =
+        FramesToTimeUnit(pkt->data.frame.pts, mTrackRate);
+    if (!timestamp.IsValid()) {
+      NS_ERROR("Microsecond timestamp overflow");
+      return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+    }
+
+    // Accumulate the total extracted duration, in mTrackRate ticks.
+    mExtractedDuration += pkt->data.frame.duration;
+    if (!mExtractedDuration.isValid()) {
+      NS_ERROR("Duration overflow");
+      return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+    }
+
+    media::TimeUnit totalDuration =
+        FramesToTimeUnit(mExtractedDuration.value(), mTrackRate);
+    if (!totalDuration.IsValid()) {
+      NS_ERROR("Duration overflow");
+      return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+    }
+
+    // This frame's duration is the delta between the new accumulated total
+    // and the total at the previous extraction (mExtractedDurationUs).
+    media::TimeUnit duration = totalDuration - mExtractedDurationUs;
+    if (!duration.IsValid()) {
+      NS_ERROR("Duration overflow");
+      return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+    }
+
+    mExtractedDurationUs = totalDuration;
+
+    VP8LOG(LogLevel::Verbose,
+           "GetEncodedPartitions TimeStamp %.2f, Duration %.2f, FrameType %d",
+           timestamp.ToSeconds(), duration.ToSeconds(), frameType);
+
+    // Copy the encoded data to aData.
+    aData.AppendElement(MakeRefPtr<EncodedFrame>(
+        timestamp, duration.ToMicroseconds(), PR_USEC_PER_SEC, frameType,
+        std::move(frameData)));
+  }
+
+  // NS_ERROR_NOT_AVAILABLE (pkt == nullptr) tells the caller the encoder
+  // produced no more packets, which is used for end-of-stream detection.
+  return pkt ? NS_OK : NS_ERROR_NOT_AVAILABLE;
+}
+
+// Rounds aValue up to the next multiple of N, with a minimum of N.
+template <int N>
+static int Aligned(int aValue) {
+  if (aValue < N) {
+    return N;
+  }
+
+  // The `- 1` avoids overreaching when `aValue % N == 0`.
+ return (((aValue - 1) / N) + 1) * N; +} + +nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk& aChunk) { + RefPtr<Image> img; + if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) { + if (!mMuteFrame) { + mMuteFrame = + VideoFrame::CreateBlackImage(gfx::IntSize(mFrameWidth, mFrameHeight)); + } + if (!mMuteFrame) { + VP8LOG(LogLevel::Warning, "Failed to allocate black image of size %dx%d", + mFrameWidth, mFrameHeight); + return NS_OK; + } + img = mMuteFrame; + } else { + img = aChunk.mFrame.GetImage(); + } + + if (img->GetSize() != IntSize(mFrameWidth, mFrameHeight)) { + VP8LOG(LogLevel::Info, "Dynamic resolution change (was %dx%d, now %dx%d).", + mFrameWidth, mFrameHeight, img->GetSize().width, + img->GetSize().height); + + gfx::IntSize intrinsicSize = aChunk.mFrame.GetIntrinsicSize(); + gfx::IntSize imgSize = aChunk.mFrame.GetImage()->GetSize(); + if (imgSize <= IntSize(mFrameWidth, + mFrameHeight) && // check buffer size instead + // If the new size is less than or + // equal to old, the existing + // encoder instance can continue. + NS_SUCCEEDED(Reconfigure(imgSize.width, imgSize.height, + intrinsicSize.width, intrinsicSize.height))) { + VP8LOG(LogLevel::Info, "Reconfigured VP8 encoder."); + } else { + // New frame size is larger; re-create the encoder. 
+ Destroy(); + nsresult rv = Init(imgSize.width, imgSize.height, intrinsicSize.width, + intrinsicSize.height); + VP8LOG(LogLevel::Info, "Recreated VP8 encoder."); + NS_ENSURE_SUCCESS(rv, rv); + } + } + + // Clear image state from last frame + mVPXImageWrapper->planes[VPX_PLANE_Y] = nullptr; + mVPXImageWrapper->stride[VPX_PLANE_Y] = 0; + mVPXImageWrapper->planes[VPX_PLANE_U] = nullptr; + mVPXImageWrapper->stride[VPX_PLANE_U] = 0; + mVPXImageWrapper->planes[VPX_PLANE_V] = nullptr; + mVPXImageWrapper->stride[VPX_PLANE_V] = 0; + + int yStride = Aligned<16>(mFrameWidth); + int yHeight = mFrameHeight; + size_t yPlaneSize = yStride * yHeight; + + int uvStride = Aligned<16>((mFrameWidth + 1) / 2); + int uvHeight = (mFrameHeight + 1) / 2; + size_t uvPlaneSize = uvStride * uvHeight; + + size_t neededSize = yPlaneSize + uvPlaneSize * 2; + + if (neededSize > mI420FrameSize) { + mI420Frame.reset(new (fallible) uint8_t[neededSize]); + } + + if (!mI420Frame) { + VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed", + neededSize); + return NS_ERROR_FAILURE; + } + mI420FrameSize = neededSize; + + uint8_t* yChannel = &mI420Frame[0]; + uint8_t* uChannel = &mI420Frame[yPlaneSize]; + uint8_t* vChannel = &mI420Frame[yPlaneSize + uvPlaneSize]; + + nsresult rv = ConvertToI420(img, yChannel, yStride, uChannel, uvStride, + vChannel, uvStride); + + if (NS_FAILED(rv)) { + VP8LOG(LogLevel::Error, "Converting to I420 failed"); + return rv; + } + + mVPXImageWrapper->planes[VPX_PLANE_Y] = yChannel; + mVPXImageWrapper->stride[VPX_PLANE_Y] = yStride; + mVPXImageWrapper->planes[VPX_PLANE_U] = uChannel; + mVPXImageWrapper->stride[VPX_PLANE_U] = uvStride; + mVPXImageWrapper->planes[VPX_PLANE_V] = vChannel; + mVPXImageWrapper->stride[VPX_PLANE_V] = uvStride; + + return NS_OK; +} + +// These two define value used in GetNextEncodeOperation to determine the +// EncodeOperation for next target frame. 
+#define I_FRAME_RATIO (0.5) +#define SKIP_FRAME_RATIO (0.75) + +/** + * Compares the elapsed time from the beginning of GetEncodedTrack and + * the processed frame duration in mSourceSegment + * in order to set the nextEncodeOperation for next target frame. + */ +VP8TrackEncoder::EncodeOperation VP8TrackEncoder::GetNextEncodeOperation( + TimeDuration aTimeElapsed, TrackTime aProcessedDuration) { + if (mFrameDroppingMode == FrameDroppingMode::DISALLOW) { + return ENCODE_NORMAL_FRAME; + } + + int64_t durationInUsec = + FramesToUsecs(aProcessedDuration, mTrackRate).value(); + if (aTimeElapsed.ToMicroseconds() > (durationInUsec * SKIP_FRAME_RATIO)) { + // The encoder is too slow. + // We should skip next frame to consume the mSourceSegment. + return SKIP_FRAME; + } else if (aTimeElapsed.ToMicroseconds() > (durationInUsec * I_FRAME_RATIO)) { + // The encoder is a little slow. + // We force the encoder to encode an I-frame to accelerate. + return ENCODE_I_FRAME; + } else { + return ENCODE_NORMAL_FRAME; + } +} + +/** + * Encoding flow in GetEncodedTrack(): + * 1: Check the mInitialized state and the packet duration. + * 2: Move the data from mRawSegment to mSourceSegment. + * 3: Encode the video chunks in mSourceSegment in a for-loop. + * 3.1: The duration is taken straight from the video chunk's duration. + * 3.2: Setup the video chunk with mVPXImageWrapper by PrepareRawFrame(). + * 3.3: Pass frame to vp8 encoder by vpx_codec_encode(). + * 3.4: Get the encoded frame from encoder by GetEncodedPartitions(). + * 3.5: Set the nextEncodeOperation for the next target frame. + * There is a heuristic: If the frame duration we have processed in + * mSourceSegment is 100ms, means that we can't spend more than 100ms to + * encode it. + * 4. Remove the encoded chunks in mSourceSegment after for-loop. 
 */
nsresult VP8TrackEncoder::GetEncodedTrack(
    nsTArray<RefPtr<EncodedFrame>>& aData) {
  AUTO_PROFILER_LABEL("VP8TrackEncoder::GetEncodedTrack", OTHER);

  MOZ_ASSERT(mInitialized || mCanceled);

  if (mCanceled || mEncodingComplete) {
    return NS_ERROR_FAILURE;
  }

  if (!mInitialized) {
    return NS_ERROR_FAILURE;
  }

  // Pull the raw chunks accumulated since the last call into our local
  // working segment.
  TakeTrackData(mSourceSegment);

  TrackTime totalProcessedDuration = 0;
  TimeStamp timebase = TimeStamp::Now();
  EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME;

  for (VideoSegment::ChunkIterator iter(mSourceSegment); !iter.IsEnded();
       iter.Next()) {
    VideoChunk& chunk = *iter;
    VP8LOG(LogLevel::Verbose,
           "nextEncodeOperation is %d for frame of duration %" PRId64,
           nextEncodeOperation, chunk.GetDuration());

    // Encode frame.
    if (nextEncodeOperation != SKIP_FRAME) {
      nsresult rv = PrepareRawFrame(chunk);
      NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);

      // Encode the data with VP8 encoder
      int flags = 0;
      if (nextEncodeOperation == ENCODE_I_FRAME) {
        VP8LOG(LogLevel::Warning,
               "MediaRecorder lagging behind. Encoding keyframe.");
        flags |= VPX_EFLAG_FORCE_KF;
      }

      // Sum duration of non-key frames and force keyframe if exceeded the given
      // keyframe interval.
      // mDurationSinceLastKeyframe is accumulated in mTrackRate ticks; the
      // `* 1000 / mTrackRate` conversion means mKeyFrameInterval is compared
      // in milliseconds.
      if (mKeyFrameInterval > 0) {
        if ((mDurationSinceLastKeyframe * 1000 / mTrackRate) >=
            mKeyFrameInterval) {
          mDurationSinceLastKeyframe = 0;
          flags |= VPX_EFLAG_FORCE_KF;
        }
        mDurationSinceLastKeyframe += chunk.GetDuration();
      }

      if (vpx_codec_encode(
              mVPXContext.get(), mVPXImageWrapper.get(), mEncodedTimestamp,
              (unsigned long)chunk.GetDuration(), flags, VPX_DL_REALTIME)) {
        VP8LOG(LogLevel::Error, "vpx_codec_encode failed to encode the frame.");
        return NS_ERROR_FAILURE;
      }
      // Get the encoded data from VP8 encoder.
      // NS_ERROR_NOT_AVAILABLE just means no output was ready yet; only
      // other failures are fatal.
      rv = GetEncodedPartitions(aData);
      if (rv != NS_OK && rv != NS_ERROR_NOT_AVAILABLE) {
        VP8LOG(LogLevel::Error, "GetEncodedPartitions failed.");
        return NS_ERROR_FAILURE;
      }
    } else {
      // SKIP_FRAME
      // Extend the duration of the last encoded data in aData
      // because this frame will be skipped.
      VP8LOG(LogLevel::Warning,
             "MediaRecorder lagging behind. Skipping a frame.");

      mExtractedDuration += chunk.mDuration;
      if (!mExtractedDuration.isValid()) {
        NS_ERROR("skipped duration overflow");
        return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
      }

      // Convert total extracted track-time into a TimeUnit and derive the
      // wall-clock duration this skipped chunk contributes.
      media::TimeUnit totalDuration =
          FramesToTimeUnit(mExtractedDuration.value(), mTrackRate);
      media::TimeUnit skippedDuration = totalDuration - mExtractedDurationUs;
      mExtractedDurationUs = totalDuration;
      if (!skippedDuration.IsValid()) {
        NS_ERROR("skipped duration overflow");
        return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
      }

      {
        // Replace the last frame with a copy whose duration is extended by
        // the skipped time (EncodedFrame is immutable, hence the swap).
        // NOTE(review): assumes aData is non-empty when a frame is skipped —
        // LastElement() on an empty array would assert; confirm a frame has
        // always been emitted before SKIP_FRAME can be chosen.
        auto& last = aData.LastElement();
        MOZ_DIAGNOSTIC_ASSERT(aData.LastElement());
        uint64_t longerDuration =
            last->mDuration + skippedDuration.ToMicroseconds();
        auto longerFrame = MakeRefPtr<EncodedFrame>(
            last->mTime, longerDuration, last->mDurationBase, last->mFrameType,
            last->mFrameData);
        std::swap(last, longerFrame);
        MOZ_ASSERT(last->mDuration == longerDuration);
      }
    }

    // Move forward the mEncodedTimestamp.
    mEncodedTimestamp += chunk.GetDuration();
    totalProcessedDuration += chunk.GetDuration();

    // Check what to do next: based on how far behind real time we are,
    // decide whether the next frame is encoded normally, as a forced
    // keyframe, or skipped.
    TimeDuration elapsedTime = TimeStamp::Now() - timebase;
    nextEncodeOperation =
        GetNextEncodeOperation(elapsedTime, totalProcessedDuration);
  }

  // Remove the chunks we have processed.
  mSourceSegment.Clear();

  // End of stream, pull the rest frames in encoder.
  if (mEndOfStream) {
    VP8LOG(LogLevel::Debug, "mEndOfStream is true");
    mEncodingComplete = true;
    // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data
    // until vpx_codec_get_cx_data return null.
    // Passing a null image flushes the encoder; NS_ERROR_NOT_AVAILABLE from
    // GetEncodedPartitions() signals the drain is complete.
    while (true) {
      if (vpx_codec_encode(mVPXContext.get(), nullptr, mEncodedTimestamp, 0, 0,
                           VPX_DL_REALTIME)) {
        return NS_ERROR_FAILURE;
      }
      nsresult rv = GetEncodedPartitions(aData);
      if (rv == NS_ERROR_NOT_AVAILABLE) {
        // End-of-stream
        break;
      }
      if (rv != NS_OK) {
        // Error
        return NS_ERROR_FAILURE;
      }
    }
  }

  return NS_OK;
}

}  // namespace mozilla

#undef VP8LOG
diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h
new file mode 100644
index 0000000000..c69f437afd
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.h
@@ -0,0 +1,108 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef VP8TrackEncoder_h_
#define VP8TrackEncoder_h_

#include "TrackEncoder.h"

#include "TimeUnits.h"
#include "vpx/vpx_codec.h"

namespace mozilla {

typedef struct vpx_codec_ctx vpx_codec_ctx_t;
typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t;
typedef struct vpx_image vpx_image_t;

/**
 * VP8TrackEncoder implements VideoTrackEncoder by using the libvpx library.
 * We implement a realtime and variable frame rate encoder. In order to achieve
 * that, there is a frame-drop encoding policy implemented in GetEncodedTrack.
 */
class VP8TrackEncoder : public VideoTrackEncoder {
  // Per-frame decision produced by GetNextEncodeOperation based on how far
  // the encoder lags behind real time.
  enum EncodeOperation {
    ENCODE_NORMAL_FRAME,  // VP8 track encoder works normally.
    ENCODE_I_FRAME,       // The next frame will be encoded as I-Frame.
    SKIP_FRAME,           // Skip the next frame.
  };

 public:
  VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
                  TrackRate aTrackRate, FrameDroppingMode aFrameDroppingMode);
  virtual ~VP8TrackEncoder();

  already_AddRefed<TrackMetadataBase> GetMetadata() final;

  nsresult GetEncodedTrack(nsTArray<RefPtr<EncodedFrame>>& aData) final;

 protected:
  nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
                int32_t aDisplayHeight) final;

 private:
  // Get the EncodeOperation for next target frame.
  EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed,
                                         TrackTime aProcessedDuration);

  // Get the encoded data from encoder to aData.
  // Return value: NS_ERROR_NOT_AVAILABLE if the vpx_codec_get_cx_data returns
  //               null for EOS detection.
  //               NS_OK if some data was appended to aData.
  //               An error nsresult otherwise.
  nsresult GetEncodedPartitions(nsTArray<RefPtr<EncodedFrame>>& aData);

  // Prepare the input data to the mVPXImageWrapper for encoding.
  nsresult PrepareRawFrame(VideoChunk& aChunk);

  // Re-configures an existing encoder with a new frame size.
  nsresult Reconfigure(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
                       int32_t aDisplayHeight);

  // Destroys the context and image wrapper. Does not de-allocate the structs.
  void Destroy();

  // Helper method to set the values on a VPX configuration.
  nsresult SetConfigurationValues(int32_t aWidth, int32_t aHeight,
                                  int32_t aDisplayWidth, int32_t aDisplayHeight,
                                  vpx_codec_enc_cfg_t& config);

  // Encoded timestamp, in mTrackRate units, advanced by each processed
  // chunk's duration.
  TrackTime mEncodedTimestamp = 0;

  // Total duration in mTrackRate extracted by GetEncodedPartitions().
  CheckedInt64 mExtractedDuration;

  // Total duration extracted by GetEncodedPartitions().
  media::TimeUnit mExtractedDurationUs;

  // Muted frame, we only create it once.
  RefPtr<layers::Image> mMuteFrame;

  // I420 frame, for converting to I420.
  UniquePtr<uint8_t[]> mI420Frame;
  size_t mI420FrameSize = 0;

  /**
   * A duration of non-key frames, accumulated in mTrackRate ticks (the .cpp
   * converts it to milliseconds via `* 1000 / mTrackRate` when comparing
   * against the keyframe interval).
   */
  TrackTime mDurationSinceLastKeyframe = 0;

  /**
   * A local segment queue which takes the raw data out from mRawSegment in the
   * call of GetEncodedTrack().
   */
  VideoSegment mSourceSegment;

  // VP8 relative members.
  // Codec context structure.
  UniquePtr<vpx_codec_ctx_t> mVPXContext;
  // Image Descriptor.
  UniquePtr<vpx_image_t> mVPXImageWrapper;
};

}  // namespace mozilla

#endif
diff --git a/dom/media/encoder/moz.build b/dom/media/encoder/moz.build
new file mode 100644
index 0000000000..c4a8cb6155
--- /dev/null
+++ b/dom/media/encoder/moz.build
@@ -0,0 +1,50 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

with Files("*"):
    BUG_COMPONENT = ("Core", "Audio/Video: Recording")

EXPORTS += [
    "ContainerWriter.h",
    "EncodedFrame.h",
    "MediaEncoder.h",
    "OpusTrackEncoder.h",
    "TrackEncoder.h",
    "TrackMetadataBase.h",
]

UNIFIED_SOURCES += [
    "MediaEncoder.cpp",
    "Muxer.cpp",
    "OpusTrackEncoder.cpp",
    "TrackEncoder.cpp",
]

# The VP8 encoder is only built when WebM encoding is enabled; it needs
# libyuv for pixel-format conversion.
if CONFIG["MOZ_WEBM_ENCODER"]:
    EXPORTS += [
        "VP8TrackEncoder.h",
    ]
    UNIFIED_SOURCES += [
        "VP8TrackEncoder.cpp",
    ]
    LOCAL_INCLUDES += ["/media/libyuv/libyuv/include"]

DEFINES["TRACING"] = True

FINAL_LIBRARY = "xul"

LOCAL_INCLUDES += [
    "/dom/media",
    "/ipc/chromium/src",
]

include("/ipc/chromium/chromium-config.mozbuild")

# Suppress some GCC warnings being treated as errors:
# - about attributes on forward declarations for types that are already
#   defined, which complains about an important MOZ_EXPORT for android::AString
if CONFIG["CC_TYPE"] in ("clang", "gcc"):
    CXXFLAGS += ["-Wno-error=attributes"]