diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /dom/media/encoder | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/encoder')
-rw-r--r-- | dom/media/encoder/ContainerWriter.h | 75 | ||||
-rw-r--r-- | dom/media/encoder/EncodedFrame.h | 64 | ||||
-rw-r--r-- | dom/media/encoder/MediaEncoder.cpp | 1142 | ||||
-rw-r--r-- | dom/media/encoder/MediaEncoder.h | 400 | ||||
-rw-r--r-- | dom/media/encoder/Muxer.cpp | 185 | ||||
-rw-r--r-- | dom/media/encoder/Muxer.h | 71 | ||||
-rw-r--r-- | dom/media/encoder/OpusTrackEncoder.cpp | 454 | ||||
-rw-r--r-- | dom/media/encoder/OpusTrackEncoder.h | 117 | ||||
-rw-r--r-- | dom/media/encoder/TrackEncoder.cpp | 822 | ||||
-rw-r--r-- | dom/media/encoder/TrackEncoder.h | 501 | ||||
-rw-r--r-- | dom/media/encoder/TrackMetadataBase.h | 76 | ||||
-rw-r--r-- | dom/media/encoder/VP8TrackEncoder.cpp | 720 | ||||
-rw-r--r-- | dom/media/encoder/VP8TrackEncoder.h | 167 | ||||
-rw-r--r-- | dom/media/encoder/moz.build | 42 |
14 files changed, 4836 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h new file mode 100644 index 0000000000..724c8b90c9 --- /dev/null +++ b/dom/media/encoder/ContainerWriter.h @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ContainerWriter_h_ +#define ContainerWriter_h_ + +#include "nsTArray.h" +#include "EncodedFrame.h" +#include "TrackMetadataBase.h" + +namespace mozilla { +/** + * ContainerWriter packs encoded track data into a specific media container. + */ +class ContainerWriter { + public: + ContainerWriter() : mInitialized(false), mIsWritingComplete(false) {} + virtual ~ContainerWriter() {} + // Mapping to DOMMediaStream::TrackTypeHints + enum { + CREATE_AUDIO_TRACK = 1 << 0, + CREATE_VIDEO_TRACK = 1 << 1, + }; + enum { END_OF_STREAM = 1 << 0 }; + + /** + * Writes encoded track data from aData into the internal stream of container + * writer. aFlags is used to signal the impl of different conditions + * such as END_OF_STREAM. Each impl may handle different flags, and should be + * documented accordingly. Currently, WriteEncodedTrack doesn't support + * explicit track specification, though each impl may provide logic to + * allocate frames into different tracks. + */ + virtual nsresult WriteEncodedTrack( + const nsTArray<RefPtr<EncodedFrame>>& aData, uint32_t aFlags = 0) = 0; + + /** + * Stores the metadata for all given tracks to the muxer. + * + * This method checks the integrity of aMetadata. + * If the metadata isn't well formatted, this method returns NS_ERROR_FAILURE. + * If the metadata is well formatted, it stores the metadata and returns + * NS_OK. 
+ */ + virtual nsresult SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) = 0; + + /** + * Indicate if the writer has finished to output data + */ + virtual bool IsWritingComplete() { return mIsWritingComplete; } + + enum { FLUSH_NEEDED = 1 << 0, GET_HEADER = 1 << 1 }; + + /** + * Copies the final container data to a buffer if it has accumulated enough + * packets from WriteEncodedTrack. This buffer of data is appended to + * aOutputBufs, and existing elements of aOutputBufs should not be modified. + * aFlags is true with FLUSH_NEEDED will force OggWriter to flush an ogg page + * even it is not full, and copy these container data to a buffer for + * aOutputBufs to append. + */ + virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs, + uint32_t aFlags = 0) = 0; + + protected: + bool mInitialized; + bool mIsWritingComplete; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/EncodedFrame.h b/dom/media/encoder/EncodedFrame.h new file mode 100644 index 0000000000..e76babef89 --- /dev/null +++ b/dom/media/encoder/EncodedFrame.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef EncodedFrame_h_ +#define EncodedFrame_h_ + +#include "nsISupportsImpl.h" +#include "mozilla/media/MediaUtils.h" +#include "TimeUnits.h" +#include "VideoUtils.h" + +namespace mozilla { + +// Represent an encoded frame emitted by an encoder +class EncodedFrame final { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame) + public: + enum FrameType { + VP8_I_FRAME, // VP8 intraframe + VP8_P_FRAME, // VP8 predicted frame + OPUS_AUDIO_FRAME, // Opus audio frame + UNKNOWN // FrameType not set + }; + using ConstFrameData = const media::Refcountable<nsTArray<uint8_t>>; + using FrameData = media::Refcountable<nsTArray<uint8_t>>; + EncodedFrame(const media::TimeUnit& aTime, uint64_t aDuration, + uint64_t aDurationBase, FrameType aFrameType, + RefPtr<ConstFrameData> aData) + : mTime(aTime), + mDuration(aDuration), + mDurationBase(aDurationBase), + mFrameType(aFrameType), + mFrameData(std::move(aData)) { + MOZ_ASSERT(mFrameData); + MOZ_ASSERT_IF(mFrameType == VP8_I_FRAME, mDurationBase == PR_USEC_PER_SEC); + MOZ_ASSERT_IF(mFrameType == VP8_P_FRAME, mDurationBase == PR_USEC_PER_SEC); + MOZ_ASSERT_IF(mFrameType == OPUS_AUDIO_FRAME, mDurationBase == 48000); + } + // Timestamp in microseconds + const media::TimeUnit mTime; + // The playback duration of this packet in mDurationBase. + const uint64_t mDuration; + // The time base of mDuration. + const uint64_t mDurationBase; + // Represent what is in the FrameData + const FrameType mFrameType; + // Encoded data + const RefPtr<ConstFrameData> mFrameData; + + // The end time of the frame in microseconds. 
+ media::TimeUnit GetEndTime() const { + return mTime + media::TimeUnit(mDuration, mDurationBase); + } + + private: + // Private destructor, to discourage deletion outside of Release(): + ~EncodedFrame() = default; +}; + +} // namespace mozilla + +#endif // EncodedFrame_h_ diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp new file mode 100644 index 0000000000..4eca742c77 --- /dev/null +++ b/dom/media/encoder/MediaEncoder.cpp @@ -0,0 +1,1142 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MediaEncoder.h" + +#include <algorithm> +#include "AudioNodeEngine.h" +#include "AudioNodeTrack.h" +#include "DriftCompensation.h" +#include "MediaDecoder.h" +#include "MediaTrackGraphImpl.h" +#include "MediaTrackListener.h" +#include "mozilla/dom/AudioNode.h" +#include "mozilla/dom/AudioStreamTrack.h" +#include "mozilla/dom/Blob.h" +#include "mozilla/dom/BlobImpl.h" +#include "mozilla/dom/MediaStreamTrack.h" +#include "mozilla/dom/MutableBlobStorage.h" +#include "mozilla/dom/VideoStreamTrack.h" +#include "mozilla/gfx/Point.h" // IntSize +#include "mozilla/Logging.h" +#include "mozilla/Preferences.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/TaskQueue.h" +#include "mozilla/Unused.h" +#include "Muxer.h" +#include "nsMimeTypes.h" +#include "nsThreadUtils.h" +#include "OggWriter.h" +#include "OpusTrackEncoder.h" +#include "TimeUnits.h" +#include "Tracing.h" + +#include "VP8TrackEncoder.h" +#include "WebMWriter.h" + +mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder"); +#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg) + +namespace mozilla { + +using namespace dom; +using namespace media; + +namespace 
{ +class BlobStorer : public MutableBlobStorageCallback { + MozPromiseHolder<MediaEncoder::BlobPromise> mHolder; + + virtual ~BlobStorer() = default; + + public: + BlobStorer() = default; + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BlobStorer, override) + + void BlobStoreCompleted(MutableBlobStorage*, BlobImpl* aBlobImpl, + nsresult aRv) override { + MOZ_ASSERT(NS_IsMainThread()); + if (NS_FAILED(aRv)) { + mHolder.Reject(aRv, __func__); + return; + } + + mHolder.Resolve(aBlobImpl, __func__); + } + + RefPtr<MediaEncoder::BlobPromise> Promise() { + return mHolder.Ensure(__func__); + } +}; +} // namespace + +class MediaEncoder::AudioTrackListener : public DirectMediaTrackListener { + public: + AudioTrackListener(RefPtr<DriftCompensator> aDriftCompensator, + RefPtr<MediaEncoder> aMediaEncoder) + : mDirectConnected(false), + mInitialized(false), + mRemoved(false), + mDriftCompensator(std::move(aDriftCompensator)), + mMediaEncoder(std::move(aMediaEncoder)), + mEncoderThread(mMediaEncoder->mEncoderThread), + mShutdownPromise(mShutdownHolder.Ensure(__func__)) { + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mAudioEncoder); + MOZ_ASSERT(mEncoderThread); + } + + void NotifyDirectListenerInstalled(InstallationResult aResult) override { + if (aResult == InstallationResult::SUCCESS) { + LOG(LogLevel::Info, ("Audio track direct listener installed")); + mDirectConnected = true; + } else { + LOG(LogLevel::Info, ("Audio track failed to install direct listener")); + MOZ_ASSERT(!mDirectConnected); + } + } + + void NotifyDirectListenerUninstalled() override { + mDirectConnected = false; + + if (mRemoved) { + mMediaEncoder = nullptr; + mEncoderThread = nullptr; + } + } + + void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aQueuedMedia) override { + TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p", + mMediaEncoder.get()); + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mEncoderThread); + + if (!mInitialized) { + 
mDriftCompensator->NotifyAudioStart(TimeStamp::Now()); + mInitialized = true; + } + + mDriftCompensator->NotifyAudio(aQueuedMedia.GetDuration()); + + const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia); + + AudioSegment copy; + copy.AppendSlice(audio, 0, audio.GetDuration()); + + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::AudioTrackEncoder::AppendAudioSegment", + [encoder = mMediaEncoder, copy = std::move(copy)]() mutable { + encoder->mAudioEncoder->AppendAudioSegment(std::move(copy)); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnded(MediaTrackGraph* aGraph) override { + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mAudioEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv = mEncoderThread->Dispatch( + NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream", + [encoder = mMediaEncoder] { + encoder->mAudioEncoder->NotifyEndOfStream(); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyRemoved(MediaTrackGraph* aGraph) override { + nsresult rv = mEncoderThread->Dispatch( + NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream", + [encoder = mMediaEncoder] { + encoder->mAudioEncoder->NotifyEndOfStream(); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mRemoved = true; + + if (!mDirectConnected) { + mMediaEncoder = nullptr; + mEncoderThread = nullptr; + } + + mShutdownHolder.Resolve(true, __func__); + } + + const RefPtr<GenericNonExclusivePromise>& OnShutdown() const { + return mShutdownPromise; + } + + private: + bool mDirectConnected; + bool mInitialized; + bool mRemoved; + const RefPtr<DriftCompensator> mDriftCompensator; + RefPtr<MediaEncoder> mMediaEncoder; + RefPtr<TaskQueue> mEncoderThread; + MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder; + const RefPtr<GenericNonExclusivePromise> mShutdownPromise; +}; + +class MediaEncoder::VideoTrackListener : public 
DirectMediaTrackListener { + public: + explicit VideoTrackListener(RefPtr<MediaEncoder> aMediaEncoder) + : mDirectConnected(false), + mInitialized(false), + mRemoved(false), + mPendingAdvanceCurrentTime(false), + mMediaEncoder(std::move(aMediaEncoder)), + mEncoderThread(mMediaEncoder->mEncoderThread), + mShutdownPromise(mShutdownHolder.Ensure(__func__)) { + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mEncoderThread); + } + + void NotifyDirectListenerInstalled(InstallationResult aResult) override { + if (aResult == InstallationResult::SUCCESS) { + LOG(LogLevel::Info, ("Video track direct listener installed")); + mDirectConnected = true; + } else { + LOG(LogLevel::Info, ("Video track failed to install direct listener")); + MOZ_ASSERT(!mDirectConnected); + return; + } + } + + void NotifyDirectListenerUninstalled() override { + mDirectConnected = false; + + if (mRemoved) { + mMediaEncoder = nullptr; + mEncoderThread = nullptr; + } + } + + void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aQueuedMedia) override { + TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p", + mMediaEncoder.get()); + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mVideoEncoder); + MOZ_ASSERT(mEncoderThread); + + mCurrentTime = TimeStamp::Now(); + if (!mInitialized) { + nsresult rv = mEncoderThread->Dispatch( + NS_NewRunnableFunction("mozilla::VideoTrackEncoder::SetStartOffset", + [encoder = mMediaEncoder, now = mCurrentTime] { + encoder->mVideoEncoder->SetStartOffset(now); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + mInitialized = true; + } + + if (!mPendingAdvanceCurrentTime) { + mPendingAdvanceCurrentTime = true; + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::AdvanceCurrentTime", + [encoder = mMediaEncoder, now = mCurrentTime] { + encoder->mVideoListener->mPendingAdvanceCurrentTime = false; + encoder->mVideoEncoder->AdvanceCurrentTime(now); + })); + 
MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + } + + void NotifyRealtimeTrackData(MediaTrackGraph* aGraph, TrackTime aTrackOffset, + const MediaSegment& aMedia) override { + TRACE_COMMENT("MediaEncoder::NotifyRealtimeTrackData", "%p", + mMediaEncoder.get()); + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mVideoEncoder); + MOZ_ASSERT(mEncoderThread); + MOZ_ASSERT(aMedia.GetType() == MediaSegment::VIDEO); + + const VideoSegment& video = static_cast<const VideoSegment&>(aMedia); + VideoSegment copy; + for (VideoSegment::ConstChunkIterator iter(video); !iter.IsEnded(); + iter.Next()) { + copy.AppendFrame(do_AddRef(iter->mFrame.GetImage()), + iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack(), iter->mTimeStamp); + } + + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::AppendVideoSegment", + [encoder = mMediaEncoder, copy = std::move(copy)]() mutable { + encoder->mVideoEncoder->AppendVideoSegment(std::move(copy)); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnabledStateChanged(MediaTrackGraph* aGraph, + bool aEnabled) override { + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mVideoEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv; + if (aEnabled) { + rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::Enable", + [encoder = mMediaEncoder, now = TimeStamp::Now()] { + encoder->mVideoEncoder->Enable(now); + })); + } else { + rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::Disable", + [encoder = mMediaEncoder, now = TimeStamp::Now()] { + encoder->mVideoEncoder->Disable(now); + })); + } + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyEnded(MediaTrackGraph* aGraph) override { + MOZ_ASSERT(mMediaEncoder); + MOZ_ASSERT(mMediaEncoder->mVideoEncoder); + MOZ_ASSERT(mEncoderThread); + + nsresult rv = 
mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::NotifyEndOfStream", + [encoder = mMediaEncoder, now = mCurrentTime] { + if (!now.IsNull()) { + encoder->mVideoEncoder->AdvanceCurrentTime(now); + } + encoder->mVideoEncoder->NotifyEndOfStream(); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } + + void NotifyRemoved(MediaTrackGraph* aGraph) override { + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::NotifyEndOfStream", + [encoder = mMediaEncoder, now = mCurrentTime] { + if (!now.IsNull()) { + encoder->mVideoEncoder->AdvanceCurrentTime(now); + } + encoder->mVideoEncoder->NotifyEndOfStream(); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + + mRemoved = true; + + if (!mDirectConnected) { + mMediaEncoder = nullptr; + mEncoderThread = nullptr; + } + + mShutdownHolder.Resolve(true, __func__); + } + + const RefPtr<GenericNonExclusivePromise>& OnShutdown() const { + return mShutdownPromise; + } + + private: + bool mDirectConnected; + bool mInitialized; + bool mRemoved; + TimeStamp mCurrentTime; + Atomic<bool> mPendingAdvanceCurrentTime; + RefPtr<MediaEncoder> mMediaEncoder; + RefPtr<TaskQueue> mEncoderThread; + MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder; + const RefPtr<GenericNonExclusivePromise> mShutdownPromise; +}; + +class MediaEncoder::EncoderListener : public TrackEncoderListener { + public: + EncoderListener(TaskQueue* aEncoderThread, MediaEncoder* aEncoder) + : mEncoderThread(aEncoderThread), mEncoder(aEncoder) {} + + void Forget() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + mEncoder = nullptr; + } + + void Initialized(TrackEncoder* aTrackEncoder) override { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(aTrackEncoder->IsInitialized()); + + if (!mEncoder) { + return; + } + + mEncoder->UpdateInitialized(); + } + + void Started(TrackEncoder* aTrackEncoder) override { + 
MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + MOZ_ASSERT(aTrackEncoder->IsStarted()); + + if (!mEncoder) { + return; + } + + mEncoder->UpdateStarted(); + } + + void Error(TrackEncoder* aTrackEncoder) override { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (!mEncoder) { + return; + } + + mEncoder->SetError(); + } + + protected: + RefPtr<TaskQueue> mEncoderThread; + RefPtr<MediaEncoder> mEncoder; +}; + +MediaEncoder::MediaEncoder( + RefPtr<TaskQueue> aEncoderThread, + RefPtr<DriftCompensator> aDriftCompensator, + UniquePtr<ContainerWriter> aWriter, + UniquePtr<AudioTrackEncoder> aAudioEncoder, + UniquePtr<VideoTrackEncoder> aVideoEncoder, + UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue, + UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue, + TrackRate aTrackRate, const nsAString& aMimeType, uint64_t aMaxMemory, + TimeDuration aTimeslice) + : mMainThread(GetMainThreadSerialEventTarget()), + mEncoderThread(std::move(aEncoderThread)), + mEncodedAudioQueue(std::move(aEncodedAudioQueue)), + mEncodedVideoQueue(std::move(aEncodedVideoQueue)), + mMuxer(MakeUnique<Muxer>(std::move(aWriter), *mEncodedAudioQueue, + *mEncodedVideoQueue)), + mAudioEncoder(std::move(aAudioEncoder)), + mAudioListener(mAudioEncoder ? MakeAndAddRef<AudioTrackListener>( + std::move(aDriftCompensator), this) + : nullptr), + mVideoEncoder(std::move(aVideoEncoder)), + mVideoListener(mVideoEncoder ? 
MakeAndAddRef<VideoTrackListener>(this) + : nullptr), + mEncoderListener(MakeAndAddRef<EncoderListener>(mEncoderThread, this)), + mMimeType(aMimeType), + mMaxMemory(aMaxMemory), + mTimeslice(aTimeslice), + mStartTime(TimeStamp::Now()), + mInitialized(false), + mStarted(false), + mCompleted(false), + mError(false) { + if (mAudioEncoder) { + mAudioPushListener = mEncodedAudioQueue->PushEvent().Connect( + mEncoderThread, this, &MediaEncoder::OnEncodedAudioPushed); + mAudioFinishListener = mEncodedAudioQueue->FinishEvent().Connect( + mEncoderThread, this, &MediaEncoder::MaybeShutdown); + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::AudioTrackEncoder::RegisterListener", + [self = RefPtr<MediaEncoder>(this), this] { + mAudioEncoder->RegisterListener(mEncoderListener); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } else { + mMuxedAudioEndTime = TimeUnit::FromInfinity(); + mEncodedAudioQueue->Finish(); + } + if (mVideoEncoder) { + mVideoPushListener = mEncodedVideoQueue->PushEvent().Connect( + mEncoderThread, this, &MediaEncoder::OnEncodedVideoPushed); + mVideoFinishListener = mEncodedVideoQueue->FinishEvent().Connect( + mEncoderThread, this, &MediaEncoder::MaybeShutdown); + nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction( + "mozilla::VideoTrackEncoder::RegisterListener", + [self = RefPtr<MediaEncoder>(this), this] { + mVideoEncoder->RegisterListener(mEncoderListener); + })); + MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv)); + Unused << rv; + } else { + mMuxedVideoEndTime = TimeUnit::FromInfinity(); + mEncodedVideoQueue->Finish(); + } +} + +MediaEncoder::~MediaEncoder() { + MOZ_ASSERT(!mAudioTrack); + MOZ_ASSERT(!mVideoTrack); + MOZ_ASSERT(!mAudioNode); + MOZ_ASSERT(!mInputPort); + MOZ_ASSERT(!mPipeTrack); +} + +void MediaEncoder::EnsureGraphTrackFrom(MediaTrack* aTrack) { + if (mGraphTrack) { + return; + } + MOZ_DIAGNOSTIC_ASSERT(!aTrack->IsDestroyed()); + mGraphTrack = MakeAndAddRef<SharedDummyTrack>( + 
aTrack->GraphImpl()->CreateSourceTrack(MediaSegment::VIDEO)); +} + +void MediaEncoder::RunOnGraph(already_AddRefed<Runnable> aRunnable) { + MOZ_ASSERT(mGraphTrack); + class Message : public ControlMessage { + public: + explicit Message(already_AddRefed<Runnable> aRunnable) + : ControlMessage(nullptr), mRunnable(aRunnable) {} + void Run() override { + TRACE("MediaEncoder::RunOnGraph"); + mRunnable->Run(); + } + const RefPtr<Runnable> mRunnable; + }; + mGraphTrack->mTrack->GraphImpl()->AppendMessage( + MakeUnique<Message>(std::move(aRunnable))); +} + +void MediaEncoder::Suspend() { + RunOnGraph(NS_NewRunnableFunction( + "MediaEncoder::Suspend (graph)", + [self = RefPtr<MediaEncoder>(this), this] { + if (NS_FAILED(mEncoderThread->Dispatch( + NS_NewRunnableFunction("MediaEncoder::Suspend (encoder)", + [self, this, now = TimeStamp::Now()] { + if (mAudioEncoder) { + mAudioEncoder->Suspend(); + } + if (mVideoEncoder) { + mVideoEncoder->Suspend(now); + } + })))) { + // RunOnGraph added an extra async step, and now `thread` has shut + // down. + return; + } + })); +} + +void MediaEncoder::Resume() { + RunOnGraph(NS_NewRunnableFunction( + "MediaEncoder::Resume (graph)", + [self = RefPtr<MediaEncoder>(this), this] { + if (NS_FAILED(mEncoderThread->Dispatch( + NS_NewRunnableFunction("MediaEncoder::Resume (encoder)", + [self, this, now = TimeStamp::Now()] { + if (mAudioEncoder) { + mAudioEncoder->Resume(); + } + if (mVideoEncoder) { + mVideoEncoder->Resume(now); + } + })))) { + // RunOnGraph added an extra async step, and now `thread` has shut + // down. + return; + } + })); +} + +void MediaEncoder::ConnectAudioNode(AudioNode* aNode, uint32_t aOutput) { + MOZ_ASSERT(NS_IsMainThread()); + + if (mAudioNode) { + MOZ_ASSERT(false, "Only one audio node supported"); + return; + } + + // Only AudioNodeTrack of kind EXTERNAL_OUTPUT stores output audio data in + // the track (see AudioNodeTrack::AdvanceOutputSegment()). 
That means + // forwarding input track in recorder session won't be able to copy data from + // the track of non-destination node. Create a pipe track in this case. + if (aNode->NumberOfOutputs() > 0) { + AudioContext* ctx = aNode->Context(); + AudioNodeEngine* engine = new AudioNodeEngine(nullptr); + AudioNodeTrack::Flags flags = AudioNodeTrack::EXTERNAL_OUTPUT | + AudioNodeTrack::NEED_MAIN_THREAD_ENDED; + mPipeTrack = AudioNodeTrack::Create(ctx, engine, flags, ctx->Graph()); + AudioNodeTrack* ns = aNode->GetTrack(); + if (ns) { + mInputPort = mPipeTrack->AllocateInputPort(aNode->GetTrack(), 0, aOutput); + } + } + + mAudioNode = aNode; + + if (mPipeTrack) { + mPipeTrack->AddListener(mAudioListener); + EnsureGraphTrackFrom(mPipeTrack); + } else { + mAudioNode->GetTrack()->AddListener(mAudioListener); + EnsureGraphTrackFrom(mAudioNode->GetTrack()); + } +} + +void MediaEncoder::ConnectMediaStreamTrack(MediaStreamTrack* aTrack) { + MOZ_ASSERT(NS_IsMainThread()); + + if (aTrack->Ended()) { + MOZ_ASSERT_UNREACHABLE("Cannot connect ended track"); + return; + } + + EnsureGraphTrackFrom(aTrack->GetTrack()); + + if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) { + if (!mAudioEncoder) { + // No audio encoder for this audio track. It could be disabled. + LOG(LogLevel::Warning, ("Cannot connect to audio track - no encoder")); + return; + } + + MOZ_ASSERT(!mAudioTrack, "Only one audio track supported."); + MOZ_ASSERT(mAudioListener, "No audio listener for this audio track"); + + LOG(LogLevel::Info, ("Connected to audio track %p", aTrack)); + + mAudioTrack = audio; + audio->AddListener(mAudioListener); + } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) { + if (!mVideoEncoder) { + // No video encoder for this video track. It could be disabled. 
+ LOG(LogLevel::Warning, ("Cannot connect to video track - no encoder")); + return; + } + + MOZ_ASSERT(!mVideoTrack, "Only one video track supported."); + MOZ_ASSERT(mVideoListener, "No video listener for this video track"); + + LOG(LogLevel::Info, ("Connected to video track %p", aTrack)); + + mVideoTrack = video; + video->AddDirectListener(mVideoListener); + video->AddListener(mVideoListener); + } else { + MOZ_ASSERT(false, "Unknown track type"); + } +} + +void MediaEncoder::RemoveMediaStreamTrack(MediaStreamTrack* aTrack) { + if (!aTrack) { + MOZ_ASSERT(false); + return; + } + + if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) { + if (audio != mAudioTrack) { + MOZ_ASSERT(false, "Not connected to this audio track"); + return; + } + + if (mAudioListener) { + audio->RemoveDirectListener(mAudioListener); + audio->RemoveListener(mAudioListener); + } + mAudioTrack = nullptr; + } else if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) { + if (video != mVideoTrack) { + MOZ_ASSERT(false, "Not connected to this video track"); + return; + } + + if (mVideoListener) { + video->RemoveDirectListener(mVideoListener); + video->RemoveListener(mVideoListener); + } + mVideoTrack = nullptr; + } +} + +/* static */ +already_AddRefed<MediaEncoder> MediaEncoder::CreateEncoder( + RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType, + uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes, + TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice) { + AUTO_PROFILER_LABEL("MediaEncoder::CreateEncoder", OTHER); + + UniquePtr<ContainerWriter> writer; + UniquePtr<AudioTrackEncoder> audioEncoder; + UniquePtr<VideoTrackEncoder> videoEncoder; + auto encodedAudioQueue = MakeUnique<MediaQueue<EncodedFrame>>(); + auto encodedVideoQueue = MakeUnique<MediaQueue<EncodedFrame>>(); + auto driftCompensator = + MakeRefPtr<DriftCompensator>(aEncoderThread, aTrackRate); + + Maybe<MediaContainerType> mimeType = MakeMediaContainerType(aMimeType); + if 
(!mimeType) { + return nullptr; + } + + for (const auto& codec : mimeType->ExtendedType().Codecs().Range()) { + if (codec.EqualsLiteral("opus")) { + MOZ_ASSERT(!audioEncoder); + audioEncoder = + MakeUnique<OpusTrackEncoder>(aTrackRate, *encodedAudioQueue); + } else if (codec.EqualsLiteral("vp8") || codec.EqualsLiteral("vp8.0")) { + MOZ_ASSERT(!videoEncoder); + if (Preferences::GetBool("media.recorder.video.frame_drops", true)) { + videoEncoder = MakeUnique<VP8TrackEncoder>(driftCompensator, aTrackRate, + *encodedVideoQueue, + FrameDroppingMode::ALLOW); + } else { + videoEncoder = MakeUnique<VP8TrackEncoder>(driftCompensator, aTrackRate, + *encodedVideoQueue, + FrameDroppingMode::DISALLOW); + } + } else { + MOZ_CRASH("Unknown codec"); + } + } + + if (mimeType->Type() == MEDIAMIMETYPE(VIDEO_WEBM) || + mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM)) { + MOZ_ASSERT_IF(mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM), !videoEncoder); + writer = MakeUnique<WebMWriter>(); + } else if (mimeType->Type() == MEDIAMIMETYPE(AUDIO_OGG)) { + MOZ_ASSERT(audioEncoder); + MOZ_ASSERT(!videoEncoder); + writer = MakeUnique<OggWriter>(); + } + NS_ENSURE_TRUE(writer, nullptr); + + LOG(LogLevel::Info, + ("Create encoder result:a[%p](%u bps) v[%p](%u bps) w[%p] mimeType = " + "%s.", + audioEncoder.get(), aAudioBitrate, videoEncoder.get(), aVideoBitrate, + writer.get(), NS_ConvertUTF16toUTF8(aMimeType).get())); + + if (audioEncoder) { + audioEncoder->SetWorkerThread(aEncoderThread); + if (aAudioBitrate != 0) { + audioEncoder->SetBitrate(aAudioBitrate); + } + } + if (videoEncoder) { + videoEncoder->SetWorkerThread(aEncoderThread); + if (aVideoBitrate != 0) { + videoEncoder->SetBitrate(aVideoBitrate); + } + } + return MakeAndAddRef<MediaEncoder>( + std::move(aEncoderThread), std::move(driftCompensator), std::move(writer), + std::move(audioEncoder), std::move(videoEncoder), + std::move(encodedAudioQueue), std::move(encodedVideoQueue), aTrackRate, + aMimeType, aMaxMemory, aTimeslice); +} + 
+nsresult MediaEncoder::GetEncodedData( + nsTArray<nsTArray<uint8_t>>* aOutputBufs) { + AUTO_PROFILER_LABEL("MediaEncoder::GetEncodedData", OTHER); + + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + LOG(LogLevel::Verbose, + ("GetEncodedData TimeStamp = %f", GetEncodeTimeStamp())); + + if (!mInitialized) { + return NS_ERROR_NOT_INITIALIZED; + } + + nsresult rv = mMuxer->GetData(aOutputBufs); + if (mMuxer->IsFinished()) { + mCompleted = true; + } + + LOG(LogLevel::Verbose, + ("END GetEncodedData TimeStamp=%f " + "mCompleted=%d, aComplete=%d, vComplete=%d", + GetEncodeTimeStamp(), mCompleted, + !mAudioEncoder || mAudioEncoder->IsEncodingComplete(), + !mVideoEncoder || mVideoEncoder->IsEncodingComplete())); + + return rv; +} + +void MediaEncoder::MaybeShutdown() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + if (!mEncodedAudioQueue->IsFinished()) { + LOG(LogLevel::Debug, + ("MediaEncoder %p not shutting down, audio is still live", this)); + return; + } + + if (!mEncodedVideoQueue->IsFinished()) { + LOG(LogLevel::Debug, + ("MediaEncoder %p not shutting down, video is still live", this)); + return; + } + + mShutdownEvent.Notify(); + + // Stop will Shutdown() gracefully. 
+ Unused << InvokeAsync(mMainThread, this, __func__, &MediaEncoder::Stop); +} + +RefPtr<GenericNonExclusivePromise> MediaEncoder::Shutdown() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + if (mShutdownPromise) { + return mShutdownPromise; + } + + LOG(LogLevel::Info, ("MediaEncoder is shutting down.")); + + AutoTArray<RefPtr<GenericNonExclusivePromise>, 2> shutdownPromises; + if (mAudioListener) { + shutdownPromises.AppendElement(mAudioListener->OnShutdown()); + } + if (mVideoListener) { + shutdownPromises.AppendElement(mVideoListener->OnShutdown()); + } + + mShutdownPromise = + GenericNonExclusivePromise::All(mEncoderThread, shutdownPromises) + ->Then(mEncoderThread, __func__, + [](const GenericNonExclusivePromise::AllPromiseType:: + ResolveOrRejectValue& aValue) { + if (aValue.IsResolve()) { + return GenericNonExclusivePromise::CreateAndResolve( + true, __func__); + } + return GenericNonExclusivePromise::CreateAndReject( + aValue.RejectValue(), __func__); + }); + + mShutdownPromise->Then( + mEncoderThread, __func__, [self = RefPtr<MediaEncoder>(this), this] { + if (mAudioEncoder) { + mAudioEncoder->UnregisterListener(mEncoderListener); + } + if (mVideoEncoder) { + mVideoEncoder->UnregisterListener(mEncoderListener); + } + mEncoderListener->Forget(); + mMuxer->Disconnect(); + mAudioPushListener.DisconnectIfExists(); + mAudioFinishListener.DisconnectIfExists(); + mVideoPushListener.DisconnectIfExists(); + mVideoFinishListener.DisconnectIfExists(); + }); + + return mShutdownPromise; +} + +RefPtr<GenericNonExclusivePromise> MediaEncoder::Stop() { + MOZ_ASSERT(NS_IsMainThread()); + + LOG(LogLevel::Info, ("MediaEncoder %p Stop", this)); + + DisconnectTracks(); + + return InvokeAsync(mEncoderThread, this, __func__, &MediaEncoder::Shutdown); +} + +RefPtr<GenericNonExclusivePromise> MediaEncoder::Cancel() { + MOZ_ASSERT(NS_IsMainThread()); + + LOG(LogLevel::Info, ("MediaEncoder %p Cancel", this)); + + DisconnectTracks(); + + return InvokeAsync(mEncoderThread, 
__func__, + [self = RefPtr<MediaEncoder>(this), this]() { + if (mAudioEncoder) { + mAudioEncoder->Cancel(); + } + if (mVideoEncoder) { + mVideoEncoder->Cancel(); + } + return Shutdown(); + }); +} + +bool MediaEncoder::HasError() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + return mError; +} + +void MediaEncoder::SetError() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (mError) { + return; + } + + mError = true; + mErrorEvent.Notify(); +} + +auto MediaEncoder::RequestData() -> RefPtr<BlobPromise> { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime); + mLastBlobTime = muxedEndTime; + mLastExtractTime = muxedEndTime; + return Extract()->Then( + mMainThread, __func__, + [this, self = RefPtr<MediaEncoder>(this)]( + const GenericPromise::ResolveOrRejectValue& aValue) { + // Even if rejected, we want to gather what has already been + // extracted into the current blob and expose that. + Unused << NS_WARN_IF(aValue.IsReject()); + return GatherBlob(); + }); +} + +void MediaEncoder::MaybeCreateMutableBlobStorage() { + MOZ_ASSERT(NS_IsMainThread()); + if (!mMutableBlobStorage) { + mMutableBlobStorage = new MutableBlobStorage( + MutableBlobStorage::eCouldBeInTemporaryFile, nullptr, mMaxMemory); + } +} + +void MediaEncoder::OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame) { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + mMuxedAudioEndTime = aFrame->GetEndTime(); + MaybeExtractOrGatherBlob(); +} + +void MediaEncoder::OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame) { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + mMuxedVideoEndTime = aFrame->GetEndTime(); + MaybeExtractOrGatherBlob(); +} + +void MediaEncoder::MaybeExtractOrGatherBlob() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime); + if ((muxedEndTime - mLastBlobTime).ToTimeDuration() >= mTimeslice) { + 
LOG(LogLevel::Verbose, ("MediaEncoder %p Muxed %.2fs of data since last " + "blob. Issuing new blob.", + this, (muxedEndTime - mLastBlobTime).ToSeconds())); + RequestData()->Then(mEncoderThread, __func__, + [this, self = RefPtr<MediaEncoder>(this)]( + const BlobPromise::ResolveOrRejectValue& aValue) { + if (aValue.IsReject()) { + SetError(); + return; + } + RefPtr<BlobImpl> blob = aValue.ResolveValue(); + mDataAvailableEvent.Notify(std::move(blob)); + }); + } + + if (muxedEndTime - mLastExtractTime > TimeUnit::FromSeconds(1)) { + // Extract data from the muxer at least every second. + LOG(LogLevel::Verbose, + ("MediaEncoder %p Muxed %.2fs of data since last " + "extract. Extracting more data into blob.", + this, (muxedEndTime - mLastExtractTime).ToSeconds())); + mLastExtractTime = muxedEndTime; + Unused << Extract(); + } +} + +// Pull encoded media data from MediaEncoder and put into MutableBlobStorage. +RefPtr<GenericPromise> MediaEncoder::Extract() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + LOG(LogLevel::Debug, ("MediaEncoder %p Extract", this)); + + AUTO_PROFILER_LABEL("MediaEncoder::Extract", OTHER); + + // Pull encoded media data from MediaEncoder + nsTArray<nsTArray<uint8_t>> buffer; + nsresult rv = GetEncodedData(&buffer); + MOZ_ASSERT(rv != NS_ERROR_INVALID_ARG, "Invalid args can be prevented."); + if (NS_FAILED(rv)) { + MOZ_RELEASE_ASSERT(buffer.IsEmpty()); + // Even if we failed to encode more data, it might be time to push a blob + // with already encoded data. + } + + // To ensure Extract() promises are resolved in calling order, we always + // invoke the main thread. Even when the encoded buffer is empty. 
+ return InvokeAsync( + mMainThread, __func__, + [self = RefPtr<MediaEncoder>(this), this, buffer = std::move(buffer)] { + MaybeCreateMutableBlobStorage(); + for (const auto& part : buffer) { + if (part.IsEmpty()) { + continue; + } + + nsresult rv = + mMutableBlobStorage->Append(part.Elements(), part.Length()); + if (NS_WARN_IF(NS_FAILED(rv))) { + return GenericPromise::CreateAndReject(rv, __func__); + } + } + return GenericPromise::CreateAndResolve(true, __func__); + }); +} + +auto MediaEncoder::GatherBlob() -> RefPtr<BlobPromise> { + MOZ_ASSERT(NS_IsMainThread()); + if (!mBlobPromise) { + return mBlobPromise = GatherBlobImpl(); + } + return mBlobPromise = mBlobPromise->Then(mMainThread, __func__, + [self = RefPtr<MediaEncoder>(this)] { + return self->GatherBlobImpl(); + }); +} + +auto MediaEncoder::GatherBlobImpl() -> RefPtr<BlobPromise> { + RefPtr<BlobStorer> storer = MakeAndAddRef<BlobStorer>(); + MaybeCreateMutableBlobStorage(); + mMutableBlobStorage->GetBlobImplWhenReady(NS_ConvertUTF16toUTF8(mMimeType), + storer); + mMutableBlobStorage = nullptr; + + storer->Promise()->Then( + mMainThread, __func__, + [self = RefPtr<MediaEncoder>(this), p = storer->Promise()] { + if (self->mBlobPromise == p) { + // Reset BlobPromise. 
+ self->mBlobPromise = nullptr; + } + }); + + return storer->Promise(); +} + +void MediaEncoder::DisconnectTracks() { + MOZ_ASSERT(NS_IsMainThread()); + + if (mAudioNode) { + mAudioNode->GetTrack()->RemoveListener(mAudioListener); + if (mInputPort) { + mInputPort->Destroy(); + mInputPort = nullptr; + } + if (mPipeTrack) { + mPipeTrack->RemoveListener(mAudioListener); + mPipeTrack->Destroy(); + mPipeTrack = nullptr; + } + mAudioNode = nullptr; + } + + if (mAudioTrack) { + RemoveMediaStreamTrack(mAudioTrack); + } + + if (mVideoTrack) { + RemoveMediaStreamTrack(mVideoTrack); + } +} + +bool MediaEncoder::IsWebMEncoderEnabled() { + return StaticPrefs::media_encoder_webm_enabled(); +} + +void MediaEncoder::UpdateInitialized() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (mInitialized) { + // This could happen if an encoder re-inits due to a resolution change. + return; + } + + if (mAudioEncoder && !mAudioEncoder->IsInitialized()) { + LOG(LogLevel::Debug, + ("MediaEncoder %p UpdateInitialized waiting for audio", this)); + return; + } + + if (mVideoEncoder && !mVideoEncoder->IsInitialized()) { + LOG(LogLevel::Debug, + ("MediaEncoder %p UpdateInitialized waiting for video", this)); + return; + } + + MOZ_ASSERT(mMuxer->NeedsMetadata()); + nsTArray<RefPtr<TrackMetadataBase>> meta; + if (mAudioEncoder && !*meta.AppendElement(mAudioEncoder->GetMetadata())) { + LOG(LogLevel::Error, ("Audio metadata is null")); + SetError(); + return; + } + if (mVideoEncoder && !*meta.AppendElement(mVideoEncoder->GetMetadata())) { + LOG(LogLevel::Error, ("Video metadata is null")); + SetError(); + return; + } + + if (NS_FAILED(mMuxer->SetMetadata(meta))) { + LOG(LogLevel::Error, ("SetMetadata failed")); + SetError(); + return; + } + + LOG(LogLevel::Info, + ("MediaEncoder %p UpdateInitialized set metadata in muxer", this)); + + mInitialized = true; +} + +void MediaEncoder::UpdateStarted() { + MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn()); + + if (mStarted) { + return; + } + + if 
(mAudioEncoder && !mAudioEncoder->IsStarted()) { + return; + } + + if (mVideoEncoder && !mVideoEncoder->IsStarted()) { + return; + } + + mStarted = true; + + // Start issuing timeslice-based blobs. + MOZ_ASSERT(mLastBlobTime == TimeUnit::Zero()); + + mStartedEvent.Notify(); +} + +/* + * SizeOfExcludingThis measures memory being used by the Media Encoder. + * Currently it measures the size of the Encoder buffer and memory occupied + * by mAudioEncoder, mVideoEncoder, and any current blob storage. + */ +auto MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) + -> RefPtr<SizeOfPromise> { + MOZ_ASSERT(NS_IsMainThread()); + size_t blobStorageSize = + mMutableBlobStorage ? mMutableBlobStorage->SizeOfCurrentMemoryBuffer() + : 0; + + return InvokeAsync( + mEncoderThread, __func__, + [self = RefPtr<MediaEncoder>(this), this, blobStorageSize, + aMallocSizeOf]() { + size_t size = 0; + if (mAudioEncoder) { + size += mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf); + } + if (mVideoEncoder) { + size += mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf); + } + return SizeOfPromise::CreateAndResolve(blobStorageSize + size, + __func__); + }); +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h new file mode 100644 index 0000000000..dae887edc6 --- /dev/null +++ b/dom/media/encoder/MediaEncoder.h @@ -0,0 +1,400 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MediaEncoder_h_ +#define MediaEncoder_h_ + +#include "ContainerWriter.h" +#include "CubebUtils.h" +#include "MediaQueue.h" +#include "MediaTrackGraph.h" +#include "MediaTrackListener.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/MozPromise.h" +#include "mozilla/UniquePtr.h" +#include "nsIMemoryReporter.h" +#include "TrackEncoder.h" + +namespace mozilla { + +class DriftCompensator; +class Muxer; +class Runnable; +class TaskQueue; + +namespace dom { +class AudioNode; +class AudioStreamTrack; +class BlobImpl; +class MediaStreamTrack; +class MutableBlobStorage; +class VideoStreamTrack; +} // namespace dom + +class DriftCompensator; + +/** + * MediaEncoder is the framework of the encoding module. It controls and manages + * procedures between Muxer, ContainerWriter and TrackEncoder. ContainerWriter + * writes the encoded track data into a specific container (e.g. ogg, webm). + * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and + * are responsible for encoding raw data coming from MediaStreamTracks. + * + * MediaEncoder solves threading issues by doing message passing to a TaskQueue + * (the "encoder thread") as passed in to the constructor. Each + * MediaStreamTrack to be recorded is set up with a MediaTrackListener. + * Typically there are non-direct track listeners for audio, direct listeners + * for video, and there is always a non-direct listener on each track for + * time-keeping. The listeners forward data to their corresponding TrackEncoders + * on the encoder thread. + * + * The MediaEncoder listens to events from all TrackEncoders, and in turn + * signals events to interested parties, typically a MediaRecorder::Session. + * The MediaEncoder automatically encodes incoming data, muxes it, writes it + * into a container and stores the container data into a MutableBlobStorage. 
+ * It is timeslice-aware so that it can notify listeners when it's time to + * expose a blob due to filling the timeslice. + * + * MediaEncoder is designed to be a passive component; it neither owns nor is + * in charge of managing threads. Instead this is done by its owner. + * + * For example, usage from MediaRecorder of this component would be: + * 1) Create an encoder with a valid MIME type. Note that there are more + * configuration options, see the docs on MediaEncoder::CreateEncoder. + * => encoder = MediaEncoder::CreateEncoder(aMIMEType); + * It then creates track encoders and the appropriate ContainerWriter + * according to the MIME type. + * + * 2) Connect handlers through MediaEventListeners to the MediaEncoder's + * MediaEventSources, StartedEvent(), DataAvailableEvent(), ErrorEvent() and + * ShutdownEvent(). + * => listener = encoder->DataAvailableEvent().Connect(mainThread, &OnBlob); + * + * 3) Connect the sources to be recorded. Either through: + * => encoder->ConnectAudioNode(node); + * or + * => encoder->ConnectMediaStreamTrack(track); + * These should not be mixed. When connecting MediaStreamTracks there is + * support for at most one of each kind. + * + * 4) MediaEncoder automatically encodes data from the connected tracks, muxes + * them and writes it all into a blob, including metadata. When the blob + * contains at least `timeslice` worth of data it notifies the + * DataAvailableEvent that was connected in step 2. + * => void OnBlob(RefPtr<BlobImpl> aBlob) { + * => DispatchBlobEvent(Blob::Create(GetOwnerGlobal(), aBlob)); + * => }; + * + * 5) To stop encoding, there are multiple options: + * + * 5.1) Stop() for a graceful stop. + * => encoder->Stop(); + * + * 5.2) Cancel() for an immediate stop, if you don't need the data currently + * buffered. + * => encoder->Cancel(); + * + * 5.3) When all input tracks end, the MediaEncoder will automatically stop + * and shut down. 
+ */ +class MediaEncoder { + private: + class AudioTrackListener; + class VideoTrackListener; + class EncoderListener; + + public: + using BlobPromise = + MozPromise<RefPtr<dom::BlobImpl>, nsresult, false /* IsExclusive */>; + using SizeOfPromise = MozPromise<size_t, size_t, true /* IsExclusive */>; + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaEncoder) + + MediaEncoder(RefPtr<TaskQueue> aEncoderThread, + RefPtr<DriftCompensator> aDriftCompensator, + UniquePtr<ContainerWriter> aWriter, + UniquePtr<AudioTrackEncoder> aAudioEncoder, + UniquePtr<VideoTrackEncoder> aVideoEncoder, + UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue, + UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue, + TrackRate aTrackRate, const nsAString& aMIMEType, + uint64_t aMaxMemory, TimeDuration aTimeslice); + + /** + * Called on main thread from MediaRecorder::Pause. + */ + void Suspend(); + + /** + * Called on main thread from MediaRecorder::Resume. + */ + void Resume(); + + /** + * Disconnects the input tracks, causing the encoding to stop. + */ + void DisconnectTracks(); + + /** + * Connects an AudioNode with the appropriate encoder. + */ + void ConnectAudioNode(dom::AudioNode* aNode, uint32_t aOutput); + + /** + * Connects a MediaStreamTrack with the appropriate encoder. + */ + void ConnectMediaStreamTrack(dom::MediaStreamTrack* aTrack); + + /** + * Removes a connected MediaStreamTrack. + */ + void RemoveMediaStreamTrack(dom::MediaStreamTrack* aTrack); + + /** + * Creates an encoder with the given MIME type. This must be a valid MIME type + * or we will crash hard. + * Bitrates are given either explicitly, or with 0 for defaults. + * aTrackRate is the rate at which data will be fed to the TrackEncoders. + * aMaxMemory is the maximum number of bytes of muxed data allowed in memory. + * Beyond that the blob is moved to a temporary file. + * aTimeslice is the minimum duration of muxed data we gather before + * automatically issuing a dataavailable event. 
+ */ + static already_AddRefed<MediaEncoder> CreateEncoder( + RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType, + uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes, + TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice); + + /** + * Encodes raw data for all tracks to aOutputBufs. The buffer of container + * data is allocated in ContainerWriter::GetContainerData(). + * + * On its first call, metadata is also encoded. TrackEncoders must have been + * initialized before this is called. + */ + nsresult GetEncodedData(nsTArray<nsTArray<uint8_t>>* aOutputBufs); + + /** + * Asserts that Shutdown() has been called. Possible reasons are encoding + * completing, encountering an error, or being canceled by the caller. + */ + void AssertShutdownCalled() { MOZ_ASSERT(mShutdownPromise); } + + /** + * Stops (encoding any data currently buffered) the encoding and shuts down + * the encoder using Shutdown(). + */ + RefPtr<GenericNonExclusivePromise> Stop(); + + /** + * Cancels (discarding any data currently buffered) the encoding and shuts + * down the encoder using Shutdown(). + */ + RefPtr<GenericNonExclusivePromise> Cancel(); + + bool HasError(); + + static bool IsWebMEncoderEnabled(); + + /** + * Updates internal state when track encoders are all initialized. + */ + void UpdateInitialized(); + + /** + * Updates internal state when track encoders are all started, and + * notifies listeners that this MediaEncoder has been started. + */ + void UpdateStarted(); + + MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf) + /* + * Measure the size of the buffer, and heap memory in bytes occupied by + * mAudioEncoder and mVideoEncoder. + */ + RefPtr<SizeOfPromise> SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf); + + /** + * Encode, mux and store into blob storage what has been buffered until now, + * then return the blob backed by that storage. 
+ */ + RefPtr<BlobPromise> RequestData(); + + // Event that gets notified when all track encoders have received data. + MediaEventSource<void>& StartedEvent() { return mStartedEvent; } + // Event that gets notified when there was an error preventing continued + // recording somewhere in the MediaEncoder stack. + MediaEventSource<void>& ErrorEvent() { return mErrorEvent; } + // Event that gets notified when the MediaEncoder stack has been shut down. + MediaEventSource<void>& ShutdownEvent() { return mShutdownEvent; } + // Event that gets notified after we have muxed at least mTimeslice worth of + // data into the current blob storage. + MediaEventSource<RefPtr<dom::BlobImpl>>& DataAvailableEvent() { + return mDataAvailableEvent; + } + + protected: + ~MediaEncoder(); + + private: + /** + * Sets mGraphTrack if not already set, using a new stream from aTrack's + * graph. + */ + void EnsureGraphTrackFrom(MediaTrack* aTrack); + + /** + * Takes a regular runnable and dispatches it to the graph wrapped in a + * ControlMessage. + */ + void RunOnGraph(already_AddRefed<Runnable> aRunnable); + + /** + * Shuts down gracefully if there is no remaining live track encoder. + */ + void MaybeShutdown(); + + /** + * Waits for TrackEncoders to shut down, then shuts down the MediaEncoder and + * cleans up track encoders. + */ + RefPtr<GenericNonExclusivePromise> Shutdown(); + + /** + * Sets mError to true, notifies listeners of the error if mError changed, + * and stops encoding. + */ + void SetError(); + + /** + * Creates a new MutableBlobStorage if one doesn't exist. + */ + void MaybeCreateMutableBlobStorage(); + + /** + * Called when an encoded audio frame has been pushed by the audio encoder. + */ + void OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame); + + /** + * Called when an encoded video frame has been pushed by the video encoder. 
+ */ + void OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame); + + /** + * If enough data has been pushed to the muxer, extract it into the current + * blob storage. If more than mTimeslice data has been pushed to the muxer + * since the last DataAvailableEvent was notified, also gather the blob and + * notify MediaRecorder. + */ + void MaybeExtractOrGatherBlob(); + + // Extracts encoded and muxed data into the current blob storage, creating one + // if it doesn't exist. The returned promise resolves when data has been + // stored into the blob. + RefPtr<GenericPromise> Extract(); + + // Stops gathering data into the current blob and resolves when the current + // blob is available. Future data will be stored in a new blob. + // Should a previous async GatherBlob() operation still be in progress, we'll + // wait for it to finish before starting this one. + RefPtr<BlobPromise> GatherBlob(); + + RefPtr<BlobPromise> GatherBlobImpl(); + + const RefPtr<nsISerialEventTarget> mMainThread; + const RefPtr<TaskQueue> mEncoderThread; + const RefPtr<DriftCompensator> mDriftCompensator; + + const UniquePtr<MediaQueue<EncodedFrame>> mEncodedAudioQueue; + const UniquePtr<MediaQueue<EncodedFrame>> mEncodedVideoQueue; + + const UniquePtr<Muxer> mMuxer; + const UniquePtr<AudioTrackEncoder> mAudioEncoder; + const RefPtr<AudioTrackListener> mAudioListener; + const UniquePtr<VideoTrackEncoder> mVideoEncoder; + const RefPtr<VideoTrackListener> mVideoListener; + const RefPtr<EncoderListener> mEncoderListener; + + public: + const nsString mMimeType; + + // Max memory to use for the MutableBlobStorage. + const uint64_t mMaxMemory; + + // The interval of passing encoded data from MutableBlobStorage to + // onDataAvailable handler. 
+ const TimeDuration mTimeslice; + + private: + MediaEventListener mAudioPushListener; + MediaEventListener mAudioFinishListener; + MediaEventListener mVideoPushListener; + MediaEventListener mVideoFinishListener; + + MediaEventProducer<void> mStartedEvent; + MediaEventProducer<void> mErrorEvent; + MediaEventProducer<void> mShutdownEvent; + MediaEventProducer<RefPtr<dom::BlobImpl>> mDataAvailableEvent; + + // The AudioNode we are encoding. + // Will be null when input is media stream or destination node. + RefPtr<dom::AudioNode> mAudioNode; + // Pipe-track for allowing a track listener on a non-destination AudioNode. + // Will be null when input is media stream or destination node. + RefPtr<AudioNodeTrack> mPipeTrack; + // Input port that connects mAudioNode to mPipeTrack. + // Will be null when input is media stream or destination node. + RefPtr<MediaInputPort> mInputPort; + // An audio track that we are encoding. Will be null if the input stream + // doesn't contain audio on start() or if the input is an AudioNode. + RefPtr<dom::AudioStreamTrack> mAudioTrack; + // A video track that we are encoding. Will be null if the input stream + // doesn't contain video on start() or if the input is an AudioNode. + RefPtr<dom::VideoStreamTrack> mVideoTrack; + + // A stream to keep the MediaTrackGraph alive while we're recording. + RefPtr<SharedDummyTrack> mGraphTrack; + + // A buffer to cache muxed encoded data. + RefPtr<dom::MutableBlobStorage> mMutableBlobStorage; + // If set, is a promise for the latest GatherBlob() operation. Allows + // GatherBlob() operations to be serialized in order to avoid races. + RefPtr<BlobPromise> mBlobPromise; + // The end time of the muxed data in the last gathered blob. If more than one + // track is present, this is the end time of the track that ends the earliest + // in the last blob. Encoder thread only. + media::TimeUnit mLastBlobTime; + // The end time of the muxed data in the current blob storage. 
If more than + // one track is present, this is the end time of the track that ends the + // earliest in the current blob storage. Encoder thread only. + media::TimeUnit mLastExtractTime; + // The end time of encoded audio data sent to the muxer. Positive infinity if + // there is no audio encoder. Encoder thread only. + media::TimeUnit mMuxedAudioEndTime; + // The end time of encoded video data sent to the muxer. Positive infinity if + // there is no video encoder. Encoder thread only. + media::TimeUnit mMuxedVideoEndTime; + + TimeStamp mStartTime; + bool mInitialized; + bool mStarted; + bool mCompleted; + bool mError; + // Set when shutdown starts. + RefPtr<GenericNonExclusivePromise> mShutdownPromise; + // Returns the time elapsed since mStartTime in milliseconds, for logging. + double GetEncodeTimeStamp() { + TimeDuration decodeTime; + decodeTime = TimeStamp::Now() - mStartTime; + return decodeTime.ToMilliseconds(); + } +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/Muxer.cpp b/dom/media/encoder/Muxer.cpp new file mode 100644 index 0000000000..8225062ee5 --- /dev/null +++ b/dom/media/encoder/Muxer.cpp @@ -0,0 +1,185 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Muxer.h" + +#include "ContainerWriter.h" + +namespace mozilla { + +LazyLogModule gMuxerLog("Muxer"); +#define LOG(type, ...) 
MOZ_LOG(gMuxerLog, type, (__VA_ARGS__)) + +Muxer::Muxer(UniquePtr<ContainerWriter> aWriter, + MediaQueue<EncodedFrame>& aEncodedAudioQueue, + MediaQueue<EncodedFrame>& aEncodedVideoQueue) + : mEncodedAudioQueue(aEncodedAudioQueue), + mEncodedVideoQueue(aEncodedVideoQueue), + mWriter(std::move(aWriter)) {} + +void Muxer::Disconnect() { + mAudioPushListener.DisconnectIfExists(); + mAudioFinishListener.DisconnectIfExists(); + mVideoPushListener.DisconnectIfExists(); + mVideoFinishListener.DisconnectIfExists(); +} + +bool Muxer::IsFinished() { return mWriter->IsWritingComplete(); } + +nsresult Muxer::SetMetadata( + const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) { + MOZ_DIAGNOSTIC_ASSERT(!mMetadataSet); + MOZ_DIAGNOSTIC_ASSERT(!mHasAudio); + MOZ_DIAGNOSTIC_ASSERT(!mHasVideo); + nsresult rv = mWriter->SetMetadata(aMetadata); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "%p Setting metadata failed, tracks=%zu", this, + aMetadata.Length()); + return rv; + } + + for (const auto& track : aMetadata) { + switch (track->GetKind()) { + case TrackMetadataBase::METADATA_OPUS: + case TrackMetadataBase::METADATA_VORBIS: + case TrackMetadataBase::METADATA_AAC: + case TrackMetadataBase::METADATA_AMR: + case TrackMetadataBase::METADATA_EVRC: + MOZ_ASSERT(!mHasAudio, "Only one audio track supported"); + mHasAudio = true; + break; + case TrackMetadataBase::METADATA_VP8: + MOZ_ASSERT(!mHasVideo, "Only one video track supported"); + mHasVideo = true; + break; + default: + MOZ_CRASH("Unknown codec metadata"); + }; + } + mMetadataSet = true; + MOZ_ASSERT(mHasAudio || mHasVideo); + LOG(LogLevel::Info, "%p Metadata set; audio=%d, video=%d", this, mHasAudio, + mHasVideo); + return NS_OK; +} + +nsresult Muxer::GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers) { + MOZ_ASSERT(mHasAudio || mHasVideo); + + nsresult rv; + if (!mMetadataEncoded) { + rv = mWriter->GetContainerData(aOutputBuffers, ContainerWriter::GET_HEADER); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "%p Failed getting 
metadata from writer", this); + return rv; + } + mMetadataEncoded = true; + } + + if (mEncodedAudioQueue.GetSize() == 0 && !mEncodedAudioQueue.IsFinished() && + mEncodedVideoQueue.GetSize() == 0 && !mEncodedVideoQueue.IsFinished()) { + // Nothing to mux. + return NS_OK; + } + + rv = Mux(); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "%p Failed muxing data into writer", this); + return rv; + } + + MOZ_ASSERT_IF( + mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(), + mEncodedAudioQueue.AtEndOfStream()); + MOZ_ASSERT_IF( + mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(), + mEncodedVideoQueue.AtEndOfStream()); + uint32_t flags = + mEncodedAudioQueue.AtEndOfStream() && mEncodedVideoQueue.AtEndOfStream() + ? ContainerWriter::FLUSH_NEEDED + : 0; + + if (mEncodedAudioQueue.AtEndOfStream() && + mEncodedVideoQueue.AtEndOfStream()) { + LOG(LogLevel::Info, "%p All data written", this); + } + + return mWriter->GetContainerData(aOutputBuffers, flags); +} + +nsresult Muxer::Mux() { + MOZ_ASSERT(mMetadataSet); + MOZ_ASSERT(mHasAudio || mHasVideo); + + nsTArray<RefPtr<EncodedFrame>> frames; + // The times at which we expect our next video and audio frames. These are + // based on the time + duration (GetEndTime()) of the last seen frames. + // Assumes that the encoders write the correct duration for frames. + media::TimeUnit expectedNextVideoTime; + media::TimeUnit expectedNextAudioTime; + // Interleave frames until we're out of audio or video. + while (mEncodedVideoQueue.GetSize() > 0 && mEncodedAudioQueue.GetSize() > 0) { + RefPtr<EncodedFrame> videoFrame = mEncodedVideoQueue.PeekFront(); + RefPtr<EncodedFrame> audioFrame = mEncodedAudioQueue.PeekFront(); + // For any expected time our frames should occur at or after that time. 
+ MOZ_ASSERT(videoFrame->mTime >= expectedNextVideoTime); + MOZ_ASSERT(audioFrame->mTime >= expectedNextAudioTime); + if (videoFrame->mTime <= audioFrame->mTime) { + expectedNextVideoTime = videoFrame->GetEndTime(); + RefPtr<EncodedFrame> frame = mEncodedVideoQueue.PopFront(); + frames.AppendElement(std::move(frame)); + } else { + expectedNextAudioTime = audioFrame->GetEndTime(); + RefPtr<EncodedFrame> frame = mEncodedAudioQueue.PopFront(); + frames.AppendElement(std::move(frame)); + } + } + + // If we're out of audio we still may be able to add more video... + if (mEncodedAudioQueue.GetSize() == 0) { + while (mEncodedVideoQueue.GetSize() > 0) { + if (!mEncodedAudioQueue.AtEndOfStream() && + mEncodedVideoQueue.PeekFront()->mTime > expectedNextAudioTime) { + // Audio encoding is not complete and since the video frame comes + // after our next audio frame we cannot safely add it. + break; + } + frames.AppendElement(mEncodedVideoQueue.PopFront()); + } + } + + // If we're out of video we still may be able to add more audio... + if (mEncodedVideoQueue.GetSize() == 0) { + while (mEncodedAudioQueue.GetSize() > 0) { + if (!mEncodedVideoQueue.AtEndOfStream() && + mEncodedAudioQueue.PeekFront()->mTime > expectedNextVideoTime) { + // Video encoding is not complete and since the audio frame comes + // after our next video frame we cannot safely add it. + break; + } + frames.AppendElement(mEncodedAudioQueue.PopFront()); + } + } + + LOG(LogLevel::Debug, + "%p Muxed data, remaining-audio=%zu, remaining-video=%zu", this, + mEncodedAudioQueue.GetSize(), mEncodedVideoQueue.GetSize()); + + // If encoding is complete for both encoders we should signal end of stream, + // otherwise we keep going. + uint32_t flags = + mEncodedVideoQueue.AtEndOfStream() && mEncodedAudioQueue.AtEndOfStream() + ? ContainerWriter::END_OF_STREAM + : 0; + nsresult rv = mWriter->WriteEncodedTrack(frames, flags); + if (NS_FAILED(rv)) { + LOG(LogLevel::Error, "Error! 
Failed to write muxed data to the container"); + } + return rv; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/Muxer.h b/dom/media/encoder/Muxer.h new file mode 100644 index 0000000000..983e260230 --- /dev/null +++ b/dom/media/encoder/Muxer.h @@ -0,0 +1,71 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DOM_MEDIA_ENCODER_MUXER_H_ +#define DOM_MEDIA_ENCODER_MUXER_H_ + +#include "MediaQueue.h" +#include "mozilla/media/MediaUtils.h" + +namespace mozilla { + +class ContainerWriter; +class EncodedFrame; +class TrackMetadataBase; + +// Generic Muxer class that helps pace the output from track encoders to the +// ContainerWriter, so time never appears to go backwards. +// Note that the entire class is written for single threaded access. +class Muxer { + public: + Muxer(UniquePtr<ContainerWriter> aWriter, + MediaQueue<EncodedFrame>& aEncodedAudioQueue, + MediaQueue<EncodedFrame>& aEncodedVideoQueue); + ~Muxer() = default; + + // Disconnects MediaQueues such that they will no longer be consumed. + // Idempotent. + void Disconnect(); + + // Returns true when all tracks have ended, and all data has been muxed and + // fetched. + bool IsFinished(); + + // Returns true if this muxer has not been given metadata yet. + bool NeedsMetadata() const { return !mMetadataSet; } + + // Sets metadata for all tracks. This may only be called once. + nsresult SetMetadata(const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata); + + // Gets the data that has been muxed and written into the container so far. + nsresult GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers); + + private: + // Writes data in MediaQueues to the ContainerWriter. 
+ nsresult Mux(); + + // Audio frames that have been encoded and are pending write to the muxer. + MediaQueue<EncodedFrame>& mEncodedAudioQueue; + // Video frames that have been encoded and are pending write to the muxer. + MediaQueue<EncodedFrame>& mEncodedVideoQueue; + // Listeners driving the muxing as encoded data gets produced. + MediaEventListener mAudioPushListener; + MediaEventListener mAudioFinishListener; + MediaEventListener mVideoPushListener; + MediaEventListener mVideoFinishListener; + // The writer for the specific container we're recording into. + UniquePtr<ContainerWriter> mWriter; + // True once metadata has been set in the muxer. + bool mMetadataSet = false; + // True once metadata has been written to file. + bool mMetadataEncoded = false; + // True if metadata is set and contains an audio track. + bool mHasAudio = false; + // True if metadata is set and contains a video track. + bool mHasVideo = false; +}; +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp new file mode 100644 index 0000000000..16b71d378e --- /dev/null +++ b/dom/media/encoder/OpusTrackEncoder.cpp @@ -0,0 +1,454 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "OpusTrackEncoder.h" +#include "nsString.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/ProfilerLabels.h" +#include "VideoUtils.h" + +#include <opus/opus.h> + +#define LOG(args, ...) + +namespace mozilla { + +// The Opus format supports up to 8 channels, and supports multitrack audio up +// to 255 channels, but the current implementation supports only mono and +// stereo, and downmixes any more than that. 
+constexpr int MAX_SUPPORTED_AUDIO_CHANNELS = 8; + +// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html +// Per section "opus_encoder_init", the input signal must have 1 or 2 channels. +constexpr int MAX_CHANNELS = 2; + +// Size in bytes of the output buffer handed to Opus for each encoded packet. +constexpr int MAX_DATA_BYTES = 4096; + +// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4 +// Second paragraph, " The granule position of an audio data page is in units +// of PCM audio samples at a fixed rate of 48 kHz." +constexpr int kOpusSamplingRate = 48000; + +// The duration of an Opus frame; it must be 2.5, 5, 10, 20, 40 or 60 ms. +constexpr int kFrameDurationMs = 20; + +// The input sampling rates (Hz) that are fed to the encoder unchanged. +// Input at any other rate is resampled to 48kHz first. +constexpr int kOpusSupportedInputSamplingRates[] = {8000, 12000, 16000, 24000, + 48000}; + +namespace { + +// An endian-neutral serialization of integers. Serializing T in little endian +// format to aOutput, where T is a 16-bit or 32-bit integer. +template <typename T> +static void SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput) { + for (uint32_t i = 0; i < sizeof(T); i++) { + aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8)))); + } +} + +static inline void SerializeToBuffer(const nsCString& aComment, + nsTArray<uint8_t>* aOutput) { + // A string is serialized as its length (32 bits, little endian) followed by + // its bytes, without a null terminator. + SerializeToBuffer((uint32_t)(aComment.Length()), aOutput); + aOutput->AppendElements(aComment.get(), aComment.Length()); +} + +static void SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip, + uint32_t aInputSampleRate, + nsTArray<uint8_t>* aOutput) { + // The magic signature; sizeof - 1 strips the string's null terminator. + constexpr uint8_t magic[] = "OpusHead"; + aOutput->AppendElements(magic, sizeof(magic) - 1); + + // The version must always be 1 (8 bits, unsigned).
+ aOutput->AppendElement(1); + + // Number of output channels (8 bits, unsigned). + aOutput->AppendElement(aChannelCount); + + // Number of samples (at 48 kHz) to discard from the decoder output when + // starting playback (16 bits, unsigned, little endian). + SerializeToBuffer(aPreskip, aOutput); + + // The sampling rate of input source (32 bits, unsigned, little endian). + SerializeToBuffer(aInputSampleRate, aOutput); + + // Output gain, an encoder should set this field to zero (16 bits, signed, + // little endian). + SerializeToBuffer((int16_t)0, aOutput); + + // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits, + // unsigned). + aOutput->AppendElement(0); +} + +static void SerializeOpusCommentHeader(const nsCString& aVendor, + const nsTArray<nsCString>& aComments, + nsTArray<uint8_t>* aOutput) { + // The magic signature, null terminator has to be stripped off. + constexpr uint8_t magic[] = "OpusTags"; + aOutput->AppendElements(magic, sizeof(magic) - 1); + + // The vendor; Should append in the following order: + // vendor string length (32 bits, unsigned, little endian) + // vendor string. + SerializeToBuffer(aVendor, aOutput); + + // Add comments; Should append in the following order: + // comment list length (32 bits, unsigned, little endian) + // comment #0 string length (32 bits, unsigned, little endian) + // comment #0 string + // comment #1 string length (32 bits, unsigned, little endian) + // comment #1 string ... + SerializeToBuffer((uint32_t)aComments.Length(), aOutput); + for (uint32_t i = 0; i < aComments.Length(); ++i) { + SerializeToBuffer(aComments[i], aOutput); + } +} + +bool IsSampleRateSupported(TrackRate aSampleRate) { + // According to www.opus-codec.org, creating an opus encoder requires the + // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or + // 48000. If this constraint is not satisfied, we resample the input to 48kHz. 
+ AutoTArray<int, 5> supportedSamplingRates; + supportedSamplingRates.AppendElements( + kOpusSupportedInputSamplingRates, + ArrayLength(kOpusSupportedInputSamplingRates)); + return supportedSamplingRates.Contains(aSampleRate); +} + +} // Anonymous namespace. + +OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue) + : AudioTrackEncoder(aTrackRate, aEncodedDataQueue), + mOutputSampleRate(IsSampleRateSupported(aTrackRate) ? aTrackRate + : kOpusSamplingRate), + mEncoder(nullptr), + mLookahead(0), + mLookaheadWritten(0), + mResampler(nullptr), + mNumOutputFrames(0) {} + +OpusTrackEncoder::~OpusTrackEncoder() { + if (mEncoder) { + opus_encoder_destroy(mEncoder); + } + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } +} + +nsresult OpusTrackEncoder::Init(int aChannels) { + NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0), + NS_ERROR_FAILURE); + + // This version of encoder API only support 1 or 2 channels, + // So set the mChannels less or equal 2 and + // let InterleaveTrackData downmix pcm data. + mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels; + + // Reject non-audio sample rates. 
+ NS_ENSURE_TRUE(mTrackRate >= 8000, NS_ERROR_INVALID_ARG); + NS_ENSURE_TRUE(mTrackRate <= 192000, NS_ERROR_INVALID_ARG); + + if (NeedsResampler()) { + int error; + mResampler = speex_resampler_init(mChannels, mTrackRate, kOpusSamplingRate, + SPEEX_RESAMPLER_QUALITY_DEFAULT, &error); + + if (error != RESAMPLER_ERR_SUCCESS) { + return NS_ERROR_FAILURE; + } + } + + int error = 0; + mEncoder = opus_encoder_create(mOutputSampleRate, mChannels, + OPUS_APPLICATION_AUDIO, &error); + + if (error != OPUS_OK) { + return NS_ERROR_FAILURE; + } + + if (mAudioBitrate) { + int bps = static_cast<int>( + std::min<uint32_t>(mAudioBitrate, std::numeric_limits<int>::max())); + error = opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(bps)); + if (error != OPUS_OK) { + return NS_ERROR_FAILURE; + } + } + + // In the case of Opus we need to calculate the codec delay based on the + // pre-skip. For more information see: + // https://tools.ietf.org/html/rfc7845#section-4.2 + error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)); + if (error != OPUS_OK) { + mLookahead = 0; + return NS_ERROR_FAILURE; + } + + SetInitialized(); + + return NS_OK; +} + +int OpusTrackEncoder::GetLookahead() const { + return mLookahead * kOpusSamplingRate / mOutputSampleRate; +} + +int OpusTrackEncoder::NumInputFramesPerPacket() const { + return mTrackRate * kFrameDurationMs / 1000; +} + +int OpusTrackEncoder::NumOutputFramesPerPacket() const { + return mOutputSampleRate * kFrameDurationMs / 1000; +} + +bool OpusTrackEncoder::NeedsResampler() const { + // A resampler is needed when mTrackRate is not supported by the opus encoder. + // This is equivalent to !IsSampleRateSupported(mTrackRate) but less cycles. 
+ return mTrackRate != mOutputSampleRate && + mOutputSampleRate == kOpusSamplingRate; +} + +already_AddRefed<TrackMetadataBase> OpusTrackEncoder::GetMetadata() { + AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER); + + MOZ_ASSERT(mInitialized); + + if (!mInitialized) { + return nullptr; + } + + RefPtr<OpusMetadata> meta = new OpusMetadata(); + meta->mChannels = mChannels; + meta->mSamplingFrequency = mTrackRate; + + // Ogg and Webm timestamps are always sampled at 48k for Opus. + SerializeOpusIdHeader(mChannels, + mLookahead * (kOpusSamplingRate / mOutputSampleRate), + mTrackRate, &meta->mIdHeader); + + nsCString vendor; + vendor.AppendASCII(opus_get_version_string()); + + nsTArray<nsCString> comments; + comments.AppendElement( + nsLiteralCString("ENCODER=Mozilla" MOZ_APP_UA_VERSION)); + + SerializeOpusCommentHeader(vendor, comments, &meta->mCommentHeader); + + return meta.forget(); +} + +nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) { + AUTO_PROFILER_LABEL("OpusTrackEncoder::Encode", OTHER); + + MOZ_ASSERT(aSegment); + MOZ_ASSERT(mInitialized || mCanceled); + + if (mCanceled || IsEncodingComplete()) { + return NS_ERROR_FAILURE; + } + + if (!mInitialized) { + // calculation below depends on the truth that mInitialized is true. + return NS_ERROR_FAILURE; + } + + int result = 0; + // Loop until we run out of packets of input data + while (result >= 0 && !IsEncodingComplete()) { + // re-sampled frames left last time which didn't fit into an Opus packet + // duration. + const int framesLeft = mResampledLeftover.Length() / mChannels; + MOZ_ASSERT(NumOutputFramesPerPacket() >= framesLeft); + // Fetch input frames such that there will be n frames where (n + + // framesLeft) >= NumOutputFramesPerPacket() after re-sampling. + const int framesToFetch = NumInputFramesPerPacket() - + (framesLeft * mTrackRate / kOpusSamplingRate) + + (NeedsResampler() ? 
1 : 0); + + if (!mEndOfStream && aSegment->GetDuration() < framesToFetch) { + // Not enough raw data + return NS_OK; + } + + // Start encoding data. + AutoTArray<AudioDataValue, 9600> pcm; + pcm.SetLength(NumOutputFramesPerPacket() * mChannels); + + int frameCopied = 0; + + for (AudioSegment::ChunkIterator iter(*aSegment); + !iter.IsEnded() && frameCopied < framesToFetch; iter.Next()) { + AudioChunk chunk = *iter; + + // Chunk to the required frame size. + TrackTime frameToCopy = + std::min(chunk.GetDuration(), + static_cast<TrackTime>(framesToFetch - frameCopied)); + + // Possible greatest value of framesToFetch = 3844: see + // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy + // should not be able to exceed this value. + MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range"); + + if (!chunk.IsNull()) { + // Append the interleaved data to the end of pcm buffer. + AudioTrackEncoder::InterleaveTrackData( + chunk, frameToCopy, mChannels, + pcm.Elements() + frameCopied * mChannels); + } else { + CheckedInt<int> memsetLength = + CheckedInt<int>(frameToCopy) * mChannels * sizeof(AudioDataValue); + if (!memsetLength.isValid()) { + // This should never happen, but we use a defensive check because + // we really don't want a bad memset + MOZ_ASSERT_UNREACHABLE("memsetLength invalid!"); + return NS_ERROR_FAILURE; + } + memset(pcm.Elements() + frameCopied * mChannels, 0, + memsetLength.value()); + } + + frameCopied += frameToCopy; + } + + // Possible greatest value of framesToFetch = 3844: see + // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied + // should not be able to exceed this value. 
+ MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range"); + + int framesInPCM = frameCopied; + if (mResampler) { + AutoTArray<AudioDataValue, 9600> resamplingDest; + uint32_t inframes = frameCopied; + uint32_t outframes = inframes * kOpusSamplingRate / mTrackRate + 1; + + // We want to consume all the input data, so we slightly oversize the + // resampled data buffer so we can fit the output data in. We cannot + // really predict the output frame count at each call. + resamplingDest.SetLength(outframes * mChannels); + +#if MOZ_SAMPLE_TYPE_S16 + short* in = reinterpret_cast<short*>(pcm.Elements()); + short* out = reinterpret_cast<short*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_int(mResampler, in, &inframes, out, + &outframes); +#else + float* in = reinterpret_cast<float*>(pcm.Elements()); + float* out = reinterpret_cast<float*>(resamplingDest.Elements()); + speex_resampler_process_interleaved_float(mResampler, in, &inframes, out, + &outframes); +#endif + + MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length()); + PodCopy(pcm.Elements(), mResampledLeftover.Elements(), + mResampledLeftover.Length()); + + uint32_t outframesToCopy = std::min( + outframes, + static_cast<uint32_t>(NumOutputFramesPerPacket() - framesLeft)); + + MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >= + outframesToCopy * mChannels); + PodCopy(pcm.Elements() + mResampledLeftover.Length(), + resamplingDest.Elements(), outframesToCopy * mChannels); + int frameLeftover = outframes - outframesToCopy; + mResampledLeftover.SetLength(frameLeftover * mChannels); + PodCopy(mResampledLeftover.Elements(), + resamplingDest.Elements() + outframesToCopy * mChannels, + mResampledLeftover.Length()); + // This is always at 48000Hz. + framesInPCM = framesLeft + outframesToCopy; + } + + // Remove the raw data which has been pulled to pcm buffer. + // The value of frameCopied should be equal to (or smaller than, if eos) + // NumOutputFramesPerPacket(). 
+ aSegment->RemoveLeading(frameCopied); + + // Has reached the end of input stream and all queued data has pulled for + // encoding. + bool isFinalPacket = false; + if (aSegment->GetDuration() == 0 && mEndOfStream && + framesInPCM < NumOutputFramesPerPacket()) { + // Pad |mLookahead| samples to the end of the track to prevent loss of + // original data. + const int toWrite = std::min(mLookahead - mLookaheadWritten, + NumOutputFramesPerPacket() - framesInPCM); + PodZero(pcm.Elements() + framesInPCM * mChannels, toWrite * mChannels); + mLookaheadWritten += toWrite; + framesInPCM += toWrite; + if (mLookaheadWritten == mLookahead) { + isFinalPacket = true; + } + } + + MOZ_ASSERT_IF(!isFinalPacket, framesInPCM == NumOutputFramesPerPacket()); + + // Append null data to pcm buffer if the leftover data is not enough for + // opus encoder. + if (framesInPCM < NumOutputFramesPerPacket() && isFinalPacket) { + PodZero(pcm.Elements() + framesInPCM * mChannels, + (NumOutputFramesPerPacket() - framesInPCM) * mChannels); + } + auto frameData = MakeRefPtr<EncodedFrame::FrameData>(); + // Encode the data with Opus Encoder. + frameData->SetLength(MAX_DATA_BYTES); + // result is returned as opus error code if it is negative. + result = 0; +#ifdef MOZ_SAMPLE_TYPE_S16 + const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements()); + result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(), + frameData->Elements(), MAX_DATA_BYTES); +#else + const float* pcmBuf = static_cast<float*>(pcm.Elements()); + result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(), + frameData->Elements(), MAX_DATA_BYTES); +#endif + frameData->SetLength(result >= 0 ? result : 0); + + if (result < 0) { + LOG("[Opus] Fail to encode data! 
Result: %s.", opus_strerror(result)); + } + if (isFinalPacket) { + if (mResampler) { + speex_resampler_destroy(mResampler); + mResampler = nullptr; + } + mResampledLeftover.SetLength(0); + } + + // timestamp should be the time of the first sample + mEncodedDataQueue.Push(MakeAndAddRef<EncodedFrame>( + media::TimeUnit(mNumOutputFrames + mLookahead, mOutputSampleRate), + static_cast<uint64_t>(framesInPCM) * kOpusSamplingRate / + mOutputSampleRate, + kOpusSamplingRate, EncodedFrame::OPUS_AUDIO_FRAME, + std::move(frameData))); + + mNumOutputFrames += NumOutputFramesPerPacket(); + LOG("[Opus] mOutputTimeStamp %.3f.", + media::TimeUnit(mNumOutputFrames, mOutputSampleRate).ToSeconds()); + + if (isFinalPacket) { + LOG("[Opus] Done encoding."); + mEncodedDataQueue.Finish(); + } + } + + return result >= 0 ? NS_OK : NS_ERROR_FAILURE; +} + +} // namespace mozilla + +#undef LOG diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h new file mode 100644 index 0000000000..5206944169 --- /dev/null +++ b/dom/media/encoder/OpusTrackEncoder.h @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef OpusTrackEncoder_h_ +#define OpusTrackEncoder_h_ + +#include <stdint.h> +#include <speex/speex_resampler.h> +#include "TimeUnits.h" +#include "TrackEncoder.h" + +struct OpusEncoder; + +namespace mozilla { + +// Opus meta data structure +class OpusMetadata : public TrackMetadataBase { + public: + // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus. + nsTArray<uint8_t> mIdHeader; + // The Comment Header of OggOpus. 
+ nsTArray<uint8_t> mCommentHeader; + int32_t mChannels; + float mSamplingFrequency; + MetadataKind GetKind() const override { return METADATA_OPUS; } +}; + +class OpusTrackEncoder : public AudioTrackEncoder { + public: + OpusTrackEncoder(TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue); + virtual ~OpusTrackEncoder(); + + already_AddRefed<TrackMetadataBase> GetMetadata() override; + + /** + * The encoder lookahead at 48k rate. + */ + int GetLookahead() const; + + protected: + /** + * The number of frames, in the input rate mTrackRate, needed to fill an + * encoded opus packet. A frame is a sample per channel. + */ + int NumInputFramesPerPacket() const override; + + nsresult Init(int aChannels) override; + + /** + * Encodes buffered data and pushes it to mEncodedDataQueue. + */ + nsresult Encode(AudioSegment* aSegment) override; + + /** + * The number of frames, in the output rate (see mOutputSampleRate), needed + * to fill an encoded opus packet. A frame is a sample per channel. + */ + int NumOutputFramesPerPacket() const; + + /** + * True if the input needs to be resampled to be fed to the underlying opus + * encoder. + */ + bool NeedsResampler() const; + + public: + /** + * The sample rate of the data fed to the Opus encoder. This might be + * different from the input sample rate if resampling occurs. + */ + const TrackRate mOutputSampleRate; + + private: + /** + * The Opus encoder from libopus. + */ + OpusEncoder* mEncoder; + + /** + * Total samples of delay added by codec (in rate mOutputSampleRate), can + * be queried from the encoder. From the perspective of decoding, real data + * begins this many samples late, so the encoder needs to append this many + * null samples to the end of stream, in order to align the time of input and + * output. + */ + int mLookahead; + + /** + * Number of mLookahead samples that have been written. When non-zero and equal + * to mLookahead, encoding is complete.
+ */ + int mLookaheadWritten; + + /** + * Resamples the input to 48kHz. Only created when the input sample rate is + * not one the Opus encoder accepts directly; null otherwise. + */ + SpeexResamplerState* mResampler; + + /** + * Store the resampled frames that don't fit into an Opus packet duration. + * They will be prepended to the resampled frames next encoding cycle. + */ + nsTArray<AudioDataValue> mResampledLeftover; + + /** + * Number of audio frames encoded, in rate mOutputSampleRate. + */ + uint64_t mNumOutputFrames; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp new file mode 100644 index 0000000000..8e03fd6fe3 --- /dev/null +++ b/dom/media/encoder/TrackEncoder.cpp @@ -0,0 +1,822 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "TrackEncoder.h" + +#include "AudioChannelFormat.h" +#include "DriftCompensation.h" +#include "MediaTrackGraph.h" +#include "MediaTrackListener.h" +#include "mozilla/AbstractThread.h" +#include "mozilla/Logging.h" +#include "mozilla/ProfilerLabels.h" +#include "mozilla/RollingMean.h" +#include "VideoUtils.h" +#include "mozilla/Telemetry.h" + +namespace mozilla { + +LazyLogModule gTrackEncoderLog("TrackEncoder"); +#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg) + +constexpr int DEFAULT_CHANNELS = 1; +constexpr int DEFAULT_FRAME_WIDTH = 640; +constexpr int DEFAULT_FRAME_HEIGHT = 480; +constexpr int DEFAULT_FRAME_RATE = 30; +// 10 second threshold if the audio encoder cannot be initialized. +constexpr int AUDIO_INIT_FAILED_DURATION = 10; +// 30 second threshold if the video encoder cannot be initialized.
+constexpr int VIDEO_INIT_FAILED_DURATION = 30; +constexpr int FRAMERATE_DETECTION_ROLLING_WINDOW = 3; +constexpr size_t FRAMERATE_DETECTION_MIN_CHUNKS = 5; +constexpr int FRAMERATE_DETECTION_MAX_DURATION_S = 6; + +TrackEncoder::TrackEncoder(TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue) + : mInitialized(false), + mStarted(false), + mEndOfStream(false), + mCanceled(false), + mInitCounter(0), + mSuspended(false), + mTrackRate(aTrackRate), + mEncodedDataQueue(aEncodedDataQueue) {} + +bool TrackEncoder::IsInitialized() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mInitialized; +} + +bool TrackEncoder::IsStarted() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mStarted; +} + +bool TrackEncoder::IsEncodingComplete() const { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mEncodedDataQueue.IsFinished(); +} + +void TrackEncoder::SetInitialized() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mInitialized) { + return; + } + + mInitialized = true; + + for (auto& l : mListeners.Clone()) { + l->Initialized(this); + } +} + +void TrackEncoder::SetStarted() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mStarted) { + return; + } + + mStarted = true; + + for (auto& l : mListeners.Clone()) { + l->Started(this); + } +} + +void TrackEncoder::OnError() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + Cancel(); + + for (auto& l : mListeners.Clone()) { + l->Error(this); + } +} + +void TrackEncoder::RegisterListener(TrackEncoderListener* aListener) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(!mListeners.Contains(aListener)); + mListeners.AppendElement(aListener); +} + +bool TrackEncoder::UnregisterListener(TrackEncoderListener* aListener) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return 
mListeners.RemoveElement(aListener); +} + +void TrackEncoder::SetWorkerThread(AbstractThread* aWorkerThread) { + mWorkerThread = aWorkerThread; +} + +void AudioTrackEncoder::Suspend() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Suspend(), was %s", this, + mSuspended ? "suspended" : "live")); + + if (mSuspended) { + return; + } + + mSuspended = true; +} + +void AudioTrackEncoder::Resume() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Resume(), was %s", this, + mSuspended ? "suspended" : "live")); + + if (!mSuspended) { + return; + } + + mSuspended = false; +} + +void AudioTrackEncoder::AppendAudioSegment(AudioSegment&& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + AUTO_PROFILER_LABEL("AudioTrackEncoder::AppendAudioSegment", OTHER); + TRACK_LOG(LogLevel::Verbose, + ("[AudioTrackEncoder %p]: AppendAudioSegment() duration=%" PRIu64, + this, aSegment.GetDuration())); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + TryInit(mOutgoingBuffer, aSegment.GetDuration()); + + if (mSuspended) { + return; + } + + SetStarted(); + mOutgoingBuffer.AppendFrom(&aSegment); + + if (!mInitialized) { + return; + } + + if (NS_FAILED(Encode(&mOutgoingBuffer))) { + OnError(); + return; + } + + MOZ_ASSERT_IF(IsEncodingComplete(), mOutgoingBuffer.IsEmpty()); +} + +void AudioTrackEncoder::TryInit(const AudioSegment& aSegment, + TrackTime aDuration) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mInitialized) { + return; + } + + mInitCounter++; + TRACK_LOG(LogLevel::Debug, + ("[AudioTrackEncoder %p]: Inited the audio encoder %d times", this, + mInitCounter)); + + for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + // The number of channels is determined by the first non-null chunk, and + // thus the audio 
encoder is initialized at this time. + if (iter->IsNull()) { + continue; + } + + nsresult rv = Init(iter->mChannelData.Length()); + + if (NS_SUCCEEDED(rv)) { + TRACK_LOG(LogLevel::Info, + ("[AudioTrackEncoder %p]: Successfully initialized!", this)); + return; + } else { + TRACK_LOG( + LogLevel::Error, + ("[AudioTrackEncoder %p]: Failed to initialize the encoder!", this)); + OnError(); + return; + } + break; + } + + mNotInitDuration += aDuration; + if (!mInitialized && + ((mNotInitDuration - 1) / mTrackRate >= AUDIO_INIT_FAILED_DURATION) && + mInitCounter > 1) { + // Perform a best effort initialization since we haven't gotten any + // data yet. Motivated by issues like Bug 1336367 + TRACK_LOG(LogLevel::Warning, + ("[AudioTrackEncoder]: Initialize failed for %ds. Attempting to " + "init with %d (default) channels!", + AUDIO_INIT_FAILED_DURATION, DEFAULT_CHANNELS)); + nsresult rv = Init(DEFAULT_CHANNELS); + if (NS_FAILED(rv)) { + TRACK_LOG(LogLevel::Error, + ("[AudioTrackEncoder %p]: Default-channel-init failed.", this)); + OnError(); + return; + } + } +} + +void AudioTrackEncoder::Cancel() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Cancel()", this)); + mCanceled = true; + mEndOfStream = true; + mOutgoingBuffer.Clear(); + mEncodedDataQueue.Finish(); +} + +void AudioTrackEncoder::NotifyEndOfStream() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, + ("[AudioTrackEncoder %p]: NotifyEndOfStream()", this)); + + if (!mCanceled && !mInitialized) { + // If source audio track is completely silent till the end of encoding, + // initialize the encoder with a default channel count. 
+ Init(DEFAULT_CHANNELS); + } + + if (mEndOfStream) { + return; + } + + mEndOfStream = true; + + if (NS_FAILED(Encode(&mOutgoingBuffer))) { + mOutgoingBuffer.Clear(); + OnError(); + } + + MOZ_ASSERT(mOutgoingBuffer.GetDuration() == 0); +} + +/*static*/ +void AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk, + int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput) { + uint32_t numChannelsToCopy = std::min( + aOutputChannels, static_cast<uint32_t>(aChunk.mChannelData.Length())); + switch (aChunk.mBufferFormat) { + case AUDIO_FORMAT_S16: { + AutoTArray<const int16_t*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const int16_t*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, + aChunk.mVolume); + break; + } + case AUDIO_FORMAT_FLOAT32: { + AutoTArray<const float*, 2> array; + array.SetLength(numChannelsToCopy); + for (uint32_t i = 0; i < array.Length(); i++) { + array[i] = static_cast<const float*>(aChunk.mChannelData[i]); + } + InterleaveTrackData(array, aDuration, aOutputChannels, aOutput, + aChunk.mVolume); + break; + } + case AUDIO_FORMAT_SILENCE: { + MOZ_ASSERT(false, "To implement."); + } + }; +} + +/*static*/ +void AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput, + int32_t aDuration, + int32_t aChannels, + AudioDataValue* aOutput) { + for (int32_t i = 0; i < aChannels; ++i) { + for (int32_t j = 0; j < aDuration; ++j) { + aOutput[i * aDuration + j] = aInput[i + j * aChannels]; + } + } +} + +size_t AudioTrackEncoder::SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf); +} + +VideoTrackEncoder::VideoTrackEncoder( + RefPtr<DriftCompensator> aDriftCompensator, TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue, + FrameDroppingMode 
aFrameDroppingMode) + : TrackEncoder(aTrackRate, aEncodedDataQueue), + mDriftCompensator(std::move(aDriftCompensator)), + mEncodedTicks(0), + mVideoBitrate(0), + mFrameDroppingMode(aFrameDroppingMode), + mEnabled(true) { + mLastChunk.mDuration = 0; +} + +void VideoTrackEncoder::Suspend(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: Suspend() at %.3fs, was %s", this, + mStartTime.IsNull() ? 0.0 : (aTime - mStartTime).ToSeconds(), + mSuspended ? "suspended" : "live")); + + if (mSuspended) { + return; + } + + mSuspended = true; + mSuspendTime = aTime; +} + +void VideoTrackEncoder::Resume(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (!mSuspended) { + return; + } + + TRACK_LOG( + LogLevel::Info, + ("[VideoTrackEncoder %p]: Resume() after %.3fs, was %s", this, + (aTime - mSuspendTime).ToSeconds(), mSuspended ? "suspended" : "live")); + + mSuspended = false; + + TimeDuration suspendDuration = aTime - mSuspendTime; + if (!mLastChunk.mTimeStamp.IsNull()) { + VideoChunk* nextChunk = mIncomingBuffer.FindChunkContaining(aTime); + MOZ_ASSERT_IF(nextChunk, nextChunk->mTimeStamp <= aTime); + if (nextChunk) { + nextChunk->mTimeStamp = aTime; + } + mLastChunk.mTimeStamp += suspendDuration; + } + if (!mStartTime.IsNull()) { + mStartTime += suspendDuration; + } + + mSuspendTime = TimeStamp(); +} + +void VideoTrackEncoder::Disable(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Disable()", this)); + + if (mStartTime.IsNull()) { + // We haven't started yet. No need to touch future frames. + mEnabled = false; + return; + } + + // Advancing currentTime to process any frames in mIncomingBuffer between + // mCurrentTime and aTime. 
+ AdvanceCurrentTime(aTime); + if (!mLastChunk.mTimeStamp.IsNull()) { + // Insert a black frame at t=aTime into mIncomingBuffer, to trigger the + // shift to black at the right moment. + VideoSegment tempSegment; + tempSegment.AppendFrom(&mIncomingBuffer); + mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), + mLastChunk.mFrame.GetPrincipalHandle(), true, + aTime); + mIncomingBuffer.AppendFrom(&tempSegment); + } + mEnabled = false; +} + +void VideoTrackEncoder::Enable(const TimeStamp& aTime) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Enable()", this)); + + if (mStartTime.IsNull()) { + // We haven't started yet. No need to touch future frames. + mEnabled = true; + return; + } + + // Advancing currentTime to process any frames in mIncomingBuffer between + // mCurrentTime and aTime. + AdvanceCurrentTime(aTime); + if (!mLastChunk.mTimeStamp.IsNull()) { + // Insert a real frame at t=aTime into mIncomingBuffer, to trigger the + // shift from black at the right moment. + VideoSegment tempSegment; + tempSegment.AppendFrom(&mIncomingBuffer); + mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), + mLastChunk.mFrame.GetPrincipalHandle(), + mLastChunk.mFrame.GetForceBlack(), aTime); + mIncomingBuffer.AppendFrom(&tempSegment); + } + mEnabled = true; +} + +void VideoTrackEncoder::AppendVideoSegment(VideoSegment&& aSegment) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AppendVideoSegment()", this)); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + if (iter->IsNull()) { + // A null image was sent. This is a signal from the source that we should + // clear any images buffered in the future. 
+ mIncomingBuffer.Clear(); + continue; // Don't append iter, as it is null. + } + if (VideoChunk* c = mIncomingBuffer.GetLastChunk()) { + if (iter->mTimeStamp < c->mTimeStamp) { + // Time went backwards. This can happen when a MediaDecoder seeks. + // We need to handle this by removing any frames buffered in the future + // and start over at iter->mTimeStamp. + mIncomingBuffer.Clear(); + } + } + SetStarted(); + mIncomingBuffer.AppendFrame(do_AddRef(iter->mFrame.GetImage()), + iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack(), iter->mTimeStamp); + } + aSegment.Clear(); +} + +void VideoTrackEncoder::Init(const VideoSegment& aSegment, + const TimeStamp& aTime, + size_t aFrameRateDetectionMinChunks) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(!aTime.IsNull()); + + if (mInitialized) { + return; + } + + mInitCounter++; + TRACK_LOG(LogLevel::Debug, + ("[VideoTrackEncoder %p]: Init the video encoder %d times", this, + mInitCounter)); + + Maybe<float> framerate; + if (!aSegment.IsEmpty()) { + // The number of whole frames, i.e., with known duration. + size_t frameCount = 0; + RollingMean<TimeDuration, TimeDuration> meanDuration( + FRAMERATE_DETECTION_ROLLING_WINDOW); + VideoSegment::ConstChunkIterator iter(aSegment); + TimeStamp previousChunkTime = iter->mTimeStamp; + iter.Next(); + for (; !iter.IsEnded(); iter.Next(), ++frameCount) { + meanDuration.insert(iter->mTimeStamp - previousChunkTime); + previousChunkTime = iter->mTimeStamp; + } + TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Init() frameCount=%zu", + this, frameCount)); + if (frameCount >= aFrameRateDetectionMinChunks) { + if (meanDuration.empty()) { + // No whole frames available, use aTime as end time. + framerate = Some(1.0f / (aTime - mStartTime).ToSeconds()); + } else { + // We want some frames for estimating the framerate. 
+ framerate = Some(1.0f / meanDuration.mean().ToSeconds()); + } + } else if ((aTime - mStartTime).ToSeconds() > + FRAMERATE_DETECTION_MAX_DURATION_S) { + // Instead of failing init after the fail-timeout, we fallback to a very + // low rate. + framerate = Some(static_cast<float>(frameCount) / + (aTime - mStartTime).ToSeconds()); + } + } + + if (framerate) { + for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); + iter.Next()) { + if (iter->IsNull()) { + continue; + } + + gfx::IntSize imgsize = iter->mFrame.GetImage()->GetSize(); + gfx::IntSize intrinsicSize = iter->mFrame.GetIntrinsicSize(); + nsresult rv = Init(imgsize.width, imgsize.height, intrinsicSize.width, + intrinsicSize.height, *framerate); + + if (NS_SUCCEEDED(rv)) { + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: Successfully initialized!", this)); + return; + } + + TRACK_LOG( + LogLevel::Error, + ("[VideoTrackEncoder %p]: Failed to initialize the encoder!", this)); + OnError(); + break; + } + } + + if (((aTime - mStartTime).ToSeconds() > VIDEO_INIT_FAILED_DURATION) && + mInitCounter > 1) { + TRACK_LOG(LogLevel::Warning, + ("[VideoTrackEncoder %p]: No successful init for %ds.", this, + VIDEO_INIT_FAILED_DURATION)); + OnError(); + return; + } +} + +void VideoTrackEncoder::Cancel() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: Cancel()", this)); + mCanceled = true; + mEndOfStream = true; + mIncomingBuffer.Clear(); + mOutgoingBuffer.Clear(); + mLastChunk.SetNull(0); + mEncodedDataQueue.Finish(); +} + +void VideoTrackEncoder::NotifyEndOfStream() { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + // We have already been notified. 
+ return; + } + + mEndOfStream = true; + TRACK_LOG(LogLevel::Info, + ("[VideoTrackEncoder %p]: NotifyEndOfStream()", this)); + + if (!mLastChunk.IsNull()) { + RefPtr<layers::Image> lastImage = mLastChunk.mFrame.GetImage(); + const TimeStamp now = TimeStamp::Now(); + TimeStamp currentTime = mSuspended ? mSuspendTime : mCurrentTime; + currentTime = mDriftCompensator->GetVideoTime(now, currentTime); + TimeDuration absoluteEndTime = currentTime - mStartTime; + CheckedInt64 duration = + UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) - + mEncodedTicks; + if (duration.isValid() && duration.value() > 0) { + mEncodedTicks += duration.value(); + TRACK_LOG(LogLevel::Debug, + ("[VideoTrackEncoder %p]: Appending last video frame %p at pos " + "%.3fs, " + "track-end=%.3fs", + this, lastImage.get(), + (mLastChunk.mTimeStamp - mStartTime).ToSeconds(), + absoluteEndTime.ToSeconds())); + mOutgoingBuffer.AppendFrame( + lastImage.forget(), mLastChunk.mFrame.GetIntrinsicSize(), + PRINCIPAL_HANDLE_NONE, mLastChunk.mFrame.GetForceBlack() || !mEnabled, + mLastChunk.mTimeStamp); + mOutgoingBuffer.ExtendLastFrameBy(duration.value()); + } + + if (!mInitialized) { + // Try to init without waiting for an accurate framerate. + Init(mOutgoingBuffer, currentTime, 0); + } + } + + if (mCanceled) { + // Previous Init failed and we got canceled. Nothing to do here. + return; + } + + mIncomingBuffer.Clear(); + mLastChunk.SetNull(0); + + if (NS_WARN_IF(!mInitialized)) { + // Still not initialized. There was probably no real frame at all, perhaps + // by muting. Initialize the encoder with default frame width, frame + // height, and frame rate. 
+ Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_WIDTH, + DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_RATE); + } + + if (NS_FAILED(Encode(&mOutgoingBuffer))) { + OnError(); + } + + MOZ_ASSERT(mOutgoingBuffer.IsEmpty()); +} + +void VideoTrackEncoder::SetStartOffset(const TimeStamp& aStartOffset) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(mCurrentTime.IsNull()); + TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: SetStartOffset()", this)); + mStartTime = aStartOffset; + mCurrentTime = aStartOffset; +} + +void VideoTrackEncoder::AdvanceCurrentTime(const TimeStamp& aTime) { + AUTO_PROFILER_LABEL("VideoTrackEncoder::AdvanceCurrentTime", OTHER); + + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + MOZ_ASSERT(!mStartTime.IsNull()); + MOZ_ASSERT(!mCurrentTime.IsNull()); + + if (mCanceled) { + return; + } + + if (mEndOfStream) { + return; + } + + if (mSuspended) { + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AdvanceCurrentTime() suspended at %.3fs", + this, (mCurrentTime - mStartTime).ToSeconds())); + mCurrentTime = aTime; + mIncomingBuffer.ForgetUpToTime(mCurrentTime); + return; + } + + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: AdvanceCurrentTime() to %.3fs", this, + (aTime - mStartTime).ToSeconds())); + + // Grab frames within the currentTime range from the incoming buffer. + VideoSegment tempSegment; + { + VideoChunk* previousChunk = &mLastChunk; + auto appendDupes = [&](const TimeStamp& aUpTo) { + while ((aUpTo - previousChunk->mTimeStamp).ToSeconds() > 1.0) { + // We encode at least one frame per second, even if there are none + // flowing. 
+ previousChunk->mTimeStamp += TimeDuration::FromSeconds(1.0); + tempSegment.AppendFrame( + do_AddRef(previousChunk->mFrame.GetImage()), + previousChunk->mFrame.GetIntrinsicSize(), + previousChunk->mFrame.GetPrincipalHandle(), + previousChunk->mFrame.GetForceBlack() || !mEnabled, + previousChunk->mTimeStamp); + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Duplicating video frame (%p) at pos %.3f", + this, previousChunk->mFrame.GetImage(), + (previousChunk->mTimeStamp - mStartTime).ToSeconds())); + } + }; + for (VideoSegment::ChunkIterator iter(mIncomingBuffer); !iter.IsEnded(); + iter.Next()) { + MOZ_ASSERT(!iter->IsNull()); + if (!previousChunk->IsNull() && + iter->mTimeStamp <= previousChunk->mTimeStamp) { + // This frame starts earlier than previousChunk. Skip. + continue; + } + if (iter->mTimeStamp >= aTime) { + // This frame starts in the future. Stop. + break; + } + if (!previousChunk->IsNull()) { + appendDupes(iter->mTimeStamp); + } + tempSegment.AppendFrame( + do_AddRef(iter->mFrame.GetImage()), iter->mFrame.GetIntrinsicSize(), + iter->mFrame.GetPrincipalHandle(), + iter->mFrame.GetForceBlack() || !mEnabled, iter->mTimeStamp); + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Taking video frame (%p) at pos %.3f", + this, iter->mFrame.GetImage(), + (iter->mTimeStamp - mStartTime).ToSeconds())); + previousChunk = &*iter; + } + if (!previousChunk->IsNull()) { + appendDupes(aTime); + } + } + mCurrentTime = aTime; + mIncomingBuffer.ForgetUpToTime(mCurrentTime); + + // Convert tempSegment timestamps to durations and add chunks with known + // duration to mOutgoingBuffer. + const TimeStamp now = TimeStamp::Now(); + for (VideoSegment::ConstChunkIterator iter(tempSegment); !iter.IsEnded(); + iter.Next()) { + VideoChunk chunk = *iter; + + if (mLastChunk.mTimeStamp.IsNull()) { + // This is the first real chunk in the track. Make it start at the + // beginning of the track. 
+ MOZ_ASSERT(!iter->mTimeStamp.IsNull()); + + TRACK_LOG( + LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Got the first video frame (%p) at pos %.3f " + "(moving it to beginning)", + this, iter->mFrame.GetImage(), + (iter->mTimeStamp - mStartTime).ToSeconds())); + + mLastChunk = *iter; + mLastChunk.mTimeStamp = mStartTime; + continue; + } + + MOZ_ASSERT(!mLastChunk.IsNull()); + MOZ_ASSERT(!chunk.IsNull()); + + TimeDuration absoluteEndTime = + mDriftCompensator->GetVideoTime(now, chunk.mTimeStamp) - mStartTime; + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Appending video frame %p, at pos %.3fs " + "until %.3fs", + this, mLastChunk.mFrame.GetImage(), + (mDriftCompensator->GetVideoTime(now, mLastChunk.mTimeStamp) - + mStartTime) + .ToSeconds(), + absoluteEndTime.ToSeconds())); + CheckedInt64 duration = + UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) - + mEncodedTicks; + if (!duration.isValid()) { + NS_ERROR("Duration overflow"); + return; + } + + if (duration.value() <= 0) { + // A frame either started before the last frame (can happen when + // multiple frames are added before SetStartOffset), or + // two frames were so close together that they ended up at the same + // position. We handle both cases by ignoring the previous frame. + + TRACK_LOG(LogLevel::Verbose, + ("[VideoTrackEncoder %p]: Duration from frame %p to frame %p " + "is %" PRId64 ". 
Ignoring %p", + this, mLastChunk.mFrame.GetImage(), iter->mFrame.GetImage(), + duration.value(), mLastChunk.mFrame.GetImage())); + + TimeStamp t = mLastChunk.mTimeStamp; + mLastChunk = *iter; + mLastChunk.mTimeStamp = t; + continue; + } + + mEncodedTicks += duration.value(); + mOutgoingBuffer.AppendFrame( + do_AddRef(mLastChunk.mFrame.GetImage()), + mLastChunk.mFrame.GetIntrinsicSize(), PRINCIPAL_HANDLE_NONE, + mLastChunk.mFrame.GetForceBlack() || !mEnabled, mLastChunk.mTimeStamp); + mOutgoingBuffer.ExtendLastFrameBy(duration.value()); + mLastChunk = chunk; + } + + if (mOutgoingBuffer.IsEmpty()) { + return; + } + + Init(mOutgoingBuffer, mCurrentTime, FRAMERATE_DETECTION_MIN_CHUNKS); + + if (!mInitialized) { + return; + } + + if (NS_FAILED(Encode(&mOutgoingBuffer))) { + OnError(); + return; + } + + MOZ_ASSERT(mOutgoingBuffer.IsEmpty()); +} + +size_t VideoTrackEncoder::SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn()); + return mIncomingBuffer.SizeOfExcludingThis(aMallocSizeOf) + + mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf); +} + +} // namespace mozilla + +#undef TRACK_LOG diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h new file mode 100644 index 0000000000..879949874f --- /dev/null +++ b/dom/media/encoder/TrackEncoder.h @@ -0,0 +1,501 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef TrackEncoder_h_ +#define TrackEncoder_h_ + +#include "AudioSegment.h" +#include "EncodedFrame.h" +#include "MediaQueue.h" +#include "MediaTrackGraph.h" +#include "TrackMetadataBase.h" +#include "VideoSegment.h" + +namespace mozilla { + +class AbstractThread; +class DriftCompensator; +class TrackEncoder; + +class TrackEncoderListener { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener) + + /** + * Called when the TrackEncoder has received its first real data. + */ + virtual void Started(TrackEncoder* aEncoder) = 0; + + /** + * Called when the TrackEncoder's underlying encoder has been successfully + * initialized and there's non-null data ready to be encoded. + */ + virtual void Initialized(TrackEncoder* aEncoder) = 0; + + /** + * Called after the TrackEncoder hit an unexpected error, causing it to + * abort operation. + */ + virtual void Error(TrackEncoder* aEncoder) = 0; + + protected: + virtual ~TrackEncoderListener() = default; +}; + +/** + * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by + * MediaEncoder. All methods are to be called only on the worker thread. + * + * The control APIs are all called by MediaEncoder on its dedicated thread. Data + * is encoded as soon as it has been appended (and time has advanced past its + * end in case of video) and pushed to mEncodedDataQueue. + */ +class TrackEncoder { + public: + TrackEncoder(TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue); + + /** + * Called by MediaEncoder to cancel the encoding. + */ + virtual void Cancel() = 0; + + /** + * Notifies us that we have reached the end of the stream and no more data + * will be appended. + */ + virtual void NotifyEndOfStream() = 0; + + /** + * Creates and sets up meta data for a specific codec, called on the worker + * thread. + */ + virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0; + + /** + * MediaQueue containing encoded data, that is pushed as soon as it's ready. 
+ */ + MediaQueue<EncodedFrame>& EncodedDataQueue() { return mEncodedDataQueue; } + + /** + * Returns true once this TrackEncoder is initialized. + */ + bool IsInitialized(); + + /** + * Returns true once this TrackEncoder has received some data. + */ + bool IsStarted(); + + /** + * True if the track encoder has encoded all source segments coming from + * MediaTrackGraph. Call on the worker thread. + */ + bool IsEncodingComplete() const; + + /** + * Registers a listener to events from this TrackEncoder. + * We hold a strong reference to the listener. + */ + void RegisterListener(TrackEncoderListener* aListener); + + /** + * Unregisters a listener from events from this TrackEncoder. + * The listener will stop receiving events synchronously. + */ + bool UnregisterListener(TrackEncoderListener* aListener); + + virtual void SetBitrate(const uint32_t aBitrate) = 0; + + /** + * It's optional to set the worker thread, but if you do we'll assert that + * we are in the worker thread in every method that gets called. + */ + void SetWorkerThread(AbstractThread* aWorkerThread); + + /** + * Measure size of internal buffers. + */ + virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0; + + protected: + virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); } + + /** + * If this TrackEncoder was not already initialized, it is set to initialized + * and listeners are notified. + */ + void SetInitialized(); + + /** + * If this TrackEncoder was not already marked started, its started state is + * set and listeners are notified. + */ + void SetStarted(); + + /** + * Called after an error. Cancels the encoding and notifies listeners. + */ + void OnError(); + + /** + * True if the track encoder has been initialized successfully. + */ + bool mInitialized; + + /** + * True if the track encoder has received data. + */ + bool mStarted; + + /** + * True once all data until the end of the input track has been received. 
+ */ + bool mEndOfStream; + + /** + * True once this encoding has been cancelled. + */ + bool mCanceled; + + // How many times we have tried to initialize the encoder. + uint32_t mInitCounter; + + /** + * True if this TrackEncoder is currently suspended. + */ + bool mSuspended; + + /** + * The track rate of source media. + */ + const TrackRate mTrackRate; + + /** + * If set we assert that all methods are called on this thread. + */ + RefPtr<AbstractThread> mWorkerThread; + + /** + * MediaQueue where encoded data ends up. Note that metadata goes out of band. + */ + MediaQueue<EncodedFrame>& mEncodedDataQueue; + + nsTArray<RefPtr<TrackEncoderListener>> mListeners; +}; + +class AudioTrackEncoder : public TrackEncoder { + public: + AudioTrackEncoder(TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue) + : TrackEncoder(aTrackRate, aEncodedDataQueue), + mChannels(0), + mNotInitDuration(0), + mAudioBitrate(0) {} + + /** + * Suspends encoding from now, i.e., all future audio data received through + * AppendAudioSegment() until the next Resume() will be dropped. + */ + void Suspend(); + + /** + * Resumes encoding starting now, i.e., data from the next + * AppendAudioSegment() will get encoded. + */ + void Resume(); + + /** + * Appends and consumes track data from aSegment. + */ + void AppendAudioSegment(AudioSegment&& aSegment); + + template <typename T> + static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput, float aVolume) { + if (aInput.Length() < aOutputChannels) { + // Up-mix. This might make the mChannelData have more than aChannels. 
+ AudioChannelsUpMix(&aInput, aOutputChannels, + SilentChannel::ZeroChannel<T>()); + } + + if (aInput.Length() > aOutputChannels) { + DownmixAndInterleave(aInput, aDuration, aVolume, aOutputChannels, + aOutput); + } else { + InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume, + aOutputChannels, aOutput); + } + } + + /** + * Interleaves the track data and stores the result into aOutput. Might need + * to up-mix or down-mix the channel data if the channels number of this chunk + * is different from aOutputChannels. The channel data from aChunk might be + * modified by up-mixing. + */ + static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration, + uint32_t aOutputChannels, + AudioDataValue* aOutput); + + /** + * De-interleaves the aInput data and stores the result into aOutput. + * No up-mix or down-mix operations inside. + */ + static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration, + int32_t aChannels, AudioDataValue* aOutput); + + /** + * Measure size of internal buffers. + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; + + void SetBitrate(const uint32_t aBitrate) override { + mAudioBitrate = aBitrate; + } + + /** + * Tries to initiate the AudioEncoder based on data in aSegment. + * This can be re-called often, as it will exit early should we already be + * initiated. mInitiated will only be set if there was enough data in + * aSegment to infer metadata. If mInitiated gets set, listeners are notified. + * + * Not having enough data in aSegment to initiate the encoder for an + * accumulated aDuration of one second will make us initiate with a default + * number of channels. + * + * If we attempt to initiate the underlying encoder but fail, we Cancel() and + * notify listeners. + */ + void TryInit(const AudioSegment& aSegment, TrackTime aDuration); + + void Cancel() override; + + /** + * Dispatched from MediaTrackGraph when we have finished feeding data to + * mOutgoingBuffer. 
+ */ + void NotifyEndOfStream() override; + + protected: + /** + * Number of samples per channel in a pcm buffer. This is also the value of + * frame size required by audio encoder, and listeners will be notified when + * at least this much data has been added to mOutgoingBuffer. + */ + virtual int NumInputFramesPerPacket() const { return 0; } + + /** + * Initializes the audio encoder. The call of this method is delayed until we + * have received the first valid track from MediaTrackGraph. + */ + virtual nsresult Init(int aChannels) = 0; + + /** + * Encodes buffered data and pushes it to mEncodedDataQueue. + */ + virtual nsresult Encode(AudioSegment* aSegment) = 0; + + /** + * The number of channels are used for processing PCM data in the audio + * encoder. This value comes from the first valid audio chunk. If encoder + * can't support the channels in the chunk, downmix PCM stream can be + * performed. This value also be used to initialize the audio encoder. + */ + int mChannels; + + /** + * A segment queue of outgoing audio track data to the encoder. + * The contents of mOutgoingBuffer will always be what has been appended on + * the encoder thread but not yet consumed by the encoder sub class. + */ + AudioSegment mOutgoingBuffer; + + TrackTime mNotInitDuration; + + uint32_t mAudioBitrate; +}; + +enum class FrameDroppingMode { + ALLOW, // Allowed to drop frames to keep up under load + DISALLOW, // Must not drop any frames, even if it means we will OOM +}; + +class VideoTrackEncoder : public TrackEncoder { + public: + VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, + TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue, + FrameDroppingMode aFrameDroppingMode); + + /** + * Suspends encoding from aTime, i.e., all video frame with a timestamp + * between aTime and the timestamp of the next Resume() will be dropped. + */ + void Suspend(const TimeStamp& aTime); + + /** + * Resumes encoding starting at aTime. 
+ */ + void Resume(const TimeStamp& aTime); + + /** + * Makes the video black from aTime. + */ + void Disable(const TimeStamp& aTime); + + /** + * Makes the video non-black from aTime. + * + * NB that it could still be forced black for other reasons, like principals. + */ + void Enable(const TimeStamp& aTime); + + /** + * Appends source video frames to mIncomingBuffer. We only append the source + * chunk if the image is different from mLastChunk's image. Called on the + * MediaTrackGraph thread. + */ + void AppendVideoSegment(VideoSegment&& aSegment); + + /** + * Measure size of internal buffers. + */ + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; + + void SetBitrate(const uint32_t aBitrate) override { + mVideoBitrate = aBitrate; + } + + /** + * Tries to initiate the VideoEncoder based on data in aSegment. + * This can be re-called often, as it will exit early should we already be + * initiated. mInitiated will only be set if there was enough data in + * aSegment to infer metadata. If mInitiated gets set, listeners are notified. + * The amount of chunks needed can be controlled by + * aFrameRateDetectionMinChunks which denotes the minimum number of chunks + * needed to infer the framerate. + * + * Failing to initiate the encoder for an accumulated aDuration of 30 seconds + * is seen as an error and will cancel the current encoding. + */ + void Init(const VideoSegment& aSegment, const TimeStamp& aTime, + size_t aFrameRateDetectionMinChunks); + + TrackTime SecondsToMediaTime(double aS) const { + NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX, + "Bad seconds"); + return mTrackRate * aS; + } + + /** + * MediaTrackGraph notifies us about the time of the track's start. + * This gets called on the MediaEncoder thread after a dispatch. 
+ */ + void SetStartOffset(const TimeStamp& aStartOffset); + + void Cancel() override; + + /** + * Notifies us that we have reached the end of the stream and no more data + * will be appended to mIncomingBuffer. + */ + void NotifyEndOfStream() override; + + /** + * Dispatched from MediaTrackGraph when it has run an iteration so we can + * hand more data to the encoder. + */ + void AdvanceCurrentTime(const TimeStamp& aTime); + + protected: + /** + * Initialize the video encoder. In order to collect the value of width and + * height of source frames, this initialization is delayed until we have + * received the first valid video frame from MediaTrackGraph. + * Listeners will be notified after it has been successfully initialized. + */ + virtual nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth, + int32_t aDisplayHeight, float aEstimatedFrameRate) = 0; + + /** + * Encodes data in the outgoing buffer and pushes it to mEncodedDataQueue. + */ + virtual nsresult Encode(VideoSegment* aSegment) = 0; + + /** + * Drift compensator for re-clocking incoming video frame wall-clock + * timestamps to audio time. + */ + const RefPtr<DriftCompensator> mDriftCompensator; + + /** + * The last unique frame and duration so far handled by + * NotifyAdvanceCurrentTime. When a new frame is detected, mLastChunk is added + * to mOutgoingBuffer. + */ + VideoChunk mLastChunk; + + /** + * A segment queue of incoming video track data, from listeners. + * The duration of mIncomingBuffer is irrelevant as we only look at TimeStamps + * of frames. Consumed data is replaced by null data. + */ + VideoSegment mIncomingBuffer; + + /** + * A segment queue of outgoing video track data to the encoder. + * The contents of mOutgoingBuffer will always be what has been consumed from + * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder + * sub class. There won't be any null data at the beginning of mOutgoingBuffer + * unless explicitly pushed by the producer. 
+ */ + VideoSegment mOutgoingBuffer; + + /** + * The number of mTrackRate ticks we have passed to mOutgoingBuffer. + */ + TrackTime mEncodedTicks; + + /** + * The time up to which we have forwarded data from mIncomingBuffer to + * mOutgoingBuffer. + */ + TimeStamp mCurrentTime; + + /** + * The time the video track started, so the start of the video track can be + * synced to the start of the audio track. + * + * Note that this time will progress during suspension, to make sure the + * incoming frames stay in sync with the output. + */ + TimeStamp mStartTime; + + /** + * The time Suspend was called on the MediaRecorder, so we can calculate the + * duration on the next Resume(). + */ + TimeStamp mSuspendTime; + + uint32_t mVideoBitrate; + + /** + * ALLOW to drop frames under load. + * DISALLOW to encode all frames, mainly for testing. + */ + FrameDroppingMode mFrameDroppingMode; + + /** + * True if the video MediaTrackTrack this VideoTrackEncoder is attached to is + * currently enabled. While false, we encode all frames as black. + */ + bool mEnabled; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h new file mode 100644 index 0000000000..503b52e5ec --- /dev/null +++ b/dom/media/encoder/TrackMetadataBase.h @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TrackMetadataBase_h_ +#define TrackMetadataBase_h_ + +#include "nsTArray.h" +#include "nsCOMPtr.h" +namespace mozilla { + +// A class represent meta data for various codec format. Only support one track +// information. 
+class TrackMetadataBase { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase) + enum MetadataKind { + METADATA_OPUS, // Represent the Opus metadata + METADATA_VP8, + METADATA_VORBIS, + METADATA_AVC, + METADATA_AAC, + METADATA_AMR, + METADATA_EVRC, + METADATA_UNKNOWN // Metadata Kind not set + }; + // Return the specific metadata kind + virtual MetadataKind GetKind() const = 0; + + protected: + // Protected destructor, to discourage deletion outside of Release(): + virtual ~TrackMetadataBase() = default; +}; + +// The base class for audio metadata. +class AudioTrackMetadata : public TrackMetadataBase { + public: + // The duration of each sample set generated by encoder. (counted by samples) + // If the duration is variant, this value should return 0. + virtual uint32_t GetAudioFrameDuration() = 0; + + // The size of each sample set generated by encoder. (counted by byte) + // If the size is variant, this value should return 0. + virtual uint32_t GetAudioFrameSize() = 0; + + // AudioSampleRate is the number of audio sample per second. + virtual uint32_t GetAudioSampleRate() = 0; + + virtual uint32_t GetAudioChannels() = 0; +}; + +// The base class for video metadata. +class VideoTrackMetadata : public TrackMetadataBase { + public: + // VideoHeight and VideoWidth are the frame size of the elementary stream. + virtual uint32_t GetVideoHeight() = 0; + virtual uint32_t GetVideoWidth() = 0; + + // VideoDisplayHeight and VideoDisplayWidth are the display frame size. + virtual uint32_t GetVideoDisplayHeight() = 0; + virtual uint32_t GetVideoDisplayWidth() = 0; + + // VideoClockRate is the number of samples per second in video frame's + // timestamp. + // For example, if VideoClockRate is 90k Hz and VideoFrameRate is + // 30 fps, each frame's sample duration will be 3000 Hz. + virtual uint32_t GetVideoClockRate() = 0; + + // VideoFrameRate is numner of frames per second. 
+ virtual uint32_t GetVideoFrameRate() = 0; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp new file mode 100644 index 0000000000..6412592ed1 --- /dev/null +++ b/dom/media/encoder/VP8TrackEncoder.cpp @@ -0,0 +1,720 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VP8TrackEncoder.h" + +#include "DriftCompensation.h" +#include "ImageToI420.h" +#include "mozilla/gfx/2D.h" +#include "prsystem.h" +#include "VideoSegment.h" +#include "VideoUtils.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "WebMWriter.h" +#include "mozilla/media/MediaUtils.h" +#include "mozilla/dom/ImageUtils.h" +#include "mozilla/dom/ImageBitmapBinding.h" +#include "mozilla/ProfilerLabels.h" + +namespace mozilla { + +LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder"); +#define VP8LOG(level, msg, ...) \ + MOZ_LOG(gVP8TrackEncoderLog, level, (msg, ##__VA_ARGS__)) + +constexpr int DEFAULT_BITRATE_BPS = 2500000; +constexpr int DEFAULT_KEYFRAME_INTERVAL_MS = 10000; +constexpr int DYNAMIC_MAXKFDIST_CHECK_INTERVAL = 5; +constexpr float DYNAMIC_MAXKFDIST_DIFFACTOR = 0.4; +constexpr float DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR = 0.75; +constexpr int I420_STRIDE_ALIGN = 16; + +using namespace mozilla::gfx; +using namespace mozilla::layers; +using namespace mozilla::media; +using namespace mozilla::dom; + +namespace { + +template <int N> +static int Aligned(int aValue) { + if (aValue < N) { + return N; + } + + // The `- 1` avoids overreaching when `aValue % N == 0`. 
+ return (((aValue - 1) / N) + 1) * N; +} + +template <int Alignment> +size_t I420Size(int aWidth, int aHeight) { + int yStride = Aligned<Alignment>(aWidth); + int yHeight = aHeight; + size_t yPlaneSize = yStride * yHeight; + + int uvStride = Aligned<Alignment>((aWidth + 1) / 2); + int uvHeight = (aHeight + 1) / 2; + size_t uvPlaneSize = uvStride * uvHeight; + + return yPlaneSize + uvPlaneSize * 2; +} + +nsresult CreateEncoderConfig(int32_t aWidth, int32_t aHeight, + uint32_t aVideoBitrate, TrackRate aTrackRate, + int32_t aMaxKeyFrameDistance, + vpx_codec_enc_cfg_t* config) { + // Encoder configuration structure. + memset(config, 0, sizeof(vpx_codec_enc_cfg_t)); + if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), config, 0)) { + VP8LOG(LogLevel::Error, "Failed to get default configuration"); + return NS_ERROR_FAILURE; + } + + config->g_w = aWidth; + config->g_h = aHeight; + // TODO: Maybe we should have various aFrameRate bitrate pair for each + // devices? or for different platform + + // rc_target_bitrate needs kbit/s + config->rc_target_bitrate = std::max( + 1U, (aVideoBitrate != 0 ? aVideoBitrate : DEFAULT_BITRATE_BPS) / 1000); + + // Setting the time base of the codec + config->g_timebase.num = 1; + config->g_timebase.den = aTrackRate; + + // No error resilience as this is not intended for UDP transports + config->g_error_resilient = 0; + + // Allow some frame lagging for large timeslices (when low latency is not + // needed) + /*std::min(10U, mKeyFrameInterval / 200)*/ + config->g_lag_in_frames = 0; + + int32_t number_of_cores = PR_GetNumberOfProcessors(); + if (aWidth * aHeight > 1920 * 1080 && number_of_cores >= 8) { + config->g_threads = 4; // 4 threads for > 1080p. + } else if (aWidth * aHeight > 1280 * 960 && number_of_cores >= 6) { + config->g_threads = 3; // 3 threads for 1080p. + } else if (aWidth * aHeight > 640 * 480 && number_of_cores >= 3) { + config->g_threads = 2; // 2 threads for qHD/HD. 
+ } else { + config->g_threads = 1; // 1 thread for VGA or less + } + + // rate control settings + + // No frame dropping + config->rc_dropframe_thresh = 0; + // Variable bitrate + config->rc_end_usage = VPX_VBR; + // Single pass encoding + config->g_pass = VPX_RC_ONE_PASS; + // ffmpeg doesn't currently support streams that use resize. + // Therefore, for safety, we should turn it off until it does. + config->rc_resize_allowed = 0; + // Allows 100% under target bitrate to compensate for prior overshoot + config->rc_undershoot_pct = 100; + // Allows 15% over target bitrate to compensate for prior undershoot + config->rc_overshoot_pct = 15; + // Tells the decoding application to buffer 500ms before beginning playback + config->rc_buf_initial_sz = 500; + // The decoding application will try to keep 600ms of buffer during playback + config->rc_buf_optimal_sz = 600; + // The decoding application may buffer 1000ms worth of encoded data + config->rc_buf_sz = 1000; + + // We set key frame interval to automatic and try to set kf_max_dist so that + // the encoder chooses to put keyframes slightly more often than + // mKeyFrameInterval (which will encode with VPX_EFLAG_FORCE_KF when reached). 
+ config->kf_mode = VPX_KF_AUTO; + config->kf_max_dist = aMaxKeyFrameDistance; + + return NS_OK; +} +} // namespace + +VP8TrackEncoder::VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, + TrackRate aTrackRate, + MediaQueue<EncodedFrame>& aEncodedDataQueue, + FrameDroppingMode aFrameDroppingMode, + Maybe<float> aKeyFrameIntervalFactor) + : VideoTrackEncoder(std::move(aDriftCompensator), aTrackRate, + aEncodedDataQueue, aFrameDroppingMode), + mKeyFrameInterval( + TimeDuration::FromMilliseconds(DEFAULT_KEYFRAME_INTERVAL_MS)), + mKeyFrameIntervalFactor(aKeyFrameIntervalFactor.valueOr( + DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR)) { + MOZ_COUNT_CTOR(VP8TrackEncoder); + CalculateMaxKeyFrameDistance().apply( + [&](auto aKfd) { SetMaxKeyFrameDistance(aKfd); }); +} + +VP8TrackEncoder::~VP8TrackEncoder() { + Destroy(); + MOZ_COUNT_DTOR(VP8TrackEncoder); +} + +void VP8TrackEncoder::Destroy() { + if (mInitialized) { + vpx_codec_destroy(&mVPXContext); + } + + mInitialized = false; +} + +Maybe<int32_t> VP8TrackEncoder::CalculateMaxKeyFrameDistance( + Maybe<float> aEstimatedFrameRate /* = Nothing() */) const { + if (!aEstimatedFrameRate && mMeanFrameDuration.empty()) { + // Not enough data to make a new calculation. 
+ return Nothing(); + } + + // Calculate an estimation of our current framerate + const float estimatedFrameRate = aEstimatedFrameRate.valueOrFrom( + [&] { return 1.0f / mMeanFrameDuration.mean().ToSeconds(); }); + // Set a kf_max_dist that should avoid triggering the VPX_EFLAG_FORCE_KF flag + return Some(std::max( + 1, static_cast<int32_t>(estimatedFrameRate * mKeyFrameIntervalFactor * + mKeyFrameInterval.ToSeconds()))); +} + +void VP8TrackEncoder::SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance) { + if (mInitialized) { + VP8LOG( + LogLevel::Debug, + "%p SetMaxKeyFrameDistance() set kf_max_dist to %d based on estimated " + "framerate %.2ffps keyframe-factor %.2f and keyframe-interval %.2fs", + this, aMaxKeyFrameDistance, 1 / mMeanFrameDuration.mean().ToSeconds(), + mKeyFrameIntervalFactor, mKeyFrameInterval.ToSeconds()); + DebugOnly<nsresult> rv = + Reconfigure(mFrameWidth, mFrameHeight, aMaxKeyFrameDistance); + MOZ_ASSERT( + NS_SUCCEEDED(rv), + "Reconfig for new key frame distance with proven size should succeed"); + } else { + VP8LOG(LogLevel::Debug, "%p SetMaxKeyFrameDistance() distance=%d", this, + aMaxKeyFrameDistance); + mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance); + } +} + +nsresult VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, + int32_t aDisplayWidth, int32_t aDisplayHeight, + float aEstimatedFrameRate) { + if (aDisplayWidth < 1 || aDisplayHeight < 1) { + return NS_ERROR_FAILURE; + } + + if (aEstimatedFrameRate <= 0) { + return NS_ERROR_FAILURE; + } + + int32_t maxKeyFrameDistance = + *CalculateMaxKeyFrameDistance(Some(aEstimatedFrameRate)); + + nsresult rv = InitInternal(aWidth, aHeight, maxKeyFrameDistance); + NS_ENSURE_SUCCESS(rv, rv); + + MOZ_ASSERT(!mI420Frame); + MOZ_ASSERT(mI420FrameSize == 0); + const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight); + mI420Frame.reset(new (fallible) uint8_t[neededSize]); + mI420FrameSize = mI420Frame ? 
neededSize : 0; + if (!mI420Frame) { + VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed", + neededSize); + return NS_ERROR_FAILURE; + } + vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight, + I420_STRIDE_ALIGN, mI420Frame.get()); + + if (!mMetadata) { + mMetadata = MakeAndAddRef<VP8Metadata>(); + mMetadata->mWidth = aWidth; + mMetadata->mHeight = aHeight; + mMetadata->mDisplayWidth = aDisplayWidth; + mMetadata->mDisplayHeight = aDisplayHeight; + + VP8LOG(LogLevel::Info, + "%p Init() created metadata. width=%d, height=%d, displayWidth=%d, " + "displayHeight=%d, framerate=%.2f", + this, mMetadata->mWidth, mMetadata->mHeight, + mMetadata->mDisplayWidth, mMetadata->mDisplayHeight, + aEstimatedFrameRate); + + SetInitialized(); + } + + return NS_OK; +} + +nsresult VP8TrackEncoder::InitInternal(int32_t aWidth, int32_t aHeight, + int32_t aMaxKeyFrameDistance) { + if (aWidth < 1 || aHeight < 1) { + return NS_ERROR_FAILURE; + } + + if (mInitialized) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + VP8LOG(LogLevel::Debug, + "%p InitInternal(). width=%d, height=%d, kf_max_dist=%d", this, aWidth, + aHeight, aMaxKeyFrameDistance); + + // Encoder configuration structure. 
+ vpx_codec_enc_cfg_t config; + nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate, + aMaxKeyFrameDistance, &config); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + if (vpx_codec_enc_init(&mVPXContext, vpx_codec_vp8_cx(), &config, flags)) { + return NS_ERROR_FAILURE; + } + + vpx_codec_control(&mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&mVPXContext, VP8E_SET_CPUUSED, 15); + vpx_codec_control(&mVPXContext, VP8E_SET_TOKEN_PARTITIONS, + VP8_TWO_TOKENPARTITION); + + mFrameWidth = aWidth; + mFrameHeight = aHeight; + mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance); + + return NS_OK; +} + +nsresult VP8TrackEncoder::Reconfigure(int32_t aWidth, int32_t aHeight, + int32_t aMaxKeyFrameDistance) { + if (aWidth <= 0 || aHeight <= 0) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + if (!mInitialized) { + MOZ_ASSERT(false); + return NS_ERROR_FAILURE; + } + + bool needsReInit = aMaxKeyFrameDistance != *mMaxKeyFrameDistance; + + if (aWidth != mFrameWidth || aHeight != mFrameHeight) { + VP8LOG(LogLevel::Info, "Dynamic resolution change (%dx%d -> %dx%d).", + mFrameWidth, mFrameHeight, aWidth, aHeight); + const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight); + if (neededSize > mI420FrameSize) { + needsReInit = true; + mI420Frame.reset(new (fallible) uint8_t[neededSize]); + mI420FrameSize = mI420Frame ? 
neededSize : 0; + } + if (!mI420Frame) { + VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed", + neededSize); + return NS_ERROR_FAILURE; + } + vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight, + I420_STRIDE_ALIGN, mI420Frame.get()); + } + + if (needsReInit) { + Destroy(); + mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance); + nsresult rv = InitInternal(aWidth, aHeight, aMaxKeyFrameDistance); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + mInitialized = true; + return NS_OK; + } + + // Encoder configuration structure. + vpx_codec_enc_cfg_t config; + nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate, + aMaxKeyFrameDistance, &config); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + // Set new configuration + if (vpx_codec_enc_config_set(&mVPXContext, &config) != VPX_CODEC_OK) { + VP8LOG(LogLevel::Error, "Failed to set new configuration"); + return NS_ERROR_FAILURE; + } + + mFrameWidth = aWidth; + mFrameHeight = aHeight; + + return NS_OK; +} + +already_AddRefed<TrackMetadataBase> VP8TrackEncoder::GetMetadata() { + AUTO_PROFILER_LABEL("VP8TrackEncoder::GetMetadata", OTHER); + + MOZ_ASSERT(mInitialized); + + if (!mInitialized) { + return nullptr; + } + + MOZ_ASSERT(mMetadata); + return do_AddRef(mMetadata); +} + +Result<RefPtr<EncodedFrame>, nsresult> VP8TrackEncoder::ExtractEncodedData() { + vpx_codec_iter_t iter = nullptr; + EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME; + auto frameData = MakeRefPtr<EncodedFrame::FrameData>(); + const vpx_codec_cx_pkt_t* pkt = nullptr; + while ((pkt = vpx_codec_get_cx_data(&mVPXContext, &iter)) != nullptr) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + // Copy the encoded data from libvpx to frameData + frameData->AppendElements((uint8_t*)pkt->data.frame.buf, + pkt->data.frame.sz); + break; + } + default: { + break; + } + } + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) 
{ + frameType = EncodedFrame::VP8_I_FRAME; + } + break; + } + } + + if (frameData->IsEmpty()) { + return RefPtr<EncodedFrame>(nullptr); + } + + if (!pkt) { + // This check silences a coverity warning about accessing a null pkt below. + return RefPtr<EncodedFrame>(nullptr); + } + + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + // Update the since-last-keyframe counter, and account for this frame's + // time. + TrackTime frameTime = pkt->data.frame.pts; + DebugOnly<TrackTime> frameDuration = pkt->data.frame.duration; + MOZ_ASSERT(frameTime + frameDuration <= mEncodedTimestamp); + mDurationSinceLastKeyframe = + std::min(mDurationSinceLastKeyframe, mEncodedTimestamp - frameTime); + } + + // Convert the timestamp and duration to Usecs. + media::TimeUnit timestamp = media::TimeUnit(pkt->data.frame.pts, mTrackRate); + if (!timestamp.IsValid()) { + NS_ERROR("Microsecond timestamp overflow"); + return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR); + } + + mExtractedDuration += pkt->data.frame.duration; + if (!mExtractedDuration.isValid()) { + NS_ERROR("Duration overflow"); + return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR); + } + + media::TimeUnit totalDuration = + media::TimeUnit(mExtractedDuration.value(), mTrackRate); + if (!totalDuration.IsValid()) { + NS_ERROR("Duration overflow"); + return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR); + } + + media::TimeUnit duration = totalDuration - mExtractedDurationUs; + if (!duration.IsValid()) { + NS_ERROR("Duration overflow"); + return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR); + } + + mExtractedDurationUs = totalDuration; + + VP8LOG(LogLevel::Verbose, + "ExtractEncodedData TimeStamp %.2f, Duration %.2f, FrameType %d", + timestamp.ToSeconds(), duration.ToSeconds(), frameType); + + if (static_cast<int>(totalDuration.ToSeconds()) / + DYNAMIC_MAXKFDIST_CHECK_INTERVAL > + static_cast<int>(mLastKeyFrameDistanceUpdate.ToSeconds()) / + DYNAMIC_MAXKFDIST_CHECK_INTERVAL) { + // The interval has passed since the last keyframe update. Update again. 
+ mLastKeyFrameDistanceUpdate = totalDuration; + const int32_t maxKfDistance = + CalculateMaxKeyFrameDistance().valueOr(*mMaxKeyFrameDistance); + const float diffFactor = + static_cast<float>(maxKfDistance) / *mMaxKeyFrameDistance; + VP8LOG(LogLevel::Debug, "maxKfDistance: %d, factor: %.2f", maxKfDistance, + diffFactor); + if (std::abs(1.0 - diffFactor) > DYNAMIC_MAXKFDIST_DIFFACTOR) { + SetMaxKeyFrameDistance(maxKfDistance); + } + } + + return MakeRefPtr<EncodedFrame>(timestamp, duration.ToMicroseconds(), + PR_USEC_PER_SEC, frameType, + std::move(frameData)); +} + +/** + * Encoding flow in Encode(): + * 1: Assert valid state. + * 2: Encode the video chunks in mSourceSegment in a for-loop. + * 2.1: The duration is taken straight from the video chunk's duration. + * 2.2: Setup the video chunk with mVPXImageWrapper by PrepareRawFrame(). + * 2.3: Pass frame to vp8 encoder by vpx_codec_encode(). + * 2.4: Extract the encoded frame from encoder by ExtractEncodedData(). + * 2.5: Set the nextEncodeOperation for the next frame. + * 2.6: If we are not skipping the next frame, add the encoded frame to + * mEncodedDataQueue. If we are skipping the next frame, extend the encoded + * frame's duration in the next run of the loop. + * 3. Clear aSegment. + */ +nsresult VP8TrackEncoder::Encode(VideoSegment* aSegment) { + MOZ_ASSERT(mInitialized); + MOZ_ASSERT(!IsEncodingComplete()); + + AUTO_PROFILER_LABEL("VP8TrackEncoder::Encode", OTHER); + + EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME; + + RefPtr<EncodedFrame> encodedFrame; + for (VideoSegment::ChunkIterator iter(*aSegment); !iter.IsEnded(); + iter.Next()) { + VideoChunk& chunk = *iter; + + VP8LOG(LogLevel::Verbose, + "nextEncodeOperation is %d for frame of duration %" PRId64, + nextEncodeOperation, chunk.GetDuration()); + + TimeStamp timebase = TimeStamp::Now(); + + // Encode frame. 
+ if (nextEncodeOperation != SKIP_FRAME) { + MOZ_ASSERT(!encodedFrame); + nsresult rv = PrepareRawFrame(chunk); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + + // Encode the data with VP8 encoder + int flags = 0; + if (nextEncodeOperation == ENCODE_I_FRAME) { + VP8LOG(LogLevel::Warning, + "MediaRecorder lagging behind. Encoding keyframe."); + flags |= VPX_EFLAG_FORCE_KF; + } + + // Sum duration of non-key frames and force keyframe if exceeded the + // given keyframe interval + if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) { + if (media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate) + .ToTimeDuration() >= mKeyFrameInterval) { + VP8LOG(LogLevel::Warning, + "Reached mKeyFrameInterval without seeing a keyframe. Forcing " + "one. time: %.2f, interval: %.2f", + media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate) + .ToSeconds(), + mKeyFrameInterval.ToSeconds()); + mDurationSinceLastKeyframe = 0; + flags |= VPX_EFLAG_FORCE_KF; + } + mDurationSinceLastKeyframe += chunk.GetDuration(); + } + + if (vpx_codec_encode(&mVPXContext, &mVPXImageWrapper, mEncodedTimestamp, + (unsigned long)chunk.GetDuration(), flags, + VPX_DL_REALTIME)) { + VP8LOG(LogLevel::Error, "vpx_codec_encode failed to encode the frame."); + return NS_ERROR_FAILURE; + } + + // Move forward the mEncodedTimestamp. + mEncodedTimestamp += chunk.GetDuration(); + + // Extract the encoded data from the underlying encoder and push it to + // mEncodedDataQueue. + auto result = ExtractEncodedData(); + if (result.isErr()) { + VP8LOG(LogLevel::Error, "ExtractEncodedData failed."); + return NS_ERROR_FAILURE; + } + + MOZ_ASSERT(result.inspect(), + "We expected a frame here. EOS is handled explicitly later"); + encodedFrame = result.unwrap(); + } else { + // SKIP_FRAME + + MOZ_DIAGNOSTIC_ASSERT(encodedFrame); + + if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) { + mDurationSinceLastKeyframe += chunk.GetDuration(); + } + + // Move forward the mEncodedTimestamp. 
+ mEncodedTimestamp += chunk.GetDuration(); + + // Extend the duration of the last encoded frame in mEncodedDataQueue + // because this frame will be skipped. + VP8LOG(LogLevel::Warning, + "MediaRecorder lagging behind. Skipping a frame."); + + mExtractedDuration += chunk.mDuration; + if (!mExtractedDuration.isValid()) { + NS_ERROR("skipped duration overflow"); + return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR; + } + + media::TimeUnit totalDuration = + media::TimeUnit(mExtractedDuration.value(), mTrackRate); + media::TimeUnit skippedDuration = totalDuration - mExtractedDurationUs; + mExtractedDurationUs = totalDuration; + if (!skippedDuration.IsValid()) { + NS_ERROR("skipped duration overflow"); + return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR; + } + + encodedFrame = MakeRefPtr<EncodedFrame>( + encodedFrame->mTime, + encodedFrame->mDuration + skippedDuration.ToMicroseconds(), + encodedFrame->mDurationBase, encodedFrame->mFrameType, + encodedFrame->mFrameData); + } + + mMeanFrameEncodeDuration.insert(TimeStamp::Now() - timebase); + mMeanFrameDuration.insert( + media::TimeUnit(chunk.GetDuration(), mTrackRate).ToTimeDuration()); + nextEncodeOperation = GetNextEncodeOperation( + mMeanFrameEncodeDuration.mean(), mMeanFrameDuration.mean()); + + if (nextEncodeOperation != SKIP_FRAME) { + // Note that the next operation might be SKIP_FRAME even if there is no + // next frame. + mEncodedDataQueue.Push(encodedFrame.forget()); + } + } + + if (encodedFrame) { + // Push now if we ended on a SKIP_FRAME before. + mEncodedDataQueue.Push(encodedFrame.forget()); + } + + // Remove the chunks we have processed. + aSegment->Clear(); + + if (mEndOfStream) { + // EOS: Extract the remaining frames from the underlying encoder. + VP8LOG(LogLevel::Debug, "mEndOfStream is true"); + // No more frames will be encoded. Clearing temporary frames saves some + // memory. + if (mI420Frame) { + mI420Frame = nullptr; + mI420FrameSize = 0; + } + // mMuteFrame must be released before gfx shutdown. 
We do it now since it + // may be too late when this VP8TrackEncoder gets destroyed. + mMuteFrame = nullptr; + // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data + // until vpx_codec_get_cx_data return null. + while (true) { + if (vpx_codec_encode(&mVPXContext, nullptr, mEncodedTimestamp, 0, 0, + VPX_DL_REALTIME)) { + return NS_ERROR_FAILURE; + } + auto result = ExtractEncodedData(); + if (result.isErr()) { + return NS_ERROR_FAILURE; + } + if (!result.inspect()) { + // Null means end-of-stream. + break; + } + mEncodedDataQueue.Push(result.unwrap().forget()); + } + mEncodedDataQueue.Finish(); + } + + return NS_OK; +} + +nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk& aChunk) { + gfx::IntSize intrinsicSize = aChunk.mFrame.GetIntrinsicSize(); + RefPtr<Image> img; + if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) { + if (!mMuteFrame || mMuteFrame->GetSize() != intrinsicSize) { + mMuteFrame = mozilla::VideoFrame::CreateBlackImage(intrinsicSize); + } + if (!mMuteFrame) { + VP8LOG(LogLevel::Warning, "Failed to allocate black image of size %dx%d", + intrinsicSize.width, intrinsicSize.height); + return NS_OK; + } + img = mMuteFrame; + } else { + img = aChunk.mFrame.GetImage(); + } + + gfx::IntSize imgSize = img->GetSize(); + if (imgSize != IntSize(mFrameWidth, mFrameHeight)) { + nsresult rv = + Reconfigure(imgSize.width, imgSize.height, *mMaxKeyFrameDistance); + NS_ENSURE_SUCCESS(rv, rv); + } + + MOZ_ASSERT(mFrameWidth == imgSize.width); + MOZ_ASSERT(mFrameHeight == imgSize.height); + + nsresult rv = ConvertToI420(img, mVPXImageWrapper.planes[VPX_PLANE_Y], + mVPXImageWrapper.stride[VPX_PLANE_Y], + mVPXImageWrapper.planes[VPX_PLANE_U], + mVPXImageWrapper.stride[VPX_PLANE_U], + mVPXImageWrapper.planes[VPX_PLANE_V], + mVPXImageWrapper.stride[VPX_PLANE_V]); + if (NS_FAILED(rv)) { + VP8LOG(LogLevel::Error, "Converting to I420 failed"); + return rv; + } + + return NS_OK; +} + +// These two define value used in GetNextEncodeOperation to determine 
the +// EncodeOperation for next target frame. +#define I_FRAME_RATIO (0.85) // Effectively disabled, because perceived quality +#define SKIP_FRAME_RATIO (0.85) + +/** + * Compares the elapsed time from the beginning of GetEncodedTrack and + * the processed frame duration in mSourceSegment + * in order to set the nextEncodeOperation for next target frame. + */ +VP8TrackEncoder::EncodeOperation VP8TrackEncoder::GetNextEncodeOperation( + TimeDuration aTimeElapsed, TimeDuration aProcessedDuration) { + if (mFrameDroppingMode == FrameDroppingMode::DISALLOW) { + return ENCODE_NORMAL_FRAME; + } + + if (aTimeElapsed.ToSeconds() > + aProcessedDuration.ToSeconds() * SKIP_FRAME_RATIO) { + // The encoder is too slow. + // We should skip next frame to consume the mSourceSegment. + return SKIP_FRAME; + } + + if (aTimeElapsed.ToSeconds() > + aProcessedDuration.ToSeconds() * I_FRAME_RATIO) { + // The encoder is a little slow. + // We force the encoder to encode an I-frame to accelerate. + return ENCODE_I_FRAME; + } + + return ENCODE_NORMAL_FRAME; +} + +} // namespace mozilla + +#undef VP8LOG diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h new file mode 100644 index 0000000000..c0e0d3a929 --- /dev/null +++ b/dom/media/encoder/VP8TrackEncoder.h @@ -0,0 +1,167 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
/**
 * VP8TrackEncoder implements VideoTrackEncoder by using the libvpx library.
 * We implement a realtime and variable frame rate encoder. In order to achieve
 * that, there is a frame-drop encoding policy implemented in Encode().
 */
class VP8TrackEncoder : public VideoTrackEncoder {
  // What Encode() should do with the next frame; chosen by
  // GetNextEncodeOperation().
  enum EncodeOperation {
    ENCODE_NORMAL_FRAME,  // VP8 track encoder works normally.
    ENCODE_I_FRAME,       // The next frame will be encoded as I-Frame.
    SKIP_FRAME,           // Skip the next frame.
  };

 public:
  // aKeyFrameIntervalFactor overrides the default factor applied to the key
  // frame interval when computing kf_max_dist (see mKeyFrameIntervalFactor).
  VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
                  TrackRate aTrackRate,
                  MediaQueue<EncodedFrame>& aEncodedDataQueue,
                  FrameDroppingMode aFrameDroppingMode,
                  Maybe<float> aKeyFrameIntervalFactor = Nothing());
  virtual ~VP8TrackEncoder();

  // Returns the metadata created by Init(), or nullptr if the encoder has not
  // been initialized.
  already_AddRefed<TrackMetadataBase> GetMetadata() final;

 protected:
  // Sets up the vpx encoder, the I420 buffer and the metadata for the given
  // frame size, display size and estimated frame rate.
  nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
                int32_t aDisplayHeight, float aEstimatedFrameRate) final;

 private:
  // Initiates the underlying vpx encoder.
  nsresult InitInternal(int32_t aWidth, int32_t aHeight,
                        int32_t aMaxKeyFrameDistance);

  // Get the EncodeOperation for next target frame, based on how long encoding
  // takes (aTimeElapsed) relative to the media time covered
  // (aProcessedDuration).
  EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed,
                                         TimeDuration aProcessedDuration);

  // Extracts the encoded data from the underlying encoder and returns it.
  // Return value: An EncodedFrame if a frame was extracted.
  //               nullptr if we reached end-of-stream or nothing was available
  //               from the underlying encoder.
  //               An error nsresult otherwise.
  Result<RefPtr<EncodedFrame>, nsresult> ExtractEncodedData();

  // Takes the data in aSegment, encodes it, extracts it, and pushes it to
  // mEncodedDataQueue.
  nsresult Encode(VideoSegment* aSegment) final;

  // Prepare the input data to the mVPXImageWrapper for encoding.
  nsresult PrepareRawFrame(VideoChunk& aChunk);

  // Re-configures an existing encoder with a new frame size and/or a new max
  // keyframe distance.
  nsresult Reconfigure(int32_t aWidth, int32_t aHeight,
                       int32_t aMaxKeyFrameDistance);

  // Destroys the codec context if the encoder is initialized, and marks the
  // encoder as uninitialized. Does not de-allocate the structs.
  void Destroy();

  // Helper that calculates the desired max keyframe distance (vp8 config's
  // kf_max_dist) based on configured key frame interval and recent framerate.
  // Returns Nothing if not enough input data is available.
  Maybe<int32_t> CalculateMaxKeyFrameDistance(
      Maybe<float> aEstimatedFrameRate = Nothing()) const;

  // Applies a new max keyframe distance: re-configures the encoder if it is
  // already initialized, otherwise stores the value in mMaxKeyFrameDistance.
  void SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance);

  // VP8 Metadata, set on successful Init and never modified again.
  RefPtr<VP8Metadata> mMetadata;

  // The width the encoder is currently configured with. The input frames to the
  // underlying encoder must match this width, i.e., the underlying encoder will
  // not do any resampling.
  int mFrameWidth = 0;

  // The height the encoder is currently configured with. The input frames to
  // the underlying encoder must match this height, i.e., the underlying encoder
  // will not do any resampling.
  int mFrameHeight = 0;

  // Encoded timestamp: the sum of the durations of all chunks handled by
  // Encode() so far, in mTrackRate units.
  TrackTime mEncodedTimestamp = 0;

  // Total duration in mTrackRate extracted from the underlying encoder.
  CheckedInt64 mExtractedDuration;

  // Total duration extracted from the underlying encoder, as a TimeUnit.
  media::TimeUnit mExtractedDurationUs;

  // Black frame used for null or force-black chunks. It is cached and
  // re-created when the required size changes.
  RefPtr<layers::Image> mMuteFrame;

  // I420 frame, for converting to I420. mVPXImageWrapper wraps this buffer.
  UniquePtr<uint8_t[]> mI420Frame;
  // Size in bytes of the buffer held by mI420Frame, or 0 if there is none.
  size_t mI420FrameSize = 0;

  /**
   * A duration of non-key frames in mTrackRate.
   */
  TrackTime mDurationSinceLastKeyframe = 0;

  /**
   * The max interval at which a keyframe gets forced (causing video quality
   * degradation). The encoder is configured to encode keyframes more often than
   * this, though it can vary based on frame rate.
   */
  const TimeDuration mKeyFrameInterval;

  /**
   * A factor used to multiply the estimated key-frame-interval based on
   * mKeyFrameInterval (ms) with when configuring kf_max_dist in the encoder.
   * The goal is to set it a bit below 1.0 to avoid falling back to forcing
   * keyframes.
   * NB that for purposes of testing the mKeyFrameInterval fallback this may be
   * set to values higher than 1.0.
   */
  float mKeyFrameIntervalFactor;

  /**
   * Total extracted duration at the time we last updated the
   * key-frame-distance.
   */
  media::TimeUnit mLastKeyFrameDistanceUpdate;

  /**
   * The max keyframe distance, in frames, last used to configure kf_max_dist.
   */
  Maybe<int32_t> mMaxKeyFrameDistance;

  /**
   * The mean duration of recent frames.
   */
  RollingMean<TimeDuration, TimeDuration> mMeanFrameDuration{30};

  /**
   * The mean wall-clock time it took to encode recent frames.
   */
  RollingMean<TimeDuration, TimeDuration> mMeanFrameEncodeDuration{30};

  // VP8 related members.
  // Codec context structure.
  vpx_codec_ctx_t mVPXContext;
  // Image Descriptor.
  vpx_image_t mVPXImageWrapper;
};
+ +with Files("*"): + BUG_COMPONENT = ("Core", "Audio/Video: Recording") + +EXPORTS += [ + "ContainerWriter.h", + "EncodedFrame.h", + "MediaEncoder.h", + "OpusTrackEncoder.h", + "TrackEncoder.h", + "TrackMetadataBase.h", + "VP8TrackEncoder.h", +] + +UNIFIED_SOURCES += [ + "MediaEncoder.cpp", + "Muxer.cpp", + "OpusTrackEncoder.cpp", + "TrackEncoder.cpp", + "VP8TrackEncoder.cpp", +] + +FINAL_LIBRARY = "xul" + +LOCAL_INCLUDES += [ + "/dom/media", + "/ipc/chromium/src", + "/media/libyuv/libyuv/include", +] + +include("/ipc/chromium/chromium-config.mozbuild") + +# Suppress some GCC warnings being treated as errors: +# - about attributes on forward declarations for types that are already +# defined, which complains about an important MOZ_EXPORT for android::AString +if CONFIG["CC_TYPE"] in ("clang", "gcc"): + CXXFLAGS += ["-Wno-error=attributes"] |