summaryrefslogtreecommitdiffstats
path: root/dom/media/encoder
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /dom/media/encoder
parentInitial commit. (diff)
downloadthunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
Adding upstream version 1:115.7.0. (refs: upstream/1%115.7.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/media/encoder')
-rw-r--r--dom/media/encoder/ContainerWriter.h75
-rw-r--r--dom/media/encoder/EncodedFrame.h64
-rw-r--r--dom/media/encoder/MediaEncoder.cpp1142
-rw-r--r--dom/media/encoder/MediaEncoder.h400
-rw-r--r--dom/media/encoder/Muxer.cpp185
-rw-r--r--dom/media/encoder/Muxer.h71
-rw-r--r--dom/media/encoder/OpusTrackEncoder.cpp454
-rw-r--r--dom/media/encoder/OpusTrackEncoder.h117
-rw-r--r--dom/media/encoder/TrackEncoder.cpp822
-rw-r--r--dom/media/encoder/TrackEncoder.h501
-rw-r--r--dom/media/encoder/TrackMetadataBase.h76
-rw-r--r--dom/media/encoder/VP8TrackEncoder.cpp720
-rw-r--r--dom/media/encoder/VP8TrackEncoder.h167
-rw-r--r--dom/media/encoder/moz.build42
14 files changed, 4836 insertions, 0 deletions
diff --git a/dom/media/encoder/ContainerWriter.h b/dom/media/encoder/ContainerWriter.h
new file mode 100644
index 0000000000..724c8b90c9
--- /dev/null
+++ b/dom/media/encoder/ContainerWriter.h
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ContainerWriter_h_
+#define ContainerWriter_h_
+
+#include "nsTArray.h"
+#include "EncodedFrame.h"
+#include "TrackMetadataBase.h"
+
+namespace mozilla {
+/**
+ * ContainerWriter packs encoded track data into a specific media container.
+ * It is an abstract interface: WriteEncodedTrack, SetMetadata and
+ * GetContainerData are pure virtual and must be provided by each concrete
+ * writer.
+ */
+class ContainerWriter {
+ public:
+ ContainerWriter() : mInitialized(false), mIsWritingComplete(false) {}
+ virtual ~ContainerWriter() {}
+ // Mapping to DOMMediaStream::TrackTypeHints
+ enum {
+ CREATE_AUDIO_TRACK = 1 << 0,
+ CREATE_VIDEO_TRACK = 1 << 1,
+ };
+ enum { END_OF_STREAM = 1 << 0 };  // Flag for WriteEncodedTrack's aFlags.
+
+ /**
+ * Writes encoded track data from aData into the internal stream of the
+ * container writer. aFlags is used to signal different conditions, such as
+ * END_OF_STREAM, to the implementation. Each implementation may handle
+ * different flags, and should document them accordingly. Currently,
+ * WriteEncodedTrack doesn't support explicit track specification, though each
+ * implementation may provide logic to allocate frames into different tracks.
+ */
+ virtual nsresult WriteEncodedTrack(
+ const nsTArray<RefPtr<EncodedFrame>>& aData, uint32_t aFlags = 0) = 0;
+
+ /**
+ * Stores the metadata for all given tracks to the muxer.
+ *
+ * This method checks the integrity of aMetadata.
+ * If the metadata isn't well formatted, this method returns NS_ERROR_FAILURE.
+ * If the metadata is well formatted, it stores the metadata and returns
+ * NS_OK.
+ */
+ virtual nsresult SetMetadata(
+ const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) = 0;
+
+ /**
+ * Indicates whether the writer has finished outputting data.
+ */
+ virtual bool IsWritingComplete() { return mIsWritingComplete; }
+
+ enum { FLUSH_NEEDED = 1 << 0, GET_HEADER = 1 << 1 };  // For GetContainerData.
+
+ /**
+ * Copies the final container data to a buffer if it has accumulated enough
+ * packets from WriteEncodedTrack. This buffer of data is appended to
+ * aOutputBufs, and existing elements of aOutputBufs should not be modified.
+ * Passing FLUSH_NEEDED in aFlags forces OggWriter to flush an ogg page even
+ * if it is not full, and to copy that container data to a buffer that is
+ * appended to aOutputBufs.
+ */
+ virtual nsresult GetContainerData(nsTArray<nsTArray<uint8_t>>* aOutputBufs,
+ uint32_t aFlags = 0) = 0;
+
+ protected:
+ bool mInitialized;
+ bool mIsWritingComplete;  // Value reported by IsWritingComplete().
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/EncodedFrame.h b/dom/media/encoder/EncodedFrame.h
new file mode 100644
index 0000000000..e76babef89
--- /dev/null
+++ b/dom/media/encoder/EncodedFrame.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef EncodedFrame_h_
+#define EncodedFrame_h_
+
+#include "nsISupportsImpl.h"
+#include "mozilla/media/MediaUtils.h"
+#include "TimeUnits.h"
+#include "VideoUtils.h"
+
+namespace mozilla {
+
+// Represents an encoded frame emitted by an encoder. All data members are
+// const, so a frame cannot be modified after construction.
+class EncodedFrame final {
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(EncodedFrame)
+ public:
+ enum FrameType {
+ VP8_I_FRAME, // VP8 intraframe
+ VP8_P_FRAME, // VP8 predicted frame
+ OPUS_AUDIO_FRAME, // Opus audio frame
+ UNKNOWN // FrameType not set
+ };
+ using ConstFrameData = const media::Refcountable<nsTArray<uint8_t>>;
+ using FrameData = media::Refcountable<nsTArray<uint8_t>>;
+ EncodedFrame(const media::TimeUnit& aTime, uint64_t aDuration,
+ uint64_t aDurationBase, FrameType aFrameType,
+ RefPtr<ConstFrameData> aData)
+ : mTime(aTime),
+ mDuration(aDuration),
+ mDurationBase(aDurationBase),
+ mFrameType(aFrameType),
+ mFrameData(std::move(aData)) {
+ MOZ_ASSERT(mFrameData);  // A frame must always carry data.
+ MOZ_ASSERT_IF(mFrameType == VP8_I_FRAME, mDurationBase == PR_USEC_PER_SEC);  // VP8: microsecond base.
+ MOZ_ASSERT_IF(mFrameType == VP8_P_FRAME, mDurationBase == PR_USEC_PER_SEC);
+ MOZ_ASSERT_IF(mFrameType == OPUS_AUDIO_FRAME, mDurationBase == 48000);  // Opus: 48000 base.
+ }
+ // Timestamp of this frame.
+ const media::TimeUnit mTime;
+ // The playback duration of this packet, counted in units of 1/mDurationBase.
+ const uint64_t mDuration;
+ // The time base of mDuration.
+ const uint64_t mDurationBase;
+ // Represents what is in the FrameData
+ const FrameType mFrameType;
+ // Encoded data
+ const RefPtr<ConstFrameData> mFrameData;
+
+ // The end time of the frame: mTime plus mDuration expressed in mDurationBase.
+ media::TimeUnit GetEndTime() const {
+ return mTime + media::TimeUnit(mDuration, mDurationBase);
+ }
+
+ private:
+ // Private destructor, to discourage deletion outside of Release():
+ ~EncodedFrame() = default;
+};
+
+} // namespace mozilla
+
+#endif // EncodedFrame_h_
diff --git a/dom/media/encoder/MediaEncoder.cpp b/dom/media/encoder/MediaEncoder.cpp
new file mode 100644
index 0000000000..4eca742c77
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -0,0 +1,1142 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MediaEncoder.h"
+
+#include <algorithm>
+#include "AudioNodeEngine.h"
+#include "AudioNodeTrack.h"
+#include "DriftCompensation.h"
+#include "MediaDecoder.h"
+#include "MediaTrackGraphImpl.h"
+#include "MediaTrackListener.h"
+#include "mozilla/dom/AudioNode.h"
+#include "mozilla/dom/AudioStreamTrack.h"
+#include "mozilla/dom/Blob.h"
+#include "mozilla/dom/BlobImpl.h"
+#include "mozilla/dom/MediaStreamTrack.h"
+#include "mozilla/dom/MutableBlobStorage.h"
+#include "mozilla/dom/VideoStreamTrack.h"
+#include "mozilla/gfx/Point.h" // IntSize
+#include "mozilla/Logging.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/TaskQueue.h"
+#include "mozilla/Unused.h"
+#include "Muxer.h"
+#include "nsMimeTypes.h"
+#include "nsThreadUtils.h"
+#include "OggWriter.h"
+#include "OpusTrackEncoder.h"
+#include "TimeUnits.h"
+#include "Tracing.h"
+
+#include "VP8TrackEncoder.h"
+#include "WebMWriter.h"
+
+mozilla::LazyLogModule gMediaEncoderLog("MediaEncoder");
+#define LOG(type, msg) MOZ_LOG(gMediaEncoderLog, type, msg)
+
+namespace mozilla {
+
+using namespace dom;
+using namespace media;
+
+namespace {
+// Adapts the MutableBlobStorageCallback interface to a
+// MediaEncoder::BlobPromise: the promise handed out by Promise() is settled
+// when BlobStoreCompleted() is invoked.
+class BlobStorer : public MutableBlobStorageCallback {
+ public:
+  BlobStorer() = default;
+
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BlobStorer, override)
+
+  // Resolves the promise with aBlobImpl on success, or rejects it with aRv.
+  void BlobStoreCompleted(MutableBlobStorage*, BlobImpl* aBlobImpl,
+                          nsresult aRv) override {
+    MOZ_ASSERT(NS_IsMainThread());
+    if (NS_SUCCEEDED(aRv)) {
+      mHolder.Resolve(aBlobImpl, __func__);
+    } else {
+      mHolder.Reject(aRv, __func__);
+    }
+  }
+
+  // Returns the promise that BlobStoreCompleted() will settle.
+  RefPtr<MediaEncoder::BlobPromise> Promise() {
+    return mHolder.Ensure(__func__);
+  }
+
+ private:
+  virtual ~BlobStorer() = default;
+
+  MozPromiseHolder<MediaEncoder::BlobPromise> mHolder;
+};
+} // namespace
+
+class MediaEncoder::AudioTrackListener : public DirectMediaTrackListener {  // Forwards audio track notifications to the audio encoder on mEncoderThread.
+ public:
+ AudioTrackListener(RefPtr<DriftCompensator> aDriftCompensator,
+ RefPtr<MediaEncoder> aMediaEncoder)
+ : mDirectConnected(false),
+ mInitialized(false),
+ mRemoved(false),
+ mDriftCompensator(std::move(aDriftCompensator)),
+ mMediaEncoder(std::move(aMediaEncoder)),
+ mEncoderThread(mMediaEncoder->mEncoderThread),  // Reads the member, so mMediaEncoder must already be set.
+ mShutdownPromise(mShutdownHolder.Ensure(__func__)) {
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mAudioEncoder);
+ MOZ_ASSERT(mEncoderThread);
+ }
+
+ void NotifyDirectListenerInstalled(InstallationResult aResult) override {  // Records whether the direct listener got installed.
+ if (aResult == InstallationResult::SUCCESS) {
+ LOG(LogLevel::Info, ("Audio track direct listener installed"));
+ mDirectConnected = true;
+ } else {
+ LOG(LogLevel::Info, ("Audio track failed to install direct listener"));
+ MOZ_ASSERT(!mDirectConnected);
+ }
+ }
+
+ void NotifyDirectListenerUninstalled() override {
+ mDirectConnected = false;
+
+ if (mRemoved) {  // NotifyRemoved() already ran, so the references can be dropped now.
+ mMediaEncoder = nullptr;
+ mEncoderThread = nullptr;
+ }
+ }
+
+ void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+ const MediaSegment& aQueuedMedia) override {
+ TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p",
+ mMediaEncoder.get());
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mEncoderThread);
+
+ if (!mInitialized) {  // First notification: tell the drift compensator when audio started.
+ mDriftCompensator->NotifyAudioStart(TimeStamp::Now());
+ mInitialized = true;
+ }
+
+ mDriftCompensator->NotifyAudio(aQueuedMedia.GetDuration());
+
+ const AudioSegment& audio = static_cast<const AudioSegment&>(aQueuedMedia);
+
+ AudioSegment copy;  // The runnable needs its own segment; aQueuedMedia is only borrowed.
+ copy.AppendSlice(audio, 0, audio.GetDuration());
+
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::AudioTrackEncoder::AppendAudioSegment",
+ [encoder = mMediaEncoder, copy = std::move(copy)]() mutable {
+ encoder->mAudioEncoder->AppendAudioSegment(std::move(copy));
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+
+ void NotifyEnded(MediaTrackGraph* aGraph) override {  // Signals end of stream to the audio encoder.
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mAudioEncoder);
+ MOZ_ASSERT(mEncoderThread);
+
+ nsresult rv = mEncoderThread->Dispatch(
+ NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream",
+ [encoder = mMediaEncoder] {
+ encoder->mAudioEncoder->NotifyEndOfStream();
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+
+ void NotifyRemoved(MediaTrackGraph* aGraph) override {  // Ends the stream like NotifyEnded(), then resolves the shutdown promise.
+ nsresult rv = mEncoderThread->Dispatch(
+ NS_NewRunnableFunction("mozilla::AudioTrackEncoder::NotifyEndOfStream",
+ [encoder = mMediaEncoder] {
+ encoder->mAudioEncoder->NotifyEndOfStream();
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+
+ mRemoved = true;
+
+ if (!mDirectConnected) {  // Otherwise NotifyDirectListenerUninstalled() drops the references.
+ mMediaEncoder = nullptr;
+ mEncoderThread = nullptr;
+ }
+
+ mShutdownHolder.Resolve(true, __func__);
+ }
+
+ const RefPtr<GenericNonExclusivePromise>& OnShutdown() const {  // Promise resolved by NotifyRemoved().
+ return mShutdownPromise;
+ }
+
+ private:
+ bool mDirectConnected;  // True between a successful direct-listener install and its uninstall.
+ bool mInitialized;  // True once the first NotifyQueuedChanges() has been seen.
+ bool mRemoved;  // True once NotifyRemoved() has run.
+ const RefPtr<DriftCompensator> mDriftCompensator;
+ RefPtr<MediaEncoder> mMediaEncoder;
+ RefPtr<TaskQueue> mEncoderThread;
+ MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder;
+ const RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+};
+
+class MediaEncoder::VideoTrackListener : public DirectMediaTrackListener {  // Forwards video track notifications to the video encoder on mEncoderThread.
+ public:
+ explicit VideoTrackListener(RefPtr<MediaEncoder> aMediaEncoder)
+ : mDirectConnected(false),
+ mInitialized(false),
+ mRemoved(false),
+ mPendingAdvanceCurrentTime(false),
+ mMediaEncoder(std::move(aMediaEncoder)),
+ mEncoderThread(mMediaEncoder->mEncoderThread),  // Reads the member, so mMediaEncoder must already be set.
+ mShutdownPromise(mShutdownHolder.Ensure(__func__)) {
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mEncoderThread);
+ }
+
+ void NotifyDirectListenerInstalled(InstallationResult aResult) override {  // Records whether the direct listener got installed.
+ if (aResult == InstallationResult::SUCCESS) {
+ LOG(LogLevel::Info, ("Video track direct listener installed"));
+ mDirectConnected = true;
+ } else {
+ LOG(LogLevel::Info, ("Video track failed to install direct listener"));
+ MOZ_ASSERT(!mDirectConnected);
+ return;
+ }
+ }
+
+ void NotifyDirectListenerUninstalled() override {
+ mDirectConnected = false;
+
+ if (mRemoved) {  // NotifyRemoved() already ran, so the references can be dropped now.
+ mMediaEncoder = nullptr;
+ mEncoderThread = nullptr;
+ }
+ }
+
+ void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+ const MediaSegment& aQueuedMedia) override {
+ TRACE_COMMENT("MediaEncoder::NotifyQueuedChanges", "%p",
+ mMediaEncoder.get());
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+ MOZ_ASSERT(mEncoderThread);
+
+ mCurrentTime = TimeStamp::Now();
+ if (!mInitialized) {  // First notification: give the encoder its start offset.
+ nsresult rv = mEncoderThread->Dispatch(
+ NS_NewRunnableFunction("mozilla::VideoTrackEncoder::SetStartOffset",
+ [encoder = mMediaEncoder, now = mCurrentTime] {
+ encoder->mVideoEncoder->SetStartOffset(now);
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ mInitialized = true;
+ }
+
+ if (!mPendingAdvanceCurrentTime) {  // Skip if an AdvanceCurrentTime runnable is still pending.
+ mPendingAdvanceCurrentTime = true;
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::AdvanceCurrentTime",
+ [encoder = mMediaEncoder, now = mCurrentTime] {
+ encoder->mVideoListener->mPendingAdvanceCurrentTime = false;  // Cleared when the runnable executes.
+ encoder->mVideoEncoder->AdvanceCurrentTime(now);
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+ }
+
+ void NotifyRealtimeTrackData(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+ const MediaSegment& aMedia) override {
+ TRACE_COMMENT("MediaEncoder::NotifyRealtimeTrackData", "%p",
+ mMediaEncoder.get());
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+ MOZ_ASSERT(mEncoderThread);
+ MOZ_ASSERT(aMedia.GetType() == MediaSegment::VIDEO);
+
+ const VideoSegment& video = static_cast<const VideoSegment&>(aMedia);
+ VideoSegment copy;  // Rebuilt frame by frame so the runnable owns its own segment.
+ for (VideoSegment::ConstChunkIterator iter(video); !iter.IsEnded();
+ iter.Next()) {
+ copy.AppendFrame(do_AddRef(iter->mFrame.GetImage()),
+ iter->mFrame.GetIntrinsicSize(),
+ iter->mFrame.GetPrincipalHandle(),
+ iter->mFrame.GetForceBlack(), iter->mTimeStamp);
+ }
+
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::AppendVideoSegment",
+ [encoder = mMediaEncoder, copy = std::move(copy)]() mutable {
+ encoder->mVideoEncoder->AppendVideoSegment(std::move(copy));
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+
+ void NotifyEnabledStateChanged(MediaTrackGraph* aGraph,
+ bool aEnabled) override {
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+ MOZ_ASSERT(mEncoderThread);
+
+ nsresult rv;
+ if (aEnabled) {  // The timestamp is captured here, when the runnable is created.
+ rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::Enable",
+ [encoder = mMediaEncoder, now = TimeStamp::Now()] {
+ encoder->mVideoEncoder->Enable(now);
+ }));
+ } else {
+ rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::Disable",
+ [encoder = mMediaEncoder, now = TimeStamp::Now()] {
+ encoder->mVideoEncoder->Disable(now);
+ }));
+ }
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+
+ void NotifyEnded(MediaTrackGraph* aGraph) override {  // Advances to the last seen time (if any), then signals end of stream.
+ MOZ_ASSERT(mMediaEncoder);
+ MOZ_ASSERT(mMediaEncoder->mVideoEncoder);
+ MOZ_ASSERT(mEncoderThread);
+
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::NotifyEndOfStream",
+ [encoder = mMediaEncoder, now = mCurrentTime] {
+ if (!now.IsNull()) {  // Null when NotifyQueuedChanges() never ran.
+ encoder->mVideoEncoder->AdvanceCurrentTime(now);
+ }
+ encoder->mVideoEncoder->NotifyEndOfStream();
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ }
+
+ void NotifyRemoved(MediaTrackGraph* aGraph) override {  // Ends the stream like NotifyEnded(), then resolves the shutdown promise.
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::NotifyEndOfStream",
+ [encoder = mMediaEncoder, now = mCurrentTime] {
+ if (!now.IsNull()) {
+ encoder->mVideoEncoder->AdvanceCurrentTime(now);
+ }
+ encoder->mVideoEncoder->NotifyEndOfStream();
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+
+ mRemoved = true;
+
+ if (!mDirectConnected) {  // Otherwise NotifyDirectListenerUninstalled() drops the references.
+ mMediaEncoder = nullptr;
+ mEncoderThread = nullptr;
+ }
+
+ mShutdownHolder.Resolve(true, __func__);
+ }
+
+ const RefPtr<GenericNonExclusivePromise>& OnShutdown() const {  // Promise resolved by NotifyRemoved().
+ return mShutdownPromise;
+ }
+
+ private:
+ bool mDirectConnected;  // True between a successful direct-listener install and its uninstall.
+ bool mInitialized;  // True once SetStartOffset has been dispatched.
+ bool mRemoved;  // True once NotifyRemoved() has run.
+ TimeStamp mCurrentTime;  // Time of the latest NotifyQueuedChanges(); null until the first one.
+ Atomic<bool> mPendingAdvanceCurrentTime;  // Set by the listener, cleared by the dispatched runnable.
+ RefPtr<MediaEncoder> mMediaEncoder;
+ RefPtr<TaskQueue> mEncoderThread;
+ MozPromiseHolder<GenericNonExclusivePromise> mShutdownHolder;
+ const RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+};
+
+// Relays TrackEncoder state changes to the owning MediaEncoder. Every
+// callback asserts that it runs on the encoder task queue. After Forget()
+// the callbacks become no-ops.
+class MediaEncoder::EncoderListener : public TrackEncoderListener {
+ public:
+  EncoderListener(TaskQueue* aEncoderThread, MediaEncoder* aEncoder)
+      : mEncoderThread(aEncoderThread), mEncoder(aEncoder) {}
+
+  // Drops the reference to the MediaEncoder; later callbacks are ignored.
+  void Forget() {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    mEncoder = nullptr;
+  }
+
+  // A track encoder finished initializing.
+  void Initialized(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    MOZ_ASSERT(aTrackEncoder->IsInitialized());
+
+    if (mEncoder) {
+      mEncoder->UpdateInitialized();
+    }
+  }
+
+  // A track encoder started.
+  void Started(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+    MOZ_ASSERT(aTrackEncoder->IsStarted());
+
+    if (mEncoder) {
+      mEncoder->UpdateStarted();
+    }
+  }
+
+  // A track encoder reported an error.
+  void Error(TrackEncoder* aTrackEncoder) override {
+    MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+    if (mEncoder) {
+      mEncoder->SetError();
+    }
+  }
+
+ protected:
+  RefPtr<TaskQueue> mEncoderThread;
+  RefPtr<MediaEncoder> mEncoder;
+};
+
+MediaEncoder::MediaEncoder(  // Wires up the muxer, track listeners and encoded-queue event listeners.
+ RefPtr<TaskQueue> aEncoderThread,
+ RefPtr<DriftCompensator> aDriftCompensator,
+ UniquePtr<ContainerWriter> aWriter,
+ UniquePtr<AudioTrackEncoder> aAudioEncoder,
+ UniquePtr<VideoTrackEncoder> aVideoEncoder,
+ UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue,
+ UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue,
+ TrackRate aTrackRate, const nsAString& aMimeType, uint64_t aMaxMemory,
+ TimeDuration aTimeslice)
+ : mMainThread(GetMainThreadSerialEventTarget()),
+ mEncoderThread(std::move(aEncoderThread)),
+ mEncodedAudioQueue(std::move(aEncodedAudioQueue)),
+ mEncodedVideoQueue(std::move(aEncodedVideoQueue)),
+ mMuxer(MakeUnique<Muxer>(std::move(aWriter), *mEncodedAudioQueue,
+ *mEncodedVideoQueue)),
+ mAudioEncoder(std::move(aAudioEncoder)),
+ mAudioListener(mAudioEncoder ? MakeAndAddRef<AudioTrackListener>(  // Only created when there is an audio encoder.
+ std::move(aDriftCompensator), this)
+ : nullptr),
+ mVideoEncoder(std::move(aVideoEncoder)),
+ mVideoListener(mVideoEncoder ? MakeAndAddRef<VideoTrackListener>(this)  // Only created when there is a video encoder.
+ : nullptr),
+ mEncoderListener(MakeAndAddRef<EncoderListener>(mEncoderThread, this)),
+ mMimeType(aMimeType),
+ mMaxMemory(aMaxMemory),
+ mTimeslice(aTimeslice),
+ mStartTime(TimeStamp::Now()),
+ mInitialized(false),
+ mStarted(false),
+ mCompleted(false),
+ mError(false) {
+ if (mAudioEncoder) {
+ mAudioPushListener = mEncodedAudioQueue->PushEvent().Connect(
+ mEncoderThread, this, &MediaEncoder::OnEncodedAudioPushed);
+ mAudioFinishListener = mEncodedAudioQueue->FinishEvent().Connect(
+ mEncoderThread, this, &MediaEncoder::MaybeShutdown);
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::AudioTrackEncoder::RegisterListener",
+ [self = RefPtr<MediaEncoder>(this), this] {  // `self` keeps the encoder alive until the runnable runs.
+ mAudioEncoder->RegisterListener(mEncoderListener);
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ } else {  // No audio: mark the audio side as already finished.
+ mMuxedAudioEndTime = TimeUnit::FromInfinity();
+ mEncodedAudioQueue->Finish();
+ }
+ if (mVideoEncoder) {
+ mVideoPushListener = mEncodedVideoQueue->PushEvent().Connect(
+ mEncoderThread, this, &MediaEncoder::OnEncodedVideoPushed);
+ mVideoFinishListener = mEncodedVideoQueue->FinishEvent().Connect(
+ mEncoderThread, this, &MediaEncoder::MaybeShutdown);
+ nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+ "mozilla::VideoTrackEncoder::RegisterListener",
+ [self = RefPtr<MediaEncoder>(this), this] {
+ mVideoEncoder->RegisterListener(mEncoderListener);
+ }));
+ MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
+ Unused << rv;
+ } else {  // No video: mark the video side as already finished.
+ mMuxedVideoEndTime = TimeUnit::FromInfinity();
+ mEncodedVideoQueue->Finish();
+ }
+}
+
+MediaEncoder::~MediaEncoder() {  // Asserts that the track, node, port and pipe-track members are all null.
+ MOZ_ASSERT(!mAudioTrack);
+ MOZ_ASSERT(!mVideoTrack);
+ MOZ_ASSERT(!mAudioNode);
+ MOZ_ASSERT(!mInputPort);
+ MOZ_ASSERT(!mPipeTrack);
+}
+
+// Creates mGraphTrack on first use, as a source track in the graph that
+// aTrack belongs to. Later calls are no-ops.
+void MediaEncoder::EnsureGraphTrackFrom(MediaTrack* aTrack) {
+  if (!mGraphTrack) {
+    MOZ_DIAGNOSTIC_ASSERT(!aTrack->IsDestroyed());
+    mGraphTrack = MakeAndAddRef<SharedDummyTrack>(
+        aTrack->GraphImpl()->CreateSourceTrack(MediaSegment::VIDEO));
+  }
+}
+
+// Wraps aRunnable in a ControlMessage and appends it to the graph that
+// mGraphTrack belongs to. Requires EnsureGraphTrackFrom() to have been called.
+void MediaEncoder::RunOnGraph(already_AddRefed<Runnable> aRunnable) {
+  MOZ_ASSERT(mGraphTrack);
+
+  // Control message whose only job is to run the wrapped runnable.
+  class RunnableMessage : public ControlMessage {
+   public:
+    explicit RunnableMessage(already_AddRefed<Runnable> aRunnable)
+        : ControlMessage(nullptr), mRunnable(aRunnable) {}
+
+    void Run() override {
+      TRACE("MediaEncoder::RunOnGraph");
+      mRunnable->Run();
+    }
+
+    const RefPtr<Runnable> mRunnable;
+  };
+
+  auto* graph = mGraphTrack->mTrack->GraphImpl();
+  graph->AppendMessage(MakeUnique<RunnableMessage>(std::move(aRunnable)));
+}
+
+// Suspends both track encoders. The request is routed through the graph
+// (RunOnGraph) and from there dispatched to the encoder task queue.
+void MediaEncoder::Suspend() {
+  RunOnGraph(NS_NewRunnableFunction(
+      "MediaEncoder::Suspend (graph)",
+      [self = RefPtr<MediaEncoder>(this), this] {
+        // Runs on the encoder task queue; the timestamp is taken when this
+        // closure is created.
+        auto suspendEncoders = [self, this, now = TimeStamp::Now()] {
+          if (mAudioEncoder) {
+            mAudioEncoder->Suspend();
+          }
+          if (mVideoEncoder) {
+            mVideoEncoder->Suspend(now);
+          }
+        };
+        const nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+            "MediaEncoder::Suspend (encoder)", std::move(suspendEncoders)));
+        // RunOnGraph added an extra async step, so mEncoderThread may have
+        // shut down by now. A failed dispatch is deliberately ignored.
+        Unused << rv;
+      }));
+}
+
+// Resumes both track encoders. The request is routed through the graph
+// (RunOnGraph) and from there dispatched to the encoder task queue.
+void MediaEncoder::Resume() {
+  RunOnGraph(NS_NewRunnableFunction(
+      "MediaEncoder::Resume (graph)",
+      [self = RefPtr<MediaEncoder>(this), this] {
+        // Runs on the encoder task queue; the timestamp is taken when this
+        // closure is created.
+        auto resumeEncoders = [self, this, now = TimeStamp::Now()] {
+          if (mAudioEncoder) {
+            mAudioEncoder->Resume();
+          }
+          if (mVideoEncoder) {
+            mVideoEncoder->Resume(now);
+          }
+        };
+        const nsresult rv = mEncoderThread->Dispatch(NS_NewRunnableFunction(
+            "MediaEncoder::Resume (encoder)", std::move(resumeEncoders)));
+        // RunOnGraph added an extra async step, so mEncoderThread may have
+        // shut down by now. A failed dispatch is deliberately ignored.
+        Unused << rv;
+      }));
+}
+
+// Connects the audio listener to output aOutput of aNode. Main thread only.
+// Only one audio node is supported; a second call asserts and returns.
+void MediaEncoder::ConnectAudioNode(AudioNode* aNode, uint32_t aOutput) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (mAudioNode) {
+    MOZ_ASSERT(false, "Only one audio node supported");
+    return;
+  }
+
+  // Only AudioNodeTrack of kind EXTERNAL_OUTPUT stores output audio data in
+  // the track (see AudioNodeTrack::AdvanceOutputSegment()). That means
+  // forwarding input track in recorder session won't be able to copy data from
+  // the track of non-destination node. Create a pipe track in this case.
+  if (aNode->NumberOfOutputs() > 0) {
+    AudioContext* ctx = aNode->Context();
+    AudioNodeEngine* engine = new AudioNodeEngine(nullptr);
+    AudioNodeTrack::Flags flags = AudioNodeTrack::EXTERNAL_OUTPUT |
+                                  AudioNodeTrack::NEED_MAIN_THREAD_ENDED;
+    mPipeTrack = AudioNodeTrack::Create(ctx, engine, flags, ctx->Graph());
+    // Reuse the null-checked pointer instead of calling GetTrack() again.
+    if (AudioNodeTrack* nodeTrack = aNode->GetTrack()) {
+      mInputPort = mPipeTrack->AllocateInputPort(nodeTrack, 0, aOutput);
+    }
+  }
+
+  mAudioNode = aNode;
+
+  // Listen on the pipe track when one was created, else on the node's track.
+  if (mPipeTrack) {
+    mPipeTrack->AddListener(mAudioListener);
+    EnsureGraphTrackFrom(mPipeTrack);
+  } else {
+    mAudioNode->GetTrack()->AddListener(mAudioListener);
+    EnsureGraphTrackFrom(mAudioNode->GetTrack());
+  }
+}
+
+// Attaches the matching listener to aTrack. Main thread only. Audio tracks
+// get the audio listener; video tracks get the video listener both as a
+// direct and as a regular listener. A track without a matching encoder is
+// ignored with a warning.
+void MediaEncoder::ConnectMediaStreamTrack(MediaStreamTrack* aTrack) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (aTrack->Ended()) {
+    MOZ_ASSERT_UNREACHABLE("Cannot connect ended track");
+    return;
+  }
+
+  EnsureGraphTrackFrom(aTrack->GetTrack());
+
+  if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) {
+    if (!mAudioEncoder) {
+      // No audio encoder for this audio track. It could be disabled.
+      LOG(LogLevel::Warning, ("Cannot connect to audio track - no encoder"));
+      return;
+    }
+
+    MOZ_ASSERT(!mAudioTrack, "Only one audio track supported.");
+    MOZ_ASSERT(mAudioListener, "No audio listener for this audio track");
+
+    LOG(LogLevel::Info, ("Connected to audio track %p", aTrack));
+
+    mAudioTrack = audio;
+    audio->AddListener(mAudioListener);
+    return;
+  }
+
+  if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) {
+    if (!mVideoEncoder) {
+      // No video encoder for this video track. It could be disabled.
+      LOG(LogLevel::Warning, ("Cannot connect to video track - no encoder"));
+      return;
+    }
+
+    MOZ_ASSERT(!mVideoTrack, "Only one video track supported.");
+    MOZ_ASSERT(mVideoListener, "No video listener for this video track");
+
+    LOG(LogLevel::Info, ("Connected to video track %p", aTrack));
+
+    mVideoTrack = video;
+    video->AddDirectListener(mVideoListener);
+    video->AddListener(mVideoListener);
+    return;
+  }
+
+  MOZ_ASSERT(false, "Unknown track type");
+}
+
+// Detaches the listener from aTrack and forgets the track. aTrack must be
+// the audio or video track currently connected; otherwise this asserts and
+// returns without changes.
+void MediaEncoder::RemoveMediaStreamTrack(MediaStreamTrack* aTrack) {
+  if (!aTrack) {
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  if (AudioStreamTrack* audio = aTrack->AsAudioStreamTrack()) {
+    if (audio != mAudioTrack) {
+      MOZ_ASSERT(false, "Not connected to this audio track");
+      return;
+    }
+
+    if (mAudioListener) {
+      audio->RemoveDirectListener(mAudioListener);
+      audio->RemoveListener(mAudioListener);
+    }
+    mAudioTrack = nullptr;
+    return;
+  }
+
+  if (VideoStreamTrack* video = aTrack->AsVideoStreamTrack()) {
+    if (video != mVideoTrack) {
+      MOZ_ASSERT(false, "Not connected to this video track");
+      return;
+    }
+
+    if (mVideoListener) {
+      video->RemoveDirectListener(mVideoListener);
+      video->RemoveListener(mVideoListener);
+    }
+    mVideoTrack = nullptr;
+  }
+}
+
+/* static */
+// Builds a MediaEncoder for aMimeType: one track encoder per listed codec
+// ("opus", "vp8"/"vp8.0") and a container writer chosen from the MIME type
+// (WebM or Ogg). Returns nullptr when the MIME type cannot be parsed or no
+// writer matches it. An unknown codec crashes.
+already_AddRefed<MediaEncoder> MediaEncoder::CreateEncoder(
+    RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType,
+    uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes,
+    TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice) {
+  AUTO_PROFILER_LABEL("MediaEncoder::CreateEncoder", OTHER);
+
+  UniquePtr<ContainerWriter> writer;
+  UniquePtr<AudioTrackEncoder> audioEncoder;
+  UniquePtr<VideoTrackEncoder> videoEncoder;
+  auto encodedAudioQueue = MakeUnique<MediaQueue<EncodedFrame>>();
+  auto encodedVideoQueue = MakeUnique<MediaQueue<EncodedFrame>>();
+  auto driftCompensator =
+      MakeRefPtr<DriftCompensator>(aEncoderThread, aTrackRate);
+
+  Maybe<MediaContainerType> mimeType = MakeMediaContainerType(aMimeType);
+  if (!mimeType) {
+    return nullptr;
+  }
+
+  // One track encoder per codec listed in the MIME type.
+  for (const auto& codec : mimeType->ExtendedType().Codecs().Range()) {
+    if (codec.EqualsLiteral("opus")) {
+      MOZ_ASSERT(!audioEncoder);
+      audioEncoder =
+          MakeUnique<OpusTrackEncoder>(aTrackRate, *encodedAudioQueue);
+    } else if (codec.EqualsLiteral("vp8") || codec.EqualsLiteral("vp8.0")) {
+      MOZ_ASSERT(!videoEncoder);
+      // The pref decides whether the video encoder may drop frames.
+      const FrameDroppingMode dropMode =
+          Preferences::GetBool("media.recorder.video.frame_drops", true)
+              ? FrameDroppingMode::ALLOW
+              : FrameDroppingMode::DISALLOW;
+      videoEncoder = MakeUnique<VP8TrackEncoder>(
+          driftCompensator, aTrackRate, *encodedVideoQueue, dropMode);
+    } else {
+      MOZ_CRASH("Unknown codec");
+    }
+  }
+
+  // Pick the container writer from the MIME type.
+  const bool isWebM = mimeType->Type() == MEDIAMIMETYPE(VIDEO_WEBM) ||
+                      mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM);
+  if (isWebM) {
+    MOZ_ASSERT_IF(mimeType->Type() == MEDIAMIMETYPE(AUDIO_WEBM), !videoEncoder);
+    writer = MakeUnique<WebMWriter>();
+  } else if (mimeType->Type() == MEDIAMIMETYPE(AUDIO_OGG)) {
+    MOZ_ASSERT(audioEncoder);
+    MOZ_ASSERT(!videoEncoder);
+    writer = MakeUnique<OggWriter>();
+  }
+  NS_ENSURE_TRUE(writer, nullptr);
+
+  LOG(LogLevel::Info,
+      ("Create encoder result:a[%p](%u bps) v[%p](%u bps) w[%p] mimeType = "
+       "%s.",
+       audioEncoder.get(), aAudioBitrate, videoEncoder.get(), aVideoBitrate,
+       writer.get(), NS_ConvertUTF16toUTF8(aMimeType).get()));
+
+  // A bitrate of 0 leaves the encoder's own default in place.
+  if (audioEncoder) {
+    audioEncoder->SetWorkerThread(aEncoderThread);
+    if (aAudioBitrate != 0) {
+      audioEncoder->SetBitrate(aAudioBitrate);
+    }
+  }
+  if (videoEncoder) {
+    videoEncoder->SetWorkerThread(aEncoderThread);
+    if (aVideoBitrate != 0) {
+      videoEncoder->SetBitrate(aVideoBitrate);
+    }
+  }
+
+  return MakeAndAddRef<MediaEncoder>(
+      std::move(aEncoderThread), std::move(driftCompensator), std::move(writer),
+      std::move(audioEncoder), std::move(videoEncoder),
+      std::move(encodedAudioQueue), std::move(encodedVideoQueue), aTrackRate,
+      aMimeType, aMaxMemory, aTimeslice);
+}
+
+// Pulls muxed container data into aOutputBufs. Encoder thread only.
+// Returns NS_ERROR_NOT_INITIALIZED before initialization, otherwise the
+// muxer's result. Sets mCompleted once the muxer reports it is finished.
+nsresult MediaEncoder::GetEncodedData(
+    nsTArray<nsTArray<uint8_t>>* aOutputBufs) {
+  AUTO_PROFILER_LABEL("MediaEncoder::GetEncodedData", OTHER);
+
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  LOG(LogLevel::Verbose,
+      ("GetEncodedData TimeStamp = %f", GetEncodeTimeStamp()));
+
+  if (!mInitialized) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+
+  const nsresult muxResult = mMuxer->GetData(aOutputBufs);
+  if (mMuxer->IsFinished()) {
+    mCompleted = true;
+  }
+
+  LOG(LogLevel::Verbose,
+      ("END GetEncodedData TimeStamp=%f "
+       "mCompleted=%d, aComplete=%d, vComplete=%d",
+       GetEncodeTimeStamp(), mCompleted,
+       !mAudioEncoder || mAudioEncoder->IsEncodingComplete(),
+       !mVideoEncoder || mVideoEncoder->IsEncodingComplete()));
+
+  return muxResult;
+}
+
+// Starts shutdown once both encoded queues are finished. Encoder thread
+// only. While either queue is still live this only logs and returns.
+void MediaEncoder::MaybeShutdown() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  const bool audioLive = !mEncodedAudioQueue->IsFinished();
+  if (audioLive) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p not shutting down, audio is still live", this));
+    return;
+  }
+
+  const bool videoLive = !mEncodedVideoQueue->IsFinished();
+  if (videoLive) {
+    LOG(LogLevel::Debug,
+        ("MediaEncoder %p not shutting down, video is still live", this));
+    return;
+  }
+
+  mShutdownEvent.Notify();
+
+  // Stop will Shutdown() gracefully.
+  Unused << InvokeAsync(mMainThread, this, __func__, &MediaEncoder::Stop);
+}
+
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Shutdown() {  // Encoder thread only; repeated calls return the same promise.
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ if (mShutdownPromise) {  // Shutdown already started.
+ return mShutdownPromise;
+ }
+
+ LOG(LogLevel::Info, ("MediaEncoder is shutting down."));
+
+ AutoTArray<RefPtr<GenericNonExclusivePromise>, 2> shutdownPromises;  // One entry per existing track listener.
+ if (mAudioListener) {
+ shutdownPromises.AppendElement(mAudioListener->OnShutdown());
+ }
+ if (mVideoListener) {
+ shutdownPromises.AppendElement(mVideoListener->OnShutdown());
+ }
+
+ mShutdownPromise =
+ GenericNonExclusivePromise::All(mEncoderThread, shutdownPromises)
+ ->Then(mEncoderThread, __func__,
+ [](const GenericNonExclusivePromise::AllPromiseType::
+ ResolveOrRejectValue& aValue) {  // Converts the All() result back into a plain boolean promise.
+ if (aValue.IsResolve()) {
+ return GenericNonExclusivePromise::CreateAndResolve(
+ true, __func__);
+ }
+ return GenericNonExclusivePromise::CreateAndReject(
+ aValue.RejectValue(), __func__);
+ });
+
+ mShutdownPromise->Then(
+ mEncoderThread, __func__, [self = RefPtr<MediaEncoder>(this), this] {  // Teardown once the listeners have shut down.
+ if (mAudioEncoder) {
+ mAudioEncoder->UnregisterListener(mEncoderListener);
+ }
+ if (mVideoEncoder) {
+ mVideoEncoder->UnregisterListener(mEncoderListener);
+ }
+ mEncoderListener->Forget();
+ mMuxer->Disconnect();
+ mAudioPushListener.DisconnectIfExists();
+ mAudioFinishListener.DisconnectIfExists();
+ mVideoPushListener.DisconnectIfExists();
+ mVideoFinishListener.DisconnectIfExists();
+ });
+
+ return mShutdownPromise;
+}
+
+// Disconnects the input tracks, then runs Shutdown() on the encoder thread.
+// Main thread only. Returns the promise produced by that async invocation.
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Stop() {
+  MOZ_ASSERT(NS_IsMainThread());
+  LOG(LogLevel::Info, ("MediaEncoder %p Stop", this));
+
+  DisconnectTracks();
+
+  RefPtr<GenericNonExclusivePromise> shutdown =
+      InvokeAsync(mEncoderThread, this, __func__, &MediaEncoder::Shutdown);
+  return shutdown;
+}
+
+RefPtr<GenericNonExclusivePromise> MediaEncoder::Cancel() {
+ MOZ_ASSERT(NS_IsMainThread());
+
+ LOG(LogLevel::Info, ("MediaEncoder %p Cancel", this));
+ // Detach the inputs first (main thread only).
+ DisconnectTracks();
+ // On the encoder thread: cancel each existing track encoder, then shut down.
+ return InvokeAsync(mEncoderThread, __func__,
+ [self = RefPtr<MediaEncoder>(this), this]() {
+ if (mAudioEncoder) {
+ mAudioEncoder->Cancel();
+ }
+ if (mVideoEncoder) {
+ mVideoEncoder->Cancel();
+ }
+ return Shutdown();
+ });
+}
+
+bool MediaEncoder::HasError() {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ return mError;  // True once SetError() has run.
+}
+
+void MediaEncoder::SetError() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  // The error state is latched: only the first call flips the flag and
+  // notifies mErrorEvent; later calls do nothing.
+  if (!mError) {
+    mError = true;
+    mErrorEvent.Notify();
+  }
+}
+
+auto MediaEncoder::RequestData() -> RefPtr<BlobPromise> {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime);
+ mLastBlobTime = muxedEndTime;  // The timeslice is measured from here.
+ mLastExtractTime = muxedEndTime;  // Extract() is issued right below.
+ return Extract()->Then(
+ mMainThread, __func__,
+ [this, self = RefPtr<MediaEncoder>(this)](
+ const GenericPromise::ResolveOrRejectValue& aValue) {
+ // Even if rejected, we want to gather what has already been
+ // extracted into the current blob and expose that.
+ Unused << NS_WARN_IF(aValue.IsReject());
+ return GatherBlob();
+ });
+}
+
+void MediaEncoder::MaybeCreateMutableBlobStorage() {
+  MOZ_ASSERT(NS_IsMainThread());
+  // Lazily allocate the storage; an existing one is left untouched.
+  if (mMutableBlobStorage) return;
+  mMutableBlobStorage = new MutableBlobStorage(
+      MutableBlobStorage::eCouldBeInTemporaryFile, nullptr, mMaxMemory);
+}
+
+void MediaEncoder::OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame) {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ mMuxedAudioEndTime = aFrame->GetEndTime();  // Audio progress so far.
+ MaybeExtractOrGatherBlob();  // May extract data and/or issue a blob.
+}
+
+void MediaEncoder::OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame) {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ mMuxedVideoEndTime = aFrame->GetEndTime();  // Video progress so far.
+ MaybeExtractOrGatherBlob();  // May extract data and/or issue a blob.
+}
+
+void MediaEncoder::MaybeExtractOrGatherBlob() {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+ // Progress is bounded by whichever track has the earlier end time.
+ TimeUnit muxedEndTime = std::min(mMuxedAudioEndTime, mMuxedVideoEndTime);
+ if ((muxedEndTime - mLastBlobTime).ToTimeDuration() >= mTimeslice) {
+ LOG(LogLevel::Verbose, ("MediaEncoder %p Muxed %.2fs of data since last "
+ "blob. Issuing new blob.",
+ this, (muxedEndTime - mLastBlobTime).ToSeconds()));
+ RequestData()->Then(mEncoderThread, __func__,
+ [this, self = RefPtr<MediaEncoder>(this)](
+ const BlobPromise::ResolveOrRejectValue& aValue) {
+ if (aValue.IsReject()) {
+ SetError();
+ return;
+ }
+ RefPtr<BlobImpl> blob = aValue.ResolveValue();
+ mDataAvailableEvent.Notify(std::move(blob));
+ });
+ }
+ // RequestData() above sets mLastExtractTime, so at most one branch extracts.
+ if (muxedEndTime - mLastExtractTime > TimeUnit::FromSeconds(1)) {
+ // Extract data from the muxer at least every second.
+ LOG(LogLevel::Verbose,
+ ("MediaEncoder %p Muxed %.2fs of data since last "
+ "extract. Extracting more data into blob.",
+ this, (muxedEndTime - mLastExtractTime).ToSeconds()));
+ mLastExtractTime = muxedEndTime;
+ Unused << Extract();
+ }
+}
+
+// Pulls encoded data here, then appends it to blob storage on main thread.
+RefPtr<GenericPromise> MediaEncoder::Extract() {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+ LOG(LogLevel::Debug, ("MediaEncoder %p Extract", this));
+
+ AUTO_PROFILER_LABEL("MediaEncoder::Extract", OTHER);
+
+ // Pull whatever encoded data is available; on failure buffer stays empty.
+ nsTArray<nsTArray<uint8_t>> buffer;
+ nsresult rv = GetEncodedData(&buffer);
+ MOZ_ASSERT(rv != NS_ERROR_INVALID_ARG, "Invalid args can be prevented.");
+ if (NS_FAILED(rv)) {
+ MOZ_RELEASE_ASSERT(buffer.IsEmpty());
+ // Even if we failed to encode more data, it might be time to push a blob
+ // with already encoded data.
+ }
+
+ // To ensure Extract() promises are resolved in calling order, we always
+ // invoke the main thread. Even when the encoded buffer is empty.
+ return InvokeAsync(
+ mMainThread, __func__,
+ [self = RefPtr<MediaEncoder>(this), this, buffer = std::move(buffer)] {
+ MaybeCreateMutableBlobStorage();
+ for (const auto& part : buffer) {
+ if (part.IsEmpty()) {
+ continue;
+ }
+ // The first failed append rejects; remaining parts are dropped.
+ nsresult rv =
+ mMutableBlobStorage->Append(part.Elements(), part.Length());
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return GenericPromise::CreateAndReject(rv, __func__);
+ }
+ }
+ return GenericPromise::CreateAndResolve(true, __func__);
+ });
+}
+
+auto MediaEncoder::GatherBlob() -> RefPtr<BlobPromise> {
+ MOZ_ASSERT(NS_IsMainThread());
+ if (!mBlobPromise) {
+ return mBlobPromise = GatherBlobImpl();  // No gather is in flight.
+ }
+ return mBlobPromise = mBlobPromise->Then(mMainThread, __func__,
+ [self = RefPtr<MediaEncoder>(this)] {
+ return self->GatherBlobImpl();  // Chained behind the previous gather.
+ });
+}
+
+auto MediaEncoder::GatherBlobImpl() -> RefPtr<BlobPromise> {
+ RefPtr<BlobStorer> storer = MakeAndAddRef<BlobStorer>();
+ MaybeCreateMutableBlobStorage();
+ mMutableBlobStorage->GetBlobImplWhenReady(NS_ConvertUTF16toUTF8(mMimeType),
+ storer);
+ mMutableBlobStorage = nullptr;  // Later data goes into a fresh storage.
+ // Clear mBlobPromise once done, unless another promise has replaced it.
+ storer->Promise()->Then(
+ mMainThread, __func__,
+ [self = RefPtr<MediaEncoder>(this), p = storer->Promise()] {
+ if (self->mBlobPromise == p) {
+ // Reset BlobPromise.
+ self->mBlobPromise = nullptr;
+ }
+ });
+
+ return storer->Promise();
+}
+
+void MediaEncoder::DisconnectTracks() {
+ MOZ_ASSERT(NS_IsMainThread());
+ // AudioNode input: remove the audio listener and destroy the port and pipe.
+ if (mAudioNode) {
+ mAudioNode->GetTrack()->RemoveListener(mAudioListener);
+ if (mInputPort) {
+ mInputPort->Destroy();
+ mInputPort = nullptr;
+ }
+ if (mPipeTrack) {
+ mPipeTrack->RemoveListener(mAudioListener);
+ mPipeTrack->Destroy();
+ mPipeTrack = nullptr;
+ }
+ mAudioNode = nullptr;
+ }
+ // MediaStreamTrack inputs: detach whichever audio/video tracks are set.
+ if (mAudioTrack) {
+ RemoveMediaStreamTrack(mAudioTrack);
+ }
+
+ if (mVideoTrack) {
+ RemoveMediaStreamTrack(mVideoTrack);
+ }
+}
+
+bool MediaEncoder::IsWebMEncoderEnabled() {
+ return StaticPrefs::media_encoder_webm_enabled();  // Value of a static pref.
+}
+
+void MediaEncoder::UpdateInitialized() {
+ MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+ if (mInitialized) {
+ // This could happen if an encoder re-inits due to a resolution change.
+ return;
+ }
+
+ if (mAudioEncoder && !mAudioEncoder->IsInitialized()) {
+ LOG(LogLevel::Debug,
+ ("MediaEncoder %p UpdateInitialized waiting for audio", this));
+ return;
+ }
+
+ if (mVideoEncoder && !mVideoEncoder->IsInitialized()) {
+ LOG(LogLevel::Debug,
+ ("MediaEncoder %p UpdateInitialized waiting for video", this));
+ return;
+ }
+ // Every existing encoder is initialized; hand their metadata to the muxer.
+ MOZ_ASSERT(mMuxer->NeedsMetadata());
+ nsTArray<RefPtr<TrackMetadataBase>> meta;
+ if (mAudioEncoder && !*meta.AppendElement(mAudioEncoder->GetMetadata())) {
+ LOG(LogLevel::Error, ("Audio metadata is null"));
+ SetError();
+ return;
+ }
+ if (mVideoEncoder && !*meta.AppendElement(mVideoEncoder->GetMetadata())) {
+ LOG(LogLevel::Error, ("Video metadata is null"));
+ SetError();
+ return;
+ }
+
+ if (NS_FAILED(mMuxer->SetMetadata(meta))) {
+ LOG(LogLevel::Error, ("SetMetadata failed"));
+ SetError();
+ return;
+ }
+
+ LOG(LogLevel::Info,
+ ("MediaEncoder %p UpdateInitialized set metadata in muxer", this));
+ // Latched only on success; any error path above leaves it false.
+ mInitialized = true;
+}
+
+void MediaEncoder::UpdateStarted() {
+  MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
+
+  // Nothing to do once the started state has been latched.
+  if (mStarted) {
+    return;
+  }
+
+  // Every encoder that exists must have started; a missing encoder counts as
+  // ready. Audio is queried before video, and video only if audio is ready.
+  const bool audioReady = !mAudioEncoder || mAudioEncoder->IsStarted();
+  const bool videoReady =
+      audioReady && (!mVideoEncoder || mVideoEncoder->IsStarted());
+  if (!videoReady) {
+    return;
+  }
+
+  mStarted = true;
+  // No blob may have been issued before timeslice-based blobs start.
+  MOZ_ASSERT(mLastBlobTime == TimeUnit::Zero());
+  mStartedEvent.Notify();
+}
+
+/*
+ * SizeOfExcludingThis measures memory being used by the Media Encoder.
+ * Currently it sums the memory buffer of any current blob storage and the
+ * sizes reported by mAudioEncoder and mVideoEncoder, when they exist.
+ */
+auto MediaEncoder::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
+ -> RefPtr<SizeOfPromise> {
+ MOZ_ASSERT(NS_IsMainThread());
+ size_t blobStorageSize =
+ mMutableBlobStorage ? mMutableBlobStorage->SizeOfCurrentMemoryBuffer()
+ : 0;
+ // Blob storage is read here on the main thread; encoders on their thread.
+ return InvokeAsync(
+ mEncoderThread, __func__,
+ [self = RefPtr<MediaEncoder>(this), this, blobStorageSize,
+ aMallocSizeOf]() {
+ size_t size = 0;
+ if (mAudioEncoder) {
+ size += mAudioEncoder->SizeOfExcludingThis(aMallocSizeOf);
+ }
+ if (mVideoEncoder) {
+ size += mVideoEncoder->SizeOfExcludingThis(aMallocSizeOf);
+ }
+ return SizeOfPromise::CreateAndResolve(blobStorageSize + size,
+ __func__);
+ });
+}
+
+} // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/MediaEncoder.h b/dom/media/encoder/MediaEncoder.h
new file mode 100644
index 0000000000..dae887edc6
--- /dev/null
+++ b/dom/media/encoder/MediaEncoder.h
@@ -0,0 +1,400 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MediaEncoder_h_
+#define MediaEncoder_h_
+
+#include "ContainerWriter.h"
+#include "CubebUtils.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/UniquePtr.h"
+#include "nsIMemoryReporter.h"
+#include "TrackEncoder.h"
+
+namespace mozilla {
+
+class DriftCompensator;
+class Muxer;
+class Runnable;
+class TaskQueue;
+
+namespace dom {
+class AudioNode;
+class AudioStreamTrack;
+class BlobImpl;
+class MediaStreamTrack;
+class MutableBlobStorage;
+class VideoStreamTrack;
+} // namespace dom
+
+class DriftCompensator;
+
+/**
+ * MediaEncoder is the framework of encoding module, it controls and manages
+ * procedures between Muxer, ContainerWriter and TrackEncoder. ContainerWriter
+ * writes the encoded track data into a specific container (e.g. ogg, webm).
+ * AudioTrackEncoder and VideoTrackEncoder are subclasses of TrackEncoder, and
+ * are responsible for encoding raw data coming from MediaStreamTracks.
+ *
+ * MediaEncoder solves threading issues by doing message passing to a TaskQueue
+ * (the "encoder thread") as passed in to the constructor. Each
+ * MediaStreamTrack to be recorded is set up with a MediaTrackListener.
+ * Typically there are non-direct track listeners for audio, direct listeners
+ * for video, and there is always a non-direct listener on each track for
+ * time-keeping. The listeners forward data to their corresponding TrackEncoders
+ * on the encoder thread.
+ *
+ * The MediaEncoder listens to events from all TrackEncoders, and in turn
+ * signals events to interested parties. Typically a MediaRecorder::Session.
+ * The MediaEncoder automatically encodes incoming data, muxes it, writes it
+ * into a container and stores the container data into a MutableBlobStorage.
+ * It is timeslice-aware so that it can notify listeners when it's time to
+ * expose a blob due to filling the timeslice.
+ *
+ * MediaEncoder is designed to be a passive component; it neither owns nor is
+ * in charge of managing threads. Instead this is done by its owner.
+ *
+ * For example, usage from MediaRecorder of this component would be:
+ * 1) Create an encoder with a valid MIME type. Note that there are more
+ * configuration options, see the docs on MediaEncoder::CreateEncoder.
+ * => encoder = MediaEncoder::CreateEncoder(aMIMEType);
+ * It then creates track encoders and the appropriate ContainerWriter
+ * according to the MIME type
+ *
+ * 2) Connect handlers through MediaEventListeners to the MediaEncoder's
+ * MediaEventSources, StartedEvent(), DataAvailableEvent(), ErrorEvent() and
+ * ShutdownEvent().
+ * => listener = encoder->DataAvailableEvent().Connect(mainThread, &OnBlob);
+ *
+ * 3) Connect the sources to be recorded. Either through:
+ * => encoder->ConnectAudioNode(node);
+ * or
+ * => encoder->ConnectMediaStreamTrack(track);
+ * These should not be mixed. When connecting MediaStreamTracks there is
+ * support for at most one of each kind.
+ *
+ * 4) MediaEncoder automatically encodes data from the connected tracks, muxes
+ * them and writes it all into a blob, including metadata. When the blob
+ * contains at least `timeslice` worth of data it notifies the
+ * DataAvailableEvent that was connected in step 2.
+ * => void OnBlob(RefPtr<BlobImpl> aBlob) {
+ * => DispatchBlobEvent(Blob::Create(GetOwnerGlobal(), aBlob));
+ * => };
+ *
+ * 5) To stop encoding, there are multiple options:
+ *
+ * 5.1) Stop() for a graceful stop.
+ * => encoder->Stop();
+ *
+ * 5.2) Cancel() for an immediate stop, if you don't need the data currently
+ * buffered.
+ * => encoder->Cancel();
+ *
+ * 5.3) When all input tracks end, the MediaEncoder will automatically stop
+ * and shut down.
+ */
+class MediaEncoder {
+ private:
+ class AudioTrackListener;
+ class VideoTrackListener;
+ class EncoderListener;
+
+ public:
+ using BlobPromise =
+ MozPromise<RefPtr<dom::BlobImpl>, nsresult, false /* IsExclusive */>;
+ using SizeOfPromise = MozPromise<size_t, size_t, true /* IsExclusive */>;
+
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaEncoder)
+
+ MediaEncoder(RefPtr<TaskQueue> aEncoderThread,
+ RefPtr<DriftCompensator> aDriftCompensator,
+ UniquePtr<ContainerWriter> aWriter,
+ UniquePtr<AudioTrackEncoder> aAudioEncoder,
+ UniquePtr<VideoTrackEncoder> aVideoEncoder,
+ UniquePtr<MediaQueue<EncodedFrame>> aEncodedAudioQueue,
+ UniquePtr<MediaQueue<EncodedFrame>> aEncodedVideoQueue,
+ TrackRate aTrackRate, const nsAString& aMIMEType,
+ uint64_t aMaxMemory, TimeDuration aTimeslice);
+
+ /**
+ * Called on main thread from MediaRecorder::Pause.
+ */
+ void Suspend();
+
+ /**
+ * Called on main thread from MediaRecorder::Resume.
+ */
+ void Resume();
+
+ /**
+ * Disconnects the input tracks, causing the encoding to stop.
+ */
+ void DisconnectTracks();
+
+ /**
+ * Connects an AudioNode with the appropriate encoder.
+ */
+ void ConnectAudioNode(dom::AudioNode* aNode, uint32_t aOutput);
+
+ /**
+ * Connects a MediaStreamTrack with the appropriate encoder.
+ */
+ void ConnectMediaStreamTrack(dom::MediaStreamTrack* aTrack);
+
+ /**
+ * Removes a connected MediaStreamTrack.
+ */
+ void RemoveMediaStreamTrack(dom::MediaStreamTrack* aTrack);
+
+ /**
+ * Creates an encoder with the given MIME type. This must be a valid MIME type
+ * or we will crash hard.
+ * Bitrates are given either explicitly, or with 0 for defaults.
+ * aTrackRate is the rate in which data will be fed to the TrackEncoders.
+ * aMaxMemory is the maximum number of bytes of muxed data allowed in memory.
+ * Beyond that the blob is moved to a temporary file.
+ * aTimeslice is the minimum duration of muxed data we gather before
+ * automatically issuing a dataavailable event.
+ */
+ static already_AddRefed<MediaEncoder> CreateEncoder(
+ RefPtr<TaskQueue> aEncoderThread, const nsAString& aMimeType,
+ uint32_t aAudioBitrate, uint32_t aVideoBitrate, uint8_t aTrackTypes,
+ TrackRate aTrackRate, uint64_t aMaxMemory, TimeDuration aTimeslice);
+
+ /**
+ * Encodes raw data for all tracks to aOutputBufs. The buffer of container
+ * data is allocated in ContainerWriter::GetContainerData().
+ *
+ * On its first call, metadata is also encoded. TrackEncoders must have been
+ * initialized before this is called.
+ */
+ nsresult GetEncodedData(nsTArray<nsTArray<uint8_t>>* aOutputBufs);
+
+ /**
+ * Asserts that Shutdown() has been called. Reasons are encoding
+ * complete, encountering an error, or being canceled by its caller.
+ */
+ void AssertShutdownCalled() { MOZ_ASSERT(mShutdownPromise); }
+
+ /**
+ * Stops (encoding any data currently buffered) the encoding and shuts down
+ * the encoder using Shutdown().
+ */
+ RefPtr<GenericNonExclusivePromise> Stop();
+
+ /**
+ * Cancels (discarding any data currently buffered) the encoding and shuts
+ * down the encoder using Shutdown().
+ */
+ RefPtr<GenericNonExclusivePromise> Cancel();
+
+ bool HasError();  // Encoder thread only.
+
+ static bool IsWebMEncoderEnabled();
+
+ /**
+ * Updates internal state when track encoders are all initialized.
+ */
+ void UpdateInitialized();
+
+ /**
+ * Updates internal state when track encoders have all started, and
+ * notifies listeners that this MediaEncoder has been started.
+ */
+ void UpdateStarted();
+
+ MOZ_DEFINE_MALLOC_SIZE_OF(MallocSizeOf)
+ /*
+ * Measure the size of the buffer, and heap memory in bytes occupied by
+ * mAudioEncoder and mVideoEncoder.
+ */
+ RefPtr<SizeOfPromise> SizeOfExcludingThis(
+ mozilla::MallocSizeOf aMallocSizeOf);
+
+ /**
+ * Encode, mux and store into blob storage what has been buffered until now,
+ * then return the blob backed by that storage.
+ */
+ RefPtr<BlobPromise> RequestData();
+
+ // Event that gets notified when all track encoders have received data.
+ MediaEventSource<void>& StartedEvent() { return mStartedEvent; }
+ // Event that gets notified when there was an error preventing continued
+ // recording somewhere in the MediaEncoder stack.
+ MediaEventSource<void>& ErrorEvent() { return mErrorEvent; }
+ // Event that gets notified when the MediaEncoder stack has been shut down.
+ MediaEventSource<void>& ShutdownEvent() { return mShutdownEvent; }
+ // Event that gets notified after we have muxed at least mTimeslice worth of
+ // data into the current blob storage.
+ MediaEventSource<RefPtr<dom::BlobImpl>>& DataAvailableEvent() {
+ return mDataAvailableEvent;
+ }
+
+ protected:
+ ~MediaEncoder();
+
+ private:
+ /**
+ * Sets mGraphTrack if not already set, using a new stream from aTrack's
+ * graph.
+ */
+ void EnsureGraphTrackFrom(MediaTrack* aTrack);
+
+ /**
+ * Takes a regular runnable and dispatches it to the graph wrapped in a
+ * ControlMessage.
+ */
+ void RunOnGraph(already_AddRefed<Runnable> aRunnable);
+
+ /**
+ * Shuts down gracefully if there is no remaining live track encoder.
+ */
+ void MaybeShutdown();
+
+ /**
+ * Waits for TrackEncoders to shut down, then shuts down the MediaEncoder and
+ * cleans up track encoders.
+ */
+ RefPtr<GenericNonExclusivePromise> Shutdown();
+
+ /**
+ * Sets mError to true and notifies ErrorEvent() listeners if mError changed.
+ * NOTE(review): nothing here stops encoding itself; listeners must — confirm.
+ */
+ void SetError();
+
+ /**
+ * Creates a new MutableBlobStorage if one doesn't exist.
+ */
+ void MaybeCreateMutableBlobStorage();
+
+ /**
+ * Called when an encoded audio frame has been pushed by the audio encoder.
+ */
+ void OnEncodedAudioPushed(const RefPtr<EncodedFrame>& aFrame);
+
+ /**
+ * Called when an encoded video frame has been pushed by the video encoder.
+ */
+ void OnEncodedVideoPushed(const RefPtr<EncodedFrame>& aFrame);
+
+ /**
+ * If enough data has been pushed to the muxer, extract it into the current
+ * blob storage. If more than mTimeslice data has been pushed to the muxer
+ * since the last DataAvailableEvent was notified, also gather the blob and
+ * notify MediaRecorder.
+ */
+ void MaybeExtractOrGatherBlob();
+
+ // Extracts encoded and muxed data into the current blob storage, creating one
+ // if it doesn't exist. The returned promise resolves when data has been
+ // stored into the blob.
+ RefPtr<GenericPromise> Extract();
+
+ // Stops gathering data into the current blob and resolves when the current
+ // blob is available. Future data will be stored in a new blob.
+ // Should a previous async GatherBlob() operation still be in progress, we'll
+ // wait for it to finish before starting this one.
+ RefPtr<BlobPromise> GatherBlob();
+
+ RefPtr<BlobPromise> GatherBlobImpl();
+
+ const RefPtr<nsISerialEventTarget> mMainThread;
+ const RefPtr<TaskQueue> mEncoderThread;
+ const RefPtr<DriftCompensator> mDriftCompensator;
+
+ const UniquePtr<MediaQueue<EncodedFrame>> mEncodedAudioQueue;
+ const UniquePtr<MediaQueue<EncodedFrame>> mEncodedVideoQueue;
+
+ const UniquePtr<Muxer> mMuxer;
+ const UniquePtr<AudioTrackEncoder> mAudioEncoder;
+ const RefPtr<AudioTrackListener> mAudioListener;
+ const UniquePtr<VideoTrackEncoder> mVideoEncoder;
+ const RefPtr<VideoTrackListener> mVideoListener;
+ const RefPtr<EncoderListener> mEncoderListener;
+
+ public:
+ const nsString mMimeType;
+
+ // Max memory to use for the MutableBlobStorage.
+ const uint64_t mMaxMemory;
+
+ // The interval of passing encoded data from MutableBlobStorage to
+ // onDataAvailable handler.
+ const TimeDuration mTimeslice;
+
+ private:
+ MediaEventListener mAudioPushListener;
+ MediaEventListener mAudioFinishListener;
+ MediaEventListener mVideoPushListener;
+ MediaEventListener mVideoFinishListener;
+
+ MediaEventProducer<void> mStartedEvent;
+ MediaEventProducer<void> mErrorEvent;
+ MediaEventProducer<void> mShutdownEvent;
+ MediaEventProducer<RefPtr<dom::BlobImpl>> mDataAvailableEvent;
+
+ // The AudioNode we are encoding.
+ // Will be null when input is media stream or destination node.
+ RefPtr<dom::AudioNode> mAudioNode;
+ // Pipe-track for allowing a track listener on a non-destination AudioNode.
+ // Will be null when input is media stream or destination node.
+ RefPtr<AudioNodeTrack> mPipeTrack;
+ // Input port that connects mAudioNode to mPipeTrack.
+ // Will be null when input is media stream or destination node.
+ RefPtr<MediaInputPort> mInputPort;
+ // An audio track that we are encoding. Will be null if the input stream
+ // doesn't contain audio on start() or if the input is an AudioNode.
+ RefPtr<dom::AudioStreamTrack> mAudioTrack;
+ // A video track that we are encoding. Will be null if the input stream
+ // doesn't contain video on start() or if the input is an AudioNode.
+ RefPtr<dom::VideoStreamTrack> mVideoTrack;
+
+ // A stream to keep the MediaTrackGraph alive while we're recording.
+ RefPtr<SharedDummyTrack> mGraphTrack;
+
+ // A buffer to cache muxed encoded data.
+ RefPtr<dom::MutableBlobStorage> mMutableBlobStorage;
+ // If set, is a promise for the latest GatherBlob() operation. Allows
+ // GatherBlob() operations to be serialized in order to avoid races.
+ RefPtr<BlobPromise> mBlobPromise;
+ // The end time of the muxed data in the last gathered blob. If more than one
+ // track is present, this is the end time of the track that ends the earliest
+ // in the last blob. Encoder thread only.
+ media::TimeUnit mLastBlobTime;
+ // The end time of the muxed data in the current blob storage. If more than
+ // one track is present, this is the end time of the track that ends the
+ // earliest in the current blob storage. Encoder thread only.
+ media::TimeUnit mLastExtractTime;
+ // The end time of encoded audio data sent to the muxer. Positive infinity if
+ // there is no audio encoder. Encoder thread only.
+ media::TimeUnit mMuxedAudioEndTime;
+ // The end time of encoded video data sent to the muxer. Positive infinity if
+ // there is no video encoder. Encoder thread only.
+ media::TimeUnit mMuxedVideoEndTime;
+
+ TimeStamp mStartTime;
+ bool mInitialized;  // Set by UpdateInitialized() after SetMetadata succeeds.
+ bool mStarted;  // Set by UpdateStarted() once all encoders have started.
+ bool mCompleted;
+ bool mError;  // Set by SetError(); read via HasError().
+ // Set when shutdown starts.
+ RefPtr<GenericNonExclusivePromise> mShutdownPromise;
+ // Milliseconds elapsed since mStartTime; for logging purposes.
+ double GetEncodeTimeStamp() {
+ TimeDuration decodeTime;
+ decodeTime = TimeStamp::Now() - mStartTime;
+ return decodeTime.ToMilliseconds();
+ }
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/Muxer.cpp b/dom/media/encoder/Muxer.cpp
new file mode 100644
index 0000000000..8225062ee5
--- /dev/null
+++ b/dom/media/encoder/Muxer.cpp
@@ -0,0 +1,185 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Muxer.h"
+
+#include "ContainerWriter.h"
+
+namespace mozilla {
+
+LazyLogModule gMuxerLog("Muxer");  // Log module used by the LOG() macro below.
+#define LOG(type, ...) MOZ_LOG(gMuxerLog, type, (__VA_ARGS__))
+
+Muxer::Muxer(UniquePtr<ContainerWriter> aWriter,
+ MediaQueue<EncodedFrame>& aEncodedAudioQueue,
+ MediaQueue<EncodedFrame>& aEncodedVideoQueue)
+ : mEncodedAudioQueue(aEncodedAudioQueue),  // Held by reference, not owned.
+ mEncodedVideoQueue(aEncodedVideoQueue),  // Held by reference, not owned.
+ mWriter(std::move(aWriter)) {}  // Ownership of the writer moves in.
+
+void Muxer::Disconnect() {  // Disconnects the push/finish listeners.
+ mAudioPushListener.DisconnectIfExists();
+ mAudioFinishListener.DisconnectIfExists();
+ mVideoPushListener.DisconnectIfExists();
+ mVideoFinishListener.DisconnectIfExists();
+}
+// Finished means the ContainerWriter reports that writing is complete.
+bool Muxer::IsFinished() { return mWriter->IsWritingComplete(); }
+
+nsresult Muxer::SetMetadata(
+ const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata) {
+ MOZ_DIAGNOSTIC_ASSERT(!mMetadataSet);
+ MOZ_DIAGNOSTIC_ASSERT(!mHasAudio);
+ MOZ_DIAGNOSTIC_ASSERT(!mHasVideo);
+ nsresult rv = mWriter->SetMetadata(aMetadata);
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, "%p Setting metadata failed, tracks=%zu", this,
+ aMetadata.Length());
+ return rv;  // State flags are left untouched on failure.
+ }
+ // Record which track kinds are present, based on each metadata's kind.
+ for (const auto& track : aMetadata) {
+ switch (track->GetKind()) {
+ case TrackMetadataBase::METADATA_OPUS:
+ case TrackMetadataBase::METADATA_VORBIS:
+ case TrackMetadataBase::METADATA_AAC:
+ case TrackMetadataBase::METADATA_AMR:
+ case TrackMetadataBase::METADATA_EVRC:
+ MOZ_ASSERT(!mHasAudio, "Only one audio track supported");
+ mHasAudio = true;
+ break;
+ case TrackMetadataBase::METADATA_VP8:
+ MOZ_ASSERT(!mHasVideo, "Only one video track supported");
+ mHasVideo = true;
+ break;
+ default:
+ MOZ_CRASH("Unknown codec metadata");
+ };
+ }
+ mMetadataSet = true;
+ MOZ_ASSERT(mHasAudio || mHasVideo);
+ LOG(LogLevel::Info, "%p Metadata set; audio=%d, video=%d", this, mHasAudio,
+ mHasVideo);
+ return NS_OK;
+}
+
+nsresult Muxer::GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers) {
+ MOZ_ASSERT(mHasAudio || mHasVideo);
+ // The container header is requested once, ahead of any muxed frames.
+ nsresult rv;
+ if (!mMetadataEncoded) {
+ rv = mWriter->GetContainerData(aOutputBuffers, ContainerWriter::GET_HEADER);
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, "%p Failed getting metadata from writer", this);
+ return rv;
+ }
+ mMetadataEncoded = true;
+ }
+
+ if (mEncodedAudioQueue.GetSize() == 0 && !mEncodedAudioQueue.IsFinished() &&
+ mEncodedVideoQueue.GetSize() == 0 && !mEncodedVideoQueue.IsFinished()) {
+ // Nothing to mux.
+ return NS_OK;
+ }
+
+ rv = Mux();
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, "%p Failed muxing data into writer", this);
+ return rv;
+ }
+ // Once both queues are finished they are expected to be at end of stream.
+ MOZ_ASSERT_IF(
+ mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(),
+ mEncodedAudioQueue.AtEndOfStream());
+ MOZ_ASSERT_IF(
+ mEncodedAudioQueue.IsFinished() && mEncodedVideoQueue.IsFinished(),
+ mEncodedVideoQueue.AtEndOfStream());
+ uint32_t flags =
+ mEncodedAudioQueue.AtEndOfStream() && mEncodedVideoQueue.AtEndOfStream()
+ ? ContainerWriter::FLUSH_NEEDED
+ : 0;
+
+ if (mEncodedAudioQueue.AtEndOfStream() &&
+ mEncodedVideoQueue.AtEndOfStream()) {
+ LOG(LogLevel::Info, "%p All data written", this);
+ }
+
+ return mWriter->GetContainerData(aOutputBuffers, flags);
+}
+
+nsresult Muxer::Mux() {
+ MOZ_ASSERT(mMetadataSet);
+ MOZ_ASSERT(mHasAudio || mHasVideo);
+ // Frames popped during this call, in the order they are handed to the writer.
+ nsTArray<RefPtr<EncodedFrame>> frames;
+ // The times at which we expect our next video and audio frames. These are
+ // based on the time + duration (GetEndTime()) of the last seen frames.
+ // Assumes that the encoders write the correct duration for frames.
+ media::TimeUnit expectedNextVideoTime;
+ media::TimeUnit expectedNextAudioTime;
+ // Interleave frames until we're out of audio or video
+ while (mEncodedVideoQueue.GetSize() > 0 && mEncodedAudioQueue.GetSize() > 0) {
+ RefPtr<EncodedFrame> videoFrame = mEncodedVideoQueue.PeekFront();
+ RefPtr<EncodedFrame> audioFrame = mEncodedAudioQueue.PeekFront();
+ // For any expected time our frames should occur at or after that time.
+ MOZ_ASSERT(videoFrame->mTime >= expectedNextVideoTime);
+ MOZ_ASSERT(audioFrame->mTime >= expectedNextAudioTime);
+ if (videoFrame->mTime <= audioFrame->mTime) {
+ expectedNextVideoTime = videoFrame->GetEndTime();
+ RefPtr<EncodedFrame> frame = mEncodedVideoQueue.PopFront();
+ frames.AppendElement(std::move(frame));
+ } else {
+ expectedNextAudioTime = audioFrame->GetEndTime();
+ RefPtr<EncodedFrame> frame = mEncodedAudioQueue.PopFront();
+ frames.AppendElement(std::move(frame));
+ }
+ }
+
+ // If we're out of audio we still may be able to add more video...
+ if (mEncodedAudioQueue.GetSize() == 0) {
+ while (mEncodedVideoQueue.GetSize() > 0) {
+ if (!mEncodedAudioQueue.AtEndOfStream() &&
+ mEncodedVideoQueue.PeekFront()->mTime > expectedNextAudioTime) {
+ // Audio encoding is not complete and since the video frame comes
+ // after our next audio frame we cannot safely add it.
+ break;
+ }
+ frames.AppendElement(mEncodedVideoQueue.PopFront());
+ }
+ }
+
+ // If we're out of video we still may be able to add more audio...
+ if (mEncodedVideoQueue.GetSize() == 0) {
+ while (mEncodedAudioQueue.GetSize() > 0) {
+ if (!mEncodedVideoQueue.AtEndOfStream() &&
+ mEncodedAudioQueue.PeekFront()->mTime > expectedNextVideoTime) {
+ // Video encoding is not complete and since the audio frame comes
+ // after our next video frame we cannot safely add it.
+ break;
+ }
+ frames.AppendElement(mEncodedAudioQueue.PopFront());
+ }
+ }
+
+ LOG(LogLevel::Debug,
+ "%p Muxed data, remaining-audio=%zu, remaining-video=%zu", this,
+ mEncodedAudioQueue.GetSize(), mEncodedVideoQueue.GetSize());
+
+ // If encoding is complete for both encoders we should signal end of stream,
+ // otherwise we keep going.
+ uint32_t flags =
+ mEncodedVideoQueue.AtEndOfStream() && mEncodedAudioQueue.AtEndOfStream()
+ ? ContainerWriter::END_OF_STREAM
+ : 0;
+ nsresult rv = mWriter->WriteEncodedTrack(frames, flags);
+ if (NS_FAILED(rv)) {
+ LOG(LogLevel::Error, "Error! Failed to write muxed data to the container");
+ }
+ return rv;  // The writer's result is passed through unchanged.
+}
+
+} // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/Muxer.h b/dom/media/encoder/Muxer.h
new file mode 100644
index 0000000000..983e260230
--- /dev/null
+++ b/dom/media/encoder/Muxer.h
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DOM_MEDIA_ENCODER_MUXER_H_
+#define DOM_MEDIA_ENCODER_MUXER_H_
+
+#include "MediaQueue.h"
+#include "mozilla/media/MediaUtils.h"
+
+namespace mozilla {
+
+class ContainerWriter;
+class EncodedFrame;
+class TrackMetadataBase;
+
+// Generic Muxer class that helps pace the output from track encoders to the
+// ContainerWriter, so time never appears to go backwards.
+// Note that the entire class is written for single-threaded access.
+class Muxer {
+ public:
+ Muxer(UniquePtr<ContainerWriter> aWriter,
+ MediaQueue<EncodedFrame>& aEncodedAudioQueue,
+ MediaQueue<EncodedFrame>& aEncodedVideoQueue);
+ ~Muxer() = default;
+
+ // Disconnects MediaQueues such that they will no longer be consumed.
+ // Idempotent.
+ void Disconnect();
+
+ // Returns true when all tracks have ended, and all data has been muxed and
+ // fetched.
+ bool IsFinished();
+
+ // Returns true if this muxer has not been given metadata yet.
+ bool NeedsMetadata() const { return !mMetadataSet; }
+
+ // Sets metadata for all tracks. This may only be called once.
+ nsresult SetMetadata(const nsTArray<RefPtr<TrackMetadataBase>>& aMetadata);
+
+ // Gets the data that has been muxed and written into the container so far.
+ nsresult GetData(nsTArray<nsTArray<uint8_t>>* aOutputBuffers);
+
+ private:
+ // Writes data in MediaQueues to the ContainerWriter.
+ nsresult Mux();
+
+ // Audio frames that have been encoded and are pending write to the muxer.
+ MediaQueue<EncodedFrame>& mEncodedAudioQueue;
+ // Video frames that have been encoded and are pending write to the muxer.
+ MediaQueue<EncodedFrame>& mEncodedVideoQueue;
+ // Listeners driving the muxing as encoded data gets produced.
+ MediaEventListener mAudioPushListener;
+ MediaEventListener mAudioFinishListener;
+ MediaEventListener mVideoPushListener;
+ MediaEventListener mVideoFinishListener;
+ // The writer for the specific container we're recording into.
+ UniquePtr<ContainerWriter> mWriter;
+ // True once metadata has been set in the muxer.
+ bool mMetadataSet = false;
+ // True once the container header has been fetched from the writer.
+ bool mMetadataEncoded = false;
+ // True if metadata is set and contains an audio track.
+ bool mHasAudio = false;
+ // True if metadata is set and contains a video track.
+ bool mHasVideo = false;
+};
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/OpusTrackEncoder.cpp b/dom/media/encoder/OpusTrackEncoder.cpp
new file mode 100644
index 0000000000..16b71d378e
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -0,0 +1,454 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "OpusTrackEncoder.h"
+#include "nsString.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/ProfilerLabels.h"
+#include "VideoUtils.h"
+
+#include <opus/opus.h>
+
+// Logging is compiled out in this file: LOG() expands to nothing.
+#define LOG(args, ...)
+
+namespace mozilla {
+
+// The Opus format supports up to 8 channels, and supports multitrack audio up
+// to 255 channels, but the current implementation supports only mono and
+// stereo, and downmixes any more than that.
+constexpr int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
+
+// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
+// In section "opus_encoder_init", channels must be 1 or 2 of input signal.
+constexpr int MAX_CHANNELS = 2;
+
+// Size in bytes of the output buffer handed to the Opus encoder for each
+// packet, i.e. the upper bound on the size of one encoded packet.
+constexpr int MAX_DATA_BYTES = 4096;
+
+// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
+// Second paragraph, " The granule position of an audio data page is in units
+// of PCM audio samples at a fixed rate of 48 kHz."
+constexpr int kOpusSamplingRate = 48000;
+
+// The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
+constexpr int kFrameDurationMs = 20;
+
+// The input sampling rates (Hz) that are handed to the Opus encoder unchanged.
+// Input at any other rate is resampled to 48kHz first.
+constexpr int kOpusSupportedInputSamplingRates[] = {8000, 12000, 16000, 24000,
+ 48000};
+
+namespace {
+
+// Appends aValue to aOutput one byte at a time, least significant byte first,
+// so the result is little endian regardless of the host byte order. T is a
+// 16-bit or 32-bit integer type.
+template <typename T>
+static void SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput) {
+  for (uint32_t shift = 0; shift < sizeof(T) * 8; shift += 8) {
+    const uint8_t lowByte = static_cast<uint8_t>(0x000000ff & (aValue >> shift));
+    aOutput->AppendElement(lowByte);
+  }
+}
+
+// Appends aComment to aOutput as a length-prefixed string: the byte length as
+// a 32-bit little-endian integer, followed by the string bytes themselves.
+static inline void SerializeToBuffer(const nsCString& aComment,
+                                     nsTArray<uint8_t>* aOutput) {
+  const uint32_t byteCount = static_cast<uint32_t>(aComment.Length());
+  SerializeToBuffer(byteCount, aOutput);
+  aOutput->AppendElements(aComment.get(), aComment.Length());
+}
+
+// Appends the Opus identification header ("OpusHead") to aOutput. Fields are
+// written in this order: magic signature, version, channel count, pre-skip,
+// input sample rate, output gain and channel mapping family.
+static void SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
+ uint32_t aInputSampleRate,
+ nsTArray<uint8_t>* aOutput) {
+ // The magic signature, null terminator has to be stripped off from strings.
+ constexpr uint8_t magic[] = "OpusHead";
+ aOutput->AppendElements(magic, sizeof(magic) - 1);
+
+ // The version must always be 1 (8 bits, unsigned).
+ aOutput->AppendElement(1);
+
+ // Number of output channels (8 bits, unsigned).
+ aOutput->AppendElement(aChannelCount);
+
+ // Number of samples (at 48 kHz) to discard from the decoder output when
+ // starting playback (16 bits, unsigned, little endian).
+ SerializeToBuffer(aPreskip, aOutput);
+
+ // The sampling rate of input source (32 bits, unsigned, little endian).
+ SerializeToBuffer(aInputSampleRate, aOutput);
+
+ // Output gain, an encoder should set this field to zero (16 bits, signed,
+ // little endian).
+ SerializeToBuffer((int16_t)0, aOutput);
+
+ // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
+ // unsigned).
+ aOutput->AppendElement(0);
+}
+
+// Appends the Opus comment header ("OpusTags") to aOutput: the magic
+// signature, the vendor string, then the list of user comments.
+static void SerializeOpusCommentHeader(const nsCString& aVendor,
+                                       const nsTArray<nsCString>& aComments,
+                                       nsTArray<uint8_t>* aOutput) {
+  // Magic signature; sizeof - 1 leaves out the literal's null terminator.
+  constexpr uint8_t kSignature[] = "OpusTags";
+  aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);
+
+  // Vendor: string length (32 bits, unsigned, little endian), then the string.
+  SerializeToBuffer(aVendor, aOutput);
+
+  // Comment list: the number of comments (32 bits, unsigned, little endian),
+  // followed by each comment as its length (32 bits, unsigned, little endian)
+  // and then its string.
+  const uint32_t commentCount = (uint32_t)aComments.Length();
+  SerializeToBuffer(commentCount, aOutput);
+  for (const nsCString& comment : aComments) {
+    SerializeToBuffer(comment, aOutput);
+  }
+}
+
+// Returns true if aSampleRate is one of the input rates that can be handed to
+// the Opus encoder without resampling (see kOpusSupportedInputSamplingRates).
+bool IsSampleRateSupported(TrackRate aSampleRate) {
+  // According to www.opus-codec.org, creating an opus encoder requires the
+  // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
+  // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
+  //
+  // Scan the constant table in place instead of copying it into a temporary
+  // array on every call.
+  for (const int supportedRate : kOpusSupportedInputSamplingRates) {
+    if (supportedRate == aSampleRate) {
+      return true;
+    }
+  }
+  return false;
+}
+
+} // Anonymous namespace.
+
+// Constructs an uninitialized encoder. mOutputSampleRate is aTrackRate when
+// that rate is supported by Opus directly, and kOpusSamplingRate (48kHz)
+// otherwise. The Opus encoder and the resampler are created later, in Init().
+OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue)
+ : AudioTrackEncoder(aTrackRate, aEncodedDataQueue),
+ mOutputSampleRate(IsSampleRateSupported(aTrackRate) ? aTrackRate
+ : kOpusSamplingRate),
+ mEncoder(nullptr),
+ mLookahead(0),
+ mLookaheadWritten(0),
+ mResampler(nullptr),
+ mNumOutputFrames(0) {}
+
+// Releases the Opus encoder and the resampler, if they were created.
+OpusTrackEncoder::~OpusTrackEncoder() {
+ if (mEncoder) {
+ opus_encoder_destroy(mEncoder);
+ }
+ // The resampler may already have been destroyed by Encode() after the final
+ // packet, in which case mResampler is null here.
+ if (mResampler) {
+ speex_resampler_destroy(mResampler);
+ mResampler = nullptr;
+ }
+}
+
+// Sets up the encoder for aChannels input channels: creates the resampler when
+// one is needed, creates the Opus encoder, applies mAudioBitrate if non-zero,
+// and queries the encoder lookahead. Calls SetInitialized() on success.
+//
+// Returns NS_ERROR_FAILURE if aChannels is outside
+// [1, MAX_SUPPORTED_AUDIO_CHANNELS] or if any library call fails, and
+// NS_ERROR_INVALID_ARG if mTrackRate is outside [8000, 192000].
+nsresult OpusTrackEncoder::Init(int aChannels) {
+ NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
+ NS_ERROR_FAILURE);
+
+ // This version of encoder API only support 1 or 2 channels,
+ // So set the mChannels less or equal 2 and
+ // let InterleaveTrackData downmix pcm data.
+ mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
+
+ // Reject non-audio sample rates.
+ NS_ENSURE_TRUE(mTrackRate >= 8000, NS_ERROR_INVALID_ARG);
+ NS_ENSURE_TRUE(mTrackRate <= 192000, NS_ERROR_INVALID_ARG);
+
+ if (NeedsResampler()) {
+ // Resample from the input rate to 48kHz.
+ int error;
+ mResampler = speex_resampler_init(mChannels, mTrackRate, kOpusSamplingRate,
+ SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
+
+ if (error != RESAMPLER_ERR_SUCCESS) {
+ return NS_ERROR_FAILURE;
+ }
+ }
+
+ int error = 0;
+ mEncoder = opus_encoder_create(mOutputSampleRate, mChannels,
+ OPUS_APPLICATION_AUDIO, &error);
+
+ if (error != OPUS_OK) {
+ return NS_ERROR_FAILURE;
+ }
+
+ if (mAudioBitrate) {
+ // Clamp the unsigned bitrate into the int range the ctl takes.
+ int bps = static_cast<int>(
+ std::min<uint32_t>(mAudioBitrate, std::numeric_limits<int>::max()));
+ error = opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(bps));
+ if (error != OPUS_OK) {
+ return NS_ERROR_FAILURE;
+ }
+ }
+
+ // In the case of Opus we need to calculate the codec delay based on the
+ // pre-skip. For more information see:
+ // https://tools.ietf.org/html/rfc7845#section-4.2
+ error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
+ if (error != OPUS_OK) {
+ mLookahead = 0;
+ return NS_ERROR_FAILURE;
+ }
+
+ SetInitialized();
+
+ return NS_OK;
+}
+
+// Returns mLookahead rescaled from mOutputSampleRate to kOpusSamplingRate
+// (48kHz).
+int OpusTrackEncoder::GetLookahead() const {
+ return mLookahead * kOpusSamplingRate / mOutputSampleRate;
+}
+
+// Number of frames in one kFrameDurationMs packet at the input rate
+// (mTrackRate).
+int OpusTrackEncoder::NumInputFramesPerPacket() const {
+ return mTrackRate * kFrameDurationMs / 1000;
+}
+
+// Number of frames in one kFrameDurationMs packet at the rate fed to the Opus
+// encoder (mOutputSampleRate).
+int OpusTrackEncoder::NumOutputFramesPerPacket() const {
+ return mOutputSampleRate * kFrameDurationMs / 1000;
+}
+
+// Returns true when the input must be resampled to 48kHz before encoding.
+bool OpusTrackEncoder::NeedsResampler() const {
+ // A resampler is needed when mTrackRate is not supported by the opus encoder.
+ // This is equivalent to !IsSampleRateSupported(mTrackRate) but less cycles.
+ // (The constructor sets mOutputSampleRate to mTrackRate for supported rates
+ // and to kOpusSamplingRate otherwise.)
+ return mTrackRate != mOutputSampleRate &&
+ mOutputSampleRate == kOpusSamplingRate;
+}
+
+// Builds the Opus track metadata: channel count, input sampling frequency, the
+// serialized "OpusHead" identification header and the "OpusTags" comment
+// header. Returns nullptr if the encoder has not been initialized.
+already_AddRefed<TrackMetadataBase> OpusTrackEncoder::GetMetadata() {
+  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);
+
+  MOZ_ASSERT(mInitialized);
+
+  // Release-build guard for the assertion above.
+  if (!mInitialized) {
+    return nullptr;
+  }
+
+  RefPtr<OpusMetadata> meta = new OpusMetadata();
+  meta->mChannels = mChannels;
+  meta->mSamplingFrequency = mTrackRate;
+
+  // Ogg and Webm timestamps are always sampled at 48k for Opus, so the
+  // pre-skip is the lookahead expressed at 48kHz. Use GetLookahead() so the
+  // conversion is defined in exactly one place.
+  SerializeOpusIdHeader(mChannels, GetLookahead(), mTrackRate,
+                        &meta->mIdHeader);
+
+  nsCString vendor;
+  vendor.AppendASCII(opus_get_version_string());
+
+  nsTArray<nsCString> comments;
+  comments.AppendElement(
+      nsLiteralCString("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
+
+  SerializeOpusCommentHeader(vendor, comments, &meta->mCommentHeader);
+
+  return meta.forget();
+}
+
+// Encodes as many whole Opus packets as aSegment holds input for and pushes
+// each one to mEncodedDataQueue, removing the consumed frames from aSegment.
+// Once mEndOfStream is set, the remaining input is flushed, padded with
+// mLookahead frames of silence, and the queue is finished after the final
+// packet.
+//
+// Returns NS_OK when there is not yet enough input for another packet or when
+// all packets encoded successfully. Returns NS_ERROR_FAILURE if the encoder is
+// canceled, already complete, not initialized, or if libopus reports an error.
+nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
+  AUTO_PROFILER_LABEL("OpusTrackEncoder::Encode", OTHER);
+
+  MOZ_ASSERT(aSegment);
+  MOZ_ASSERT(mInitialized || mCanceled);
+
+  if (mCanceled || IsEncodingComplete()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (!mInitialized) {
+    // calculation below depends on the truth that mInitialized is true.
+    return NS_ERROR_FAILURE;
+  }
+
+  int result = 0;
+  // Loop until we run out of packets of input data
+  while (result >= 0 && !IsEncodingComplete()) {
+    // re-sampled frames left last time which didn't fit into an Opus packet
+    // duration.
+    const int framesLeft = mResampledLeftover.Length() / mChannels;
+    MOZ_ASSERT(NumOutputFramesPerPacket() >= framesLeft);
+    // Fetch input frames such that there will be n frames where (n +
+    // framesLeft) >= NumOutputFramesPerPacket() after re-sampling.
+    const int framesToFetch = NumInputFramesPerPacket() -
+                              (framesLeft * mTrackRate / kOpusSamplingRate) +
+                              (NeedsResampler() ? 1 : 0);
+
+    if (!mEndOfStream && aSegment->GetDuration() < framesToFetch) {
+      // Not enough raw data
+      return NS_OK;
+    }
+
+    // Start encoding data.
+    AutoTArray<AudioDataValue, 9600> pcm;
+    pcm.SetLength(NumOutputFramesPerPacket() * mChannels);
+
+    int frameCopied = 0;
+
+    for (AudioSegment::ChunkIterator iter(*aSegment);
+         !iter.IsEnded() && frameCopied < framesToFetch; iter.Next()) {
+      AudioChunk chunk = *iter;
+
+      // Chunk to the required frame size.
+      TrackTime frameToCopy =
+          std::min(chunk.GetDuration(),
+                   static_cast<TrackTime>(framesToFetch - frameCopied));
+
+      // Possible greatest value of framesToFetch = 3844: see
+      // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
+      // should not be able to exceed this value.
+      MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");
+
+      if (!chunk.IsNull()) {
+        // Append the interleaved data to the end of pcm buffer.
+        AudioTrackEncoder::InterleaveTrackData(
+            chunk, frameToCopy, mChannels,
+            pcm.Elements() + frameCopied * mChannels);
+      } else {
+        CheckedInt<int> memsetLength =
+            CheckedInt<int>(frameToCopy) * mChannels * sizeof(AudioDataValue);
+        if (!memsetLength.isValid()) {
+          // This should never happen, but we use a defensive check because
+          // we really don't want a bad memset
+          MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
+          return NS_ERROR_FAILURE;
+        }
+        memset(pcm.Elements() + frameCopied * mChannels, 0,
+               memsetLength.value());
+      }
+
+      frameCopied += frameToCopy;
+    }
+
+    // Possible greatest value of framesToFetch = 3844: see
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
+    // should not be able to exceed this value.
+    MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");
+
+    int framesInPCM = frameCopied;
+    if (mResampler) {
+      AutoTArray<AudioDataValue, 9600> resamplingDest;
+      uint32_t inframes = frameCopied;
+      uint32_t outframes = inframes * kOpusSamplingRate / mTrackRate + 1;
+
+      // We want to consume all the input data, so we slightly oversize the
+      // resampled data buffer so we can fit the output data in. We cannot
+      // really predict the output frame count at each call.
+      resamplingDest.SetLength(outframes * mChannels);
+
+      // Select the sample format with #ifdef, exactly as the encode step below
+      // does, so both steps always agree on the type of the data in pcm.
+#ifdef MOZ_SAMPLE_TYPE_S16
+      short* in = reinterpret_cast<short*>(pcm.Elements());
+      short* out = reinterpret_cast<short*>(resamplingDest.Elements());
+      speex_resampler_process_interleaved_int(mResampler, in, &inframes, out,
+                                              &outframes);
+#else
+      float* in = reinterpret_cast<float*>(pcm.Elements());
+      float* out = reinterpret_cast<float*>(resamplingDest.Elements());
+      speex_resampler_process_interleaved_float(mResampler, in, &inframes, out,
+                                                &outframes);
+#endif
+
+      // Rebuild pcm as: leftover from the previous iteration, followed by as
+      // many freshly resampled frames as fit into one packet.
+      MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
+      PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
+              mResampledLeftover.Length());
+
+      uint32_t outframesToCopy = std::min(
+          outframes,
+          static_cast<uint32_t>(NumOutputFramesPerPacket() - framesLeft));
+
+      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
+                 outframesToCopy * mChannels);
+      PodCopy(pcm.Elements() + mResampledLeftover.Length(),
+              resamplingDest.Elements(), outframesToCopy * mChannels);
+      // Whatever did not fit is kept for the next iteration.
+      int frameLeftover = outframes - outframesToCopy;
+      mResampledLeftover.SetLength(frameLeftover * mChannels);
+      PodCopy(mResampledLeftover.Elements(),
+              resamplingDest.Elements() + outframesToCopy * mChannels,
+              mResampledLeftover.Length());
+      // This is always at 48000Hz.
+      framesInPCM = framesLeft + outframesToCopy;
+    }
+
+    // Remove the raw data which has been pulled to pcm buffer.
+    // frameCopied is counted in input-rate frames: it equals framesToFetch,
+    // or is smaller than that if the end of the stream was reached.
+    aSegment->RemoveLeading(frameCopied);
+
+    // Has reached the end of input stream and all queued data has pulled for
+    // encoding.
+    bool isFinalPacket = false;
+    if (aSegment->GetDuration() == 0 && mEndOfStream &&
+        framesInPCM < NumOutputFramesPerPacket()) {
+      // Pad |mLookahead| samples to the end of the track to prevent loss of
+      // original data.
+      const int toWrite = std::min(mLookahead - mLookaheadWritten,
+                                   NumOutputFramesPerPacket() - framesInPCM);
+      PodZero(pcm.Elements() + framesInPCM * mChannels, toWrite * mChannels);
+      mLookaheadWritten += toWrite;
+      framesInPCM += toWrite;
+      if (mLookaheadWritten == mLookahead) {
+        isFinalPacket = true;
+      }
+    }
+
+    MOZ_ASSERT_IF(!isFinalPacket, framesInPCM == NumOutputFramesPerPacket());
+
+    // Append null data to pcm buffer if the leftover data is not enough for
+    // opus encoder.
+    if (framesInPCM < NumOutputFramesPerPacket() && isFinalPacket) {
+      PodZero(pcm.Elements() + framesInPCM * mChannels,
+              (NumOutputFramesPerPacket() - framesInPCM) * mChannels);
+    }
+    auto frameData = MakeRefPtr<EncodedFrame::FrameData>();
+    // Encode the data with Opus Encoder.
+    frameData->SetLength(MAX_DATA_BYTES);
+    // result is returned as opus error code if it is negative.
+    result = 0;
+#ifdef MOZ_SAMPLE_TYPE_S16
+    const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
+    result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
+                         frameData->Elements(), MAX_DATA_BYTES);
+#else
+    const float* pcmBuf = static_cast<float*>(pcm.Elements());
+    result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
+                               frameData->Elements(), MAX_DATA_BYTES);
+#endif
+    // Shrink the buffer to the number of bytes actually produced.
+    frameData->SetLength(result >= 0 ? result : 0);
+
+    if (result < 0) {
+      LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
+    }
+    if (isFinalPacket) {
+      if (mResampler) {
+        speex_resampler_destroy(mResampler);
+        mResampler = nullptr;
+      }
+      mResampledLeftover.SetLength(0);
+    }
+
+    // timestamp should be the time of the first sample
+    mEncodedDataQueue.Push(MakeAndAddRef<EncodedFrame>(
+        media::TimeUnit(mNumOutputFrames + mLookahead, mOutputSampleRate),
+        static_cast<uint64_t>(framesInPCM) * kOpusSamplingRate /
+            mOutputSampleRate,
+        kOpusSamplingRate, EncodedFrame::OPUS_AUDIO_FRAME,
+        std::move(frameData)));
+
+    mNumOutputFrames += NumOutputFramesPerPacket();
+    LOG("[Opus] mOutputTimeStamp %.3f.",
+        media::TimeUnit(mNumOutputFrames, mOutputSampleRate).ToSeconds());
+
+    if (isFinalPacket) {
+      LOG("[Opus] Done encoding.");
+      mEncodedDataQueue.Finish();
+    }
+  }
+
+  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
+}
+
+} // namespace mozilla
+
+#undef LOG
diff --git a/dom/media/encoder/OpusTrackEncoder.h b/dom/media/encoder/OpusTrackEncoder.h
new file mode 100644
index 0000000000..5206944169
--- /dev/null
+++ b/dom/media/encoder/OpusTrackEncoder.h
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef OpusTrackEncoder_h_
+#define OpusTrackEncoder_h_
+
+#include <stdint.h>
+#include <speex/speex_resampler.h>
+#include "TimeUnits.h"
+#include "TrackEncoder.h"
+
+struct OpusEncoder;
+
+namespace mozilla {
+
+// Opus metadata: the serialized Ogg Opus headers plus the basic stream
+// parameters.
+class OpusMetadata : public TrackMetadataBase {
+ public:
+  // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus.
+  nsTArray<uint8_t> mIdHeader;
+  // The Comment Header of OggOpus.
+  nsTArray<uint8_t> mCommentHeader;
+  // Number of channels. Zero until filled in, so a default-constructed object
+  // never exposes an indeterminate value.
+  int32_t mChannels = 0;
+  // Sampling frequency in Hz. Zero until filled in.
+  float mSamplingFrequency = 0.0f;
+  MetadataKind GetKind() const override { return METADATA_OPUS; }
+};
+
+// Audio track encoder producing Opus packets with libopus, resampling the
+// input with speex when its rate is not one Opus accepts directly.
+class OpusTrackEncoder : public AudioTrackEncoder {
+ public:
+ OpusTrackEncoder(TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue);
+ virtual ~OpusTrackEncoder();
+
+ already_AddRefed<TrackMetadataBase> GetMetadata() override;
+
+ /**
+ * The encoder lookahead at 48k rate.
+ */
+ int GetLookahead() const;
+
+ protected:
+ /**
+ * The number of frames, in the input rate mTrackRate, needed to fill an
+ * encoded opus packet. A frame is a sample per channel.
+ */
+ int NumInputFramesPerPacket() const override;
+
+ nsresult Init(int aChannels) override;
+
+ /**
+ * Encodes buffered data and pushes it to mEncodedDataQueue.
+ */
+ nsresult Encode(AudioSegment* aSegment) override;
+
+ /**
+ * The number of frames, in the output rate (see mOutputSampleRate), needed
+ * to fill an encoded opus packet. A frame is a sample per channel.
+ */
+ int NumOutputFramesPerPacket() const;
+
+ /**
+ * True if the input needs to be resampled to be fed to the underlying opus
+ * encoder.
+ */
+ bool NeedsResampler() const;
+
+ public:
+ /**
+ * The samplerate of the data fed to the Opus encoder. This differs from the
+ * input samplerate (mTrackRate) if resampling occurs.
+ */
+ const TrackRate mOutputSampleRate;
+
+ private:
+ /**
+ * The Opus encoder from libopus.
+ */
+ OpusEncoder* mEncoder;
+
+ /**
+ * Total samples of delay added by codec (in rate mOutputSampleRate), can
+ * be queried by the encoder. From the perspective of decoding, real data
+ * begins this many samples late, so the encoder needs to append this many
+ * null samples to the end of stream, in order to align the time of input and
+ * output.
+ */
+ int mLookahead;
+
+ /**
+ * Number of mLookahead samples that have been written. When equal to
+ * mLookahead at the end of the stream, the final packet is produced.
+ */
+ int mLookaheadWritten;
+
+ /**
+ * Resampler used when the input sample rate is not one the Opus encoder
+ * accepts directly; the input is then resampled to 48kHz. Null otherwise.
+ */
+ SpeexResamplerState* mResampler;
+
+ /**
+ * Store the resampled frames that don't fit into an Opus packet duration.
+ * They will be prepended to the resampled frames next encoding cycle.
+ */
+ nsTArray<AudioDataValue> mResampledLeftover;
+
+ /**
+ * Number of audio frames encoded, in mOutputSampleRate.
+ */
+ uint64_t mNumOutputFrames;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackEncoder.cpp b/dom/media/encoder/TrackEncoder.cpp
new file mode 100644
index 0000000000..8e03fd6fe3
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.cpp
@@ -0,0 +1,822 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "TrackEncoder.h"
+
+#include "AudioChannelFormat.h"
+#include "DriftCompensation.h"
+#include "MediaTrackGraph.h"
+#include "MediaTrackListener.h"
+#include "mozilla/AbstractThread.h"
+#include "mozilla/Logging.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/RollingMean.h"
+#include "VideoUtils.h"
+#include "mozilla/Telemetry.h"
+
+namespace mozilla {
+
+// Log module shared by the track encoders in this file.
+LazyLogModule gTrackEncoderLog("TrackEncoder");
+#define TRACK_LOG(type, msg) MOZ_LOG(gTrackEncoderLog, type, msg)
+
+// Channel count used to initialize the audio encoder when no non-null audio
+// chunk has been seen.
+constexpr int DEFAULT_CHANNELS = 1;
+constexpr int DEFAULT_FRAME_WIDTH = 640;
+constexpr int DEFAULT_FRAME_HEIGHT = 480;
+constexpr int DEFAULT_FRAME_RATE = 30;
+// 10 second threshold if the audio encoder cannot be initialized.
+constexpr int AUDIO_INIT_FAILED_DURATION = 10;
+// 30 second threshold if the video encoder cannot be initialized.
+constexpr int VIDEO_INIT_FAILED_DURATION = 30;
+// Window size of the rolling mean of frame durations used when detecting the
+// video framerate.
+constexpr int FRAMERATE_DETECTION_ROLLING_WINDOW = 3;
+constexpr size_t FRAMERATE_DETECTION_MIN_CHUNKS = 5;
+constexpr int FRAMERATE_DETECTION_MAX_DURATION_S = 6;
+
+// Constructs an encoder in its initial state (not initialized, started, ended,
+// canceled or suspended) for a track at aTrackRate. Encoded frames are pushed
+// to aEncodedDataQueue, which is held by reference.
+TrackEncoder::TrackEncoder(TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue)
+ : mInitialized(false),
+ mStarted(false),
+ mEndOfStream(false),
+ mCanceled(false),
+ mInitCounter(0),
+ mSuspended(false),
+ mTrackRate(aTrackRate),
+ mEncodedDataQueue(aEncodedDataQueue) {}
+
+// Returns true once SetInitialized() has been called.
+bool TrackEncoder::IsInitialized() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mInitialized;
+}
+
+// Returns true once SetStarted() has been called.
+bool TrackEncoder::IsStarted() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mStarted;
+}
+
+// Returns true once the encoded data queue has been finished.
+bool TrackEncoder::IsEncodingComplete() const {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mEncodedDataQueue.IsFinished();
+}
+
+// Marks the encoder as initialized and calls Initialized() on every listener.
+// Does nothing if the encoder is already initialized.
+void TrackEncoder::SetInitialized() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+ if (mInitialized) {
+ return;
+ }
+
+ mInitialized = true;
+
+ // Notify from a copy of the listener array.
+ for (auto& l : mListeners.Clone()) {
+ l->Initialized(this);
+ }
+}
+
+// Marks the encoder as started and calls Started() on every listener. Does
+// nothing if the encoder is already started.
+void TrackEncoder::SetStarted() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+ if (mStarted) {
+ return;
+ }
+
+ mStarted = true;
+
+ // Notify from a copy of the listener array.
+ for (auto& l : mListeners.Clone()) {
+ l->Started(this);
+ }
+}
+
+// Cancels the encoder, then calls Error() on every listener.
+void TrackEncoder::OnError() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+ Cancel();
+
+ // Notify from a copy of the listener array.
+ for (auto& l : mListeners.Clone()) {
+ l->Error(this);
+ }
+}
+
+// Adds aListener to the listeners that get notified of state changes. The
+// listener must not already be registered.
+void TrackEncoder::RegisterListener(TrackEncoderListener* aListener) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ MOZ_ASSERT(!mListeners.Contains(aListener));
+ mListeners.AppendElement(aListener);
+}
+
+// Removes aListener from the listener list and returns the result of the
+// removal.
+bool TrackEncoder::UnregisterListener(TrackEncoderListener* aListener) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mListeners.RemoveElement(aListener);
+}
+
+// Sets the thread that the other methods assert they are called on.
+void TrackEncoder::SetWorkerThread(AbstractThread* aWorkerThread) {
+ mWorkerThread = aWorkerThread;
+}
+
+// Suspends the encoder. While suspended, AppendAudioSegment() does not buffer
+// or encode incoming audio. No-op if already suspended.
+void AudioTrackEncoder::Suspend() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Suspend(), was %s", this,
+ mSuspended ? "suspended" : "live"));
+
+ if (mSuspended) {
+ return;
+ }
+
+ mSuspended = true;
+}
+
+// Resumes a suspended encoder so that incoming audio is buffered and encoded
+// again. No-op if the encoder is not suspended.
+void AudioTrackEncoder::Resume() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Resume(), was %s", this,
+ mSuspended ? "suspended" : "live"));
+
+ if (!mSuspended) {
+ return;
+ }
+
+ mSuspended = false;
+}
+
+// Takes the audio in aSegment, buffers it in mOutgoingBuffer and, once the
+// encoder is initialized, encodes what has been buffered. The segment is
+// ignored when the encoder is canceled, has reached end of stream, or is
+// suspended. Calls OnError() if encoding fails.
+void AudioTrackEncoder::AppendAudioSegment(AudioSegment&& aSegment) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ AUTO_PROFILER_LABEL("AudioTrackEncoder::AppendAudioSegment", OTHER);
+ TRACK_LOG(LogLevel::Verbose,
+ ("[AudioTrackEncoder %p]: AppendAudioSegment() duration=%" PRIu64,
+ this, aSegment.GetDuration()));
+
+ if (mCanceled) {
+ return;
+ }
+
+ if (mEndOfStream) {
+ return;
+ }
+
+ // NOTE(review): TryInit() inspects mOutgoingBuffer before aSegment has been
+ // appended to it, so chunks arriving in aSegment are only considered for
+ // initialization on a later call. Confirm this is intended.
+ TryInit(mOutgoingBuffer, aSegment.GetDuration());
+
+ if (mSuspended) {
+ return;
+ }
+
+ SetStarted();
+ mOutgoingBuffer.AppendFrom(&aSegment);
+
+ if (!mInitialized) {
+ // Keep buffering until the encoder can be initialized.
+ return;
+ }
+
+ if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+ OnError();
+ return;
+ }
+
+ MOZ_ASSERT_IF(IsEncodingComplete(), mOutgoingBuffer.IsEmpty());
+}
+
+// Tries to initialize the encoder, unless it already is initialized.
+//
+// The channel count is taken from the first non-null chunk in aSegment. If
+// aSegment has no such chunk, aDuration is added to the time spent
+// uninitialized; once that exceeds AUDIO_INIT_FAILED_DURATION seconds (and
+// this is not the first attempt) the encoder is initialized with
+// DEFAULT_CHANNELS instead. Calls OnError() if Init() fails.
+void AudioTrackEncoder::TryInit(const AudioSegment& aSegment,
+                                TrackTime aDuration) {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+  if (mInitialized) {
+    return;
+  }
+
+  mInitCounter++;
+  TRACK_LOG(LogLevel::Debug,
+            ("[AudioTrackEncoder %p]: Inited the audio encoder %d times", this,
+             mInitCounter));
+
+  for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
+       iter.Next()) {
+    // The number of channels is determined by the first non-null chunk, and
+    // thus the audio encoder is initialized at this time.
+    if (iter->IsNull()) {
+      continue;
+    }
+
+    // Whether Init() succeeds or fails, we are done after the first non-null
+    // chunk, so both outcomes return from here.
+    nsresult rv = Init(iter->mChannelData.Length());
+    if (NS_FAILED(rv)) {
+      TRACK_LOG(
+          LogLevel::Error,
+          ("[AudioTrackEncoder %p]: Failed to initialize the encoder!", this));
+      OnError();
+      return;
+    }
+
+    TRACK_LOG(LogLevel::Info,
+              ("[AudioTrackEncoder %p]: Successfully initialized!", this));
+    return;
+  }
+
+  // Only null chunks (or none at all) so far, so the encoder is still
+  // uninitialized at this point.
+  mNotInitDuration += aDuration;
+  if (((mNotInitDuration - 1) / mTrackRate >= AUDIO_INIT_FAILED_DURATION) &&
+      mInitCounter > 1) {
+    // Perform a best effort initialization since we haven't gotten any
+    // data yet. Motivated by issues like Bug 1336367
+    TRACK_LOG(LogLevel::Warning,
+              ("[AudioTrackEncoder]: Initialize failed for %ds. Attempting to "
+               "init with %d (default) channels!",
+               AUDIO_INIT_FAILED_DURATION, DEFAULT_CHANNELS));
+    nsresult rv = Init(DEFAULT_CHANNELS);
+    if (NS_FAILED(rv)) {
+      TRACK_LOG(LogLevel::Error,
+                ("[AudioTrackEncoder %p]: Default-channel-init failed.", this));
+      OnError();
+      return;
+    }
+  }
+}
+
+// Cancels encoding: marks the encoder canceled and ended, drops any buffered
+// audio and finishes the encoded data queue.
+void AudioTrackEncoder::Cancel() {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Info, ("[AudioTrackEncoder %p]: Cancel()", this));
+ mCanceled = true;
+ mEndOfStream = true;
+ mOutgoingBuffer.Clear();
+ mEncodedDataQueue.Finish();
+}
+
+// Signals that no more audio will be appended. Initializes the encoder with
+// DEFAULT_CHANNELS if it never got initialized, then encodes whatever is left
+// in mOutgoingBuffer. Calls OnError() if initialization or encoding fails.
+// Does nothing more if the end of the stream was already reached.
+void AudioTrackEncoder::NotifyEndOfStream() {
+  MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+  TRACK_LOG(LogLevel::Info,
+            ("[AudioTrackEncoder %p]: NotifyEndOfStream()", this));
+
+  if (!mCanceled && !mInitialized) {
+    // If source audio track is completely silent till the end of encoding,
+    // initialize the encoder with a default channel count.
+    if (NS_FAILED(Init(DEFAULT_CHANNELS))) {
+      // Report the failure here rather than running into Encode() with an
+      // uninitialized encoder. OnError() cancels the encoder, which also
+      // clears mOutgoingBuffer and marks the end of the stream.
+      TRACK_LOG(LogLevel::Error,
+                ("[AudioTrackEncoder %p]: Default-channel-init failed.", this));
+      OnError();
+      return;
+    }
+  }
+
+  if (mEndOfStream) {
+    return;
+  }
+
+  mEndOfStream = true;
+
+  if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+    mOutgoingBuffer.Clear();
+    OnError();
+  }
+
+  MOZ_ASSERT(mOutgoingBuffer.GetDuration() == 0);
+}
+
+// Interleaves aDuration frames of aChunk into aOutput with aOutputChannels
+// channels. Collects the chunk's per-channel pointers for its sample format
+// and forwards them, with the chunk's volume, to the InterleaveTrackData
+// overload that takes an array of channel pointers. At most aOutputChannels
+// channel pointers are collected.
+/*static*/
+void AudioTrackEncoder::InterleaveTrackData(AudioChunk& aChunk,
+ int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput) {
+ uint32_t numChannelsToCopy = std::min(
+ aOutputChannels, static_cast<uint32_t>(aChunk.mChannelData.Length()));
+ switch (aChunk.mBufferFormat) {
+ case AUDIO_FORMAT_S16: {
+ AutoTArray<const int16_t*, 2> array;
+ array.SetLength(numChannelsToCopy);
+ for (uint32_t i = 0; i < array.Length(); i++) {
+ array[i] = static_cast<const int16_t*>(aChunk.mChannelData[i]);
+ }
+ InterleaveTrackData(array, aDuration, aOutputChannels, aOutput,
+ aChunk.mVolume);
+ break;
+ }
+ case AUDIO_FORMAT_FLOAT32: {
+ AutoTArray<const float*, 2> array;
+ array.SetLength(numChannelsToCopy);
+ for (uint32_t i = 0; i < array.Length(); i++) {
+ array[i] = static_cast<const float*>(aChunk.mChannelData[i]);
+ }
+ InterleaveTrackData(array, aDuration, aOutputChannels, aOutput,
+ aChunk.mVolume);
+ break;
+ }
+ case AUDIO_FORMAT_SILENCE: {
+ // Not handled: aOutput is left untouched for silent chunks.
+ MOZ_ASSERT(false, "To implement.");
+ }
+ };
+}
+
+// Converts aDuration frames of interleaved audio in aInput (aChannels samples
+// per frame) into planar layout in aOutput, where each channel occupies a
+// contiguous run of aDuration samples.
+/*static*/
+void AudioTrackEncoder::DeInterleaveTrackData(AudioDataValue* aInput,
+                                              int32_t aDuration,
+                                              int32_t aChannels,
+                                              AudioDataValue* aOutput) {
+  for (int32_t channel = 0; channel < aChannels; ++channel) {
+    // Start of this channel's plane in the output.
+    AudioDataValue* plane = aOutput + channel * aDuration;
+    for (int32_t frame = 0; frame < aDuration; ++frame) {
+      plane[frame] = aInput[frame * aChannels + channel];
+    }
+  }
+}
+
+// Memory reporting: returns the size of the buffered, not yet encoded audio as
+// measured by aMallocSizeOf.
+size_t AudioTrackEncoder::SizeOfExcludingThis(
+ mozilla::MallocSizeOf aMallocSizeOf) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+// Constructs an enabled video encoder with no encoded ticks, a bitrate of zero
+// and an empty last chunk. Takes ownership of the aDriftCompensator reference.
+VideoTrackEncoder::VideoTrackEncoder(
+ RefPtr<DriftCompensator> aDriftCompensator, TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue,
+ FrameDroppingMode aFrameDroppingMode)
+ : TrackEncoder(aTrackRate, aEncodedDataQueue),
+ mDriftCompensator(std::move(aDriftCompensator)),
+ mEncodedTicks(0),
+ mVideoBitrate(0),
+ mFrameDroppingMode(aFrameDroppingMode),
+ mEnabled(true) {
+ mLastChunk.mDuration = 0;
+}
+
+// Suspends the encoder at aTime and remembers that time so Resume() can
+// compensate for the suspended period. No-op if already suspended.
+void VideoTrackEncoder::Suspend(const TimeStamp& aTime) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Info,
+ ("[VideoTrackEncoder %p]: Suspend() at %.3fs, was %s", this,
+ mStartTime.IsNull() ? 0.0 : (aTime - mStartTime).ToSeconds(),
+ mSuspended ? "suspended" : "live"));
+
+ if (mSuspended) {
+ return;
+ }
+
+ mSuspended = true;
+ mSuspendTime = aTime;
+}
+
+// Resumes a suspended encoder at aTime. The time spent suspended is added to
+// mStartTime and to the last chunk's timestamp, so the suspended period does
+// not count as encoded time. No-op if the encoder is not suspended.
+void VideoTrackEncoder::Resume(const TimeStamp& aTime) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+ if (!mSuspended) {
+ return;
+ }
+
+ TRACK_LOG(
+ LogLevel::Info,
+ ("[VideoTrackEncoder %p]: Resume() after %.3fs, was %s", this,
+ (aTime - mSuspendTime).ToSeconds(), mSuspended ? "suspended" : "live"));
+
+ mSuspended = false;
+
+ TimeDuration suspendDuration = aTime - mSuspendTime;
+ if (!mLastChunk.mTimeStamp.IsNull()) {
+ // If a buffered chunk covers aTime, make it start exactly at aTime.
+ VideoChunk* nextChunk = mIncomingBuffer.FindChunkContaining(aTime);
+ MOZ_ASSERT_IF(nextChunk, nextChunk->mTimeStamp <= aTime);
+ if (nextChunk) {
+ nextChunk->mTimeStamp = aTime;
+ }
+ mLastChunk.mTimeStamp += suspendDuration;
+ }
+ if (!mStartTime.IsNull()) {
+ mStartTime += suspendDuration;
+ }
+
+ // Reset to a null timestamp now that we are live again.
+ mSuspendTime = TimeStamp();
+}
+
+// Disables the track at aTime. If encoding has started, frames up to aTime are
+// processed first and a forced-black copy of the last frame is inserted at
+// aTime, ahead of any frames still buffered.
+void VideoTrackEncoder::Disable(const TimeStamp& aTime) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Disable()", this));
+
+ if (mStartTime.IsNull()) {
+ // We haven't started yet. No need to touch future frames.
+ mEnabled = false;
+ return;
+ }
+
+ // Advancing currentTime to process any frames in mIncomingBuffer between
+ // mCurrentTime and aTime.
+ AdvanceCurrentTime(aTime);
+ if (!mLastChunk.mTimeStamp.IsNull()) {
+ // Insert a black frame at t=aTime into mIncomingBuffer, to trigger the
+ // shift to black at the right moment.
+ // The buffered frames are moved aside and re-appended afterwards so that
+ // the new frame ends up first.
+ VideoSegment tempSegment;
+ tempSegment.AppendFrom(&mIncomingBuffer);
+ mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()),
+ mLastChunk.mFrame.GetIntrinsicSize(),
+ mLastChunk.mFrame.GetPrincipalHandle(), true,
+ aTime);
+ mIncomingBuffer.AppendFrom(&tempSegment);
+ }
+ mEnabled = false;
+}
+
+// Enables the track at aTime. If encoding has started, frames up to aTime are
+// processed first and a copy of the last frame, with its own force-black
+// setting, is inserted at aTime, ahead of any frames still buffered.
+void VideoTrackEncoder::Enable(const TimeStamp& aTime) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Enable()", this));
+
+ if (mStartTime.IsNull()) {
+ // We haven't started yet. No need to touch future frames.
+ mEnabled = true;
+ return;
+ }
+
+ // Advancing currentTime to process any frames in mIncomingBuffer between
+ // mCurrentTime and aTime.
+ AdvanceCurrentTime(aTime);
+ if (!mLastChunk.mTimeStamp.IsNull()) {
+ // Insert a real frame at t=aTime into mIncomingBuffer, to trigger the
+ // shift from black at the right moment.
+ // The buffered frames are moved aside and re-appended afterwards so that
+ // the new frame ends up first.
+ VideoSegment tempSegment;
+ tempSegment.AppendFrom(&mIncomingBuffer);
+ mIncomingBuffer.AppendFrame(do_AddRef(mLastChunk.mFrame.GetImage()),
+ mLastChunk.mFrame.GetIntrinsicSize(),
+ mLastChunk.mFrame.GetPrincipalHandle(),
+ mLastChunk.mFrame.GetForceBlack(), aTime);
+ mIncomingBuffer.AppendFrom(&tempSegment);
+ }
+ mEnabled = true;
+}
+
+// Copies the frames of aSegment into mIncomingBuffer and clears aSegment. A
+// null chunk, or a frame whose timestamp is earlier than the last buffered
+// one, discards what was buffered so far. The segment is ignored when the
+// encoder is canceled or has reached end of stream.
+void VideoTrackEncoder::AppendVideoSegment(VideoSegment&& aSegment) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: AppendVideoSegment()", this));
+
+ if (mCanceled) {
+ return;
+ }
+
+ if (mEndOfStream) {
+ return;
+ }
+
+ for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
+ iter.Next()) {
+ if (iter->IsNull()) {
+ // A null image was sent. This is a signal from the source that we should
+ // clear any images buffered in the future.
+ mIncomingBuffer.Clear();
+ continue; // Don't append iter, as it is null.
+ }
+ if (VideoChunk* c = mIncomingBuffer.GetLastChunk()) {
+ if (iter->mTimeStamp < c->mTimeStamp) {
+ // Time went backwards. This can happen when a MediaDecoder seeks.
+ // We need to handle this by removing any frames buffered in the future
+ // and start over at iter->mTimeStamp.
+ mIncomingBuffer.Clear();
+ }
+ }
+ // The first non-null frame marks the encoder as started.
+ SetStarted();
+ mIncomingBuffer.AppendFrame(do_AddRef(iter->mFrame.GetImage()),
+ iter->mFrame.GetIntrinsicSize(),
+ iter->mFrame.GetPrincipalHandle(),
+ iter->mFrame.GetForceBlack(), iter->mTimeStamp);
+ }
+ aSegment.Clear();
+}
+
+void VideoTrackEncoder::Init(const VideoSegment& aSegment,
+ const TimeStamp& aTime,
+ size_t aFrameRateDetectionMinChunks) {
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ MOZ_ASSERT(!aTime.IsNull());
+ // Estimates a framerate from aSegment and inits the encoder from its first non-null frame; no-op once initialized.
+ if (mInitialized) {
+ return;
+ }
+
+ mInitCounter++;
+ TRACK_LOG(LogLevel::Debug,
+ ("[VideoTrackEncoder %p]: Init the video encoder %u times", this,
+ mInitCounter));
+
+ Maybe<float> framerate;  // Stays Nothing until an estimate can be made.
+ if (!aSegment.IsEmpty()) {
+ // The number of whole frames, i.e., with known duration.
+ size_t frameCount = 0;
+ RollingMean<TimeDuration, TimeDuration> meanDuration(
+ FRAMERATE_DETECTION_ROLLING_WINDOW);
+ VideoSegment::ConstChunkIterator iter(aSegment);
+ TimeStamp previousChunkTime = iter->mTimeStamp;
+ iter.Next();  // Start at the second chunk; durations are measured between neighbours.
+ for (; !iter.IsEnded(); iter.Next(), ++frameCount) {
+ meanDuration.insert(iter->mTimeStamp - previousChunkTime);
+ previousChunkTime = iter->mTimeStamp;
+ }
+ TRACK_LOG(LogLevel::Debug, ("[VideoTrackEncoder %p]: Init() frameCount=%zu",
+ this, frameCount));
+ if (frameCount >= aFrameRateDetectionMinChunks) {
+ if (meanDuration.empty()) {
+ // No whole frames available, use aTime as end time. NOTE(review): infinite rate if aTime == mStartTime.
+ framerate = Some(1.0f / (aTime - mStartTime).ToSeconds());
+ } else {
+ // We want some frames for estimating the framerate.
+ framerate = Some(1.0f / meanDuration.mean().ToSeconds());
+ }
+ } else if ((aTime - mStartTime).ToSeconds() >
+ FRAMERATE_DETECTION_MAX_DURATION_S) {
+ // Instead of failing init after the fail-timeout, we fallback to a very
+ // low rate.
+ framerate = Some(static_cast<float>(frameCount) /
+ (aTime - mStartTime).ToSeconds());
+ }
+ }
+
+ if (framerate) {
+ for (VideoSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
+ iter.Next()) {
+ if (iter->IsNull()) {
+ continue;  // Null chunks carry no image to take a size from.
+ }
+
+ gfx::IntSize imgsize = iter->mFrame.GetImage()->GetSize();
+ gfx::IntSize intrinsicSize = iter->mFrame.GetIntrinsicSize();
+ nsresult rv = Init(imgsize.width, imgsize.height, intrinsicSize.width,
+ intrinsicSize.height, *framerate);
+
+ if (NS_SUCCEEDED(rv)) {
+ TRACK_LOG(LogLevel::Info,
+ ("[VideoTrackEncoder %p]: Successfully initialized!", this));
+ return;
+ }
+
+ TRACK_LOG(
+ LogLevel::Error,
+ ("[VideoTrackEncoder %p]: Failed to initialize the encoder!", this));
+ OnError();
+ return;  // Already reported; don't fall through to the timeout check and report a second time.
+ }
+ }
+
+ if (((aTime - mStartTime).ToSeconds() > VIDEO_INIT_FAILED_DURATION) &&
+ mInitCounter > 1) {
+ TRACK_LOG(LogLevel::Warning,
+ ("[VideoTrackEncoder %p]: No successful init for %ds.", this,
+ VIDEO_INIT_FAILED_DURATION));
+ OnError();
+ return;
+ }
+}
+
+void VideoTrackEncoder::Cancel() {  // Cancels the encoding: drops all buffered data and finishes the output queue.
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: Cancel()", this));
+ mCanceled = true;
+ mEndOfStream = true;  // A canceled track also counts as ended.
+ mIncomingBuffer.Clear();
+ mOutgoingBuffer.Clear();
+ mLastChunk.SetNull(0);
+ mEncodedDataQueue.Finish();  // No more encoded data will be pushed.
+}
+
+void VideoTrackEncoder::NotifyEndOfStream() {  // Flushes the last frame up to the track end and encodes what remains.
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+
+ if (mCanceled) {
+ return;
+ }
+
+ if (mEndOfStream) {
+ // We have already been notified.
+ return;
+ }
+
+ mEndOfStream = true;
+ TRACK_LOG(LogLevel::Info,
+ ("[VideoTrackEncoder %p]: NotifyEndOfStream()", this));
+
+ if (!mLastChunk.IsNull()) {
+ RefPtr<layers::Image> lastImage = mLastChunk.mFrame.GetImage();
+ const TimeStamp now = TimeStamp::Now();
+ TimeStamp currentTime = mSuspended ? mSuspendTime : mCurrentTime;  // If suspended, the track ends at the suspend time.
+ currentTime = mDriftCompensator->GetVideoTime(now, currentTime);
+ TimeDuration absoluteEndTime = currentTime - mStartTime;
+ CheckedInt64 duration =
+ UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) -
+ mEncodedTicks;  // Ticks from what has been output so far to the track end.
+ if (duration.isValid() && duration.value() > 0) {
+ mEncodedTicks += duration.value();
+ TRACK_LOG(LogLevel::Debug,
+ ("[VideoTrackEncoder %p]: Appending last video frame %p at pos "
+ "%.3fs, "
+ "track-end=%.3fs",
+ this, lastImage.get(),
+ (mLastChunk.mTimeStamp - mStartTime).ToSeconds(),
+ absoluteEndTime.ToSeconds()));
+ mOutgoingBuffer.AppendFrame(
+ lastImage.forget(), mLastChunk.mFrame.GetIntrinsicSize(),
+ PRINCIPAL_HANDLE_NONE, mLastChunk.mFrame.GetForceBlack() || !mEnabled,
+ mLastChunk.mTimeStamp);
+ mOutgoingBuffer.ExtendLastFrameBy(duration.value());
+ }
+
+ if (!mInitialized) {
+ // Try to init without waiting for an accurate framerate.
+ Init(mOutgoingBuffer, currentTime, 0);
+ }
+ }
+
+ if (mCanceled) {
+ // Previous Init failed and we got canceled. Nothing to do here.
+ return;
+ }
+
+ mIncomingBuffer.Clear();  // Frames not yet reached by the current time are dropped.
+ mLastChunk.SetNull(0);
+
+ if (NS_WARN_IF(!mInitialized)) {
+ // Still not initialized. There was probably no real frame at all, perhaps
+ // by muting. Initialize the encoder with default frame width, frame
+ // height, and frame rate. NOTE(review): the nsresult of this Init() is ignored.
+ Init(DEFAULT_FRAME_WIDTH, DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_WIDTH,
+ DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_RATE);
+ }
+
+ if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+ OnError();
+ }
+
+ MOZ_ASSERT(mOutgoingBuffer.IsEmpty());
+}
+
+void VideoTrackEncoder::SetStartOffset(const TimeStamp& aStartOffset) {  // Sets the track's start time.
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ MOZ_ASSERT(mCurrentTime.IsNull());  // Must be called before any time has been set.
+ TRACK_LOG(LogLevel::Info, ("[VideoTrackEncoder %p]: SetStartOffset()", this));
+ mStartTime = aStartOffset;
+ mCurrentTime = aStartOffset;  // The current time starts out at the start time.
+}
+
+void VideoTrackEncoder::AdvanceCurrentTime(const TimeStamp& aTime) {  // Moves frames older than aTime from mIncomingBuffer to the encoder.
+ AUTO_PROFILER_LABEL("VideoTrackEncoder::AdvanceCurrentTime", OTHER);
+
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ MOZ_ASSERT(!mStartTime.IsNull());
+ MOZ_ASSERT(!mCurrentTime.IsNull());
+
+ if (mCanceled) {
+ return;
+ }
+
+ if (mEndOfStream) {
+ return;
+ }
+
+ if (mSuspended) {
+ TRACK_LOG(
+ LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: AdvanceCurrentTime() suspended at %.3fs",
+ this, (mCurrentTime - mStartTime).ToSeconds()));
+ mCurrentTime = aTime;  // Time still advances while suspended...
+ mIncomingBuffer.ForgetUpToTime(mCurrentTime);  // ...but nothing is forwarded to the encoder.
+ return;
+ }
+
+ TRACK_LOG(LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: AdvanceCurrentTime() to %.3fs", this,
+ (aTime - mStartTime).ToSeconds()));
+
+ // Grab frames within the currentTime range from the incoming buffer.
+ VideoSegment tempSegment;
+ {
+ VideoChunk* previousChunk = &mLastChunk;  // The most recent frame taken so far; starts at mLastChunk.
+ auto appendDupes = [&](const TimeStamp& aUpTo) {
+ while ((aUpTo - previousChunk->mTimeStamp).ToSeconds() > 1.0) {
+ // We encode at least one frame per second, even if there are none
+ // flowing.
+ previousChunk->mTimeStamp += TimeDuration::FromSeconds(1.0);  // NB: mutates the pointed-to chunk in place.
+ tempSegment.AppendFrame(
+ do_AddRef(previousChunk->mFrame.GetImage()),
+ previousChunk->mFrame.GetIntrinsicSize(),
+ previousChunk->mFrame.GetPrincipalHandle(),
+ previousChunk->mFrame.GetForceBlack() || !mEnabled,
+ previousChunk->mTimeStamp);
+ TRACK_LOG(
+ LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: Duplicating video frame (%p) at pos %.3f",
+ this, previousChunk->mFrame.GetImage(),
+ (previousChunk->mTimeStamp - mStartTime).ToSeconds()));
+ }
+ };
+ for (VideoSegment::ChunkIterator iter(mIncomingBuffer); !iter.IsEnded();
+ iter.Next()) {
+ MOZ_ASSERT(!iter->IsNull());
+ if (!previousChunk->IsNull() &&
+ iter->mTimeStamp <= previousChunk->mTimeStamp) {
+ // This frame starts no later than previousChunk. Skip.
+ continue;
+ }
+ if (iter->mTimeStamp >= aTime) {
+ // This frame starts in the future. Stop.
+ break;
+ }
+ if (!previousChunk->IsNull()) {
+ appendDupes(iter->mTimeStamp);  // Fill gaps longer than one second before this frame.
+ }
+ tempSegment.AppendFrame(
+ do_AddRef(iter->mFrame.GetImage()), iter->mFrame.GetIntrinsicSize(),
+ iter->mFrame.GetPrincipalHandle(),
+ iter->mFrame.GetForceBlack() || !mEnabled, iter->mTimeStamp);
+ TRACK_LOG(LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: Taking video frame (%p) at pos %.3f",
+ this, iter->mFrame.GetImage(),
+ (iter->mTimeStamp - mStartTime).ToSeconds()));
+ previousChunk = &*iter;
+ }
+ if (!previousChunk->IsNull()) {
+ appendDupes(aTime);  // Fill the gap between the last taken frame and aTime.
+ }
+ }
+ mCurrentTime = aTime;
+ mIncomingBuffer.ForgetUpToTime(mCurrentTime);
+
+ // Convert tempSegment timestamps to durations and add chunks with known
+ // duration to mOutgoingBuffer.
+ const TimeStamp now = TimeStamp::Now();
+ for (VideoSegment::ConstChunkIterator iter(tempSegment); !iter.IsEnded();
+ iter.Next()) {
+ VideoChunk chunk = *iter;  // Local copy of the current chunk.
+
+ if (mLastChunk.mTimeStamp.IsNull()) {
+ // This is the first real chunk in the track. Make it start at the
+ // beginning of the track.
+ MOZ_ASSERT(!iter->mTimeStamp.IsNull());
+
+ TRACK_LOG(
+ LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: Got the first video frame (%p) at pos %.3f "
+ "(moving it to beginning)",
+ this, iter->mFrame.GetImage(),
+ (iter->mTimeStamp - mStartTime).ToSeconds()));
+
+ mLastChunk = *iter;
+ mLastChunk.mTimeStamp = mStartTime;
+ continue;
+ }
+
+ MOZ_ASSERT(!mLastChunk.IsNull());
+ MOZ_ASSERT(!chunk.IsNull());
+
+ TimeDuration absoluteEndTime =
+ mDriftCompensator->GetVideoTime(now, chunk.mTimeStamp) - mStartTime;  // mLastChunk ends where this chunk starts.
+ TRACK_LOG(LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: Appending video frame %p, at pos %.3fs "
+ "until %.3fs",
+ this, mLastChunk.mFrame.GetImage(),
+ (mDriftCompensator->GetVideoTime(now, mLastChunk.mTimeStamp) -
+ mStartTime)
+ .ToSeconds(),
+ absoluteEndTime.ToSeconds()));
+ CheckedInt64 duration =
+ UsecsToFrames(absoluteEndTime.ToMicroseconds(), mTrackRate) -
+ mEncodedTicks;
+ if (!duration.isValid()) {
+ NS_ERROR("Duration overflow");
+ return;  // NOTE(review): frames already added to mOutgoingBuffer in this call are not encoded here.
+ }
+
+ if (duration.value() <= 0) {
+ // A frame either started before the last frame (can happen when
+ // multiple frames are added before SetStartOffset), or
+ // two frames were so close together that they ended up at the same
+ // position. We handle both cases by ignoring the previous frame.
+
+ TRACK_LOG(LogLevel::Verbose,
+ ("[VideoTrackEncoder %p]: Duration from frame %p to frame %p "
+ "is %" PRId64 ". Ignoring %p",
+ this, mLastChunk.mFrame.GetImage(), iter->mFrame.GetImage(),
+ duration.value(), mLastChunk.mFrame.GetImage()));
+
+ TimeStamp t = mLastChunk.mTimeStamp;  // Keep the old start time...
+ mLastChunk = *iter;  // ...but replace the frame itself.
+ mLastChunk.mTimeStamp = t;
+ continue;
+ }
+
+ mEncodedTicks += duration.value();
+ mOutgoingBuffer.AppendFrame(
+ do_AddRef(mLastChunk.mFrame.GetImage()),
+ mLastChunk.mFrame.GetIntrinsicSize(), PRINCIPAL_HANDLE_NONE,
+ mLastChunk.mFrame.GetForceBlack() || !mEnabled, mLastChunk.mTimeStamp);
+ mOutgoingBuffer.ExtendLastFrameBy(duration.value());
+ mLastChunk = chunk;
+ }
+
+ if (mOutgoingBuffer.IsEmpty()) {
+ return;
+ }
+
+ Init(mOutgoingBuffer, mCurrentTime, FRAMERATE_DETECTION_MIN_CHUNKS);  // Returns early if already initialized.
+
+ if (!mInitialized) {
+ return;  // Keep buffering in mOutgoingBuffer until init succeeds.
+ }
+
+ if (NS_FAILED(Encode(&mOutgoingBuffer))) {
+ OnError();
+ return;
+ }
+
+ MOZ_ASSERT(mOutgoingBuffer.IsEmpty());
+}
+
+size_t VideoTrackEncoder::SizeOfExcludingThis(
+ mozilla::MallocSizeOf aMallocSizeOf) {  // Sums the sizes reported by the incoming and outgoing buffers.
+ MOZ_ASSERT(!mWorkerThread || mWorkerThread->IsCurrentThreadIn());
+ return mIncomingBuffer.SizeOfExcludingThis(aMallocSizeOf) +
+ mOutgoingBuffer.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+} // namespace mozilla
+
+#undef TRACK_LOG
diff --git a/dom/media/encoder/TrackEncoder.h b/dom/media/encoder/TrackEncoder.h
new file mode 100644
index 0000000000..879949874f
--- /dev/null
+++ b/dom/media/encoder/TrackEncoder.h
@@ -0,0 +1,501 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackEncoder_h_
+#define TrackEncoder_h_
+
+#include "AudioSegment.h"
+#include "EncodedFrame.h"
+#include "MediaQueue.h"
+#include "MediaTrackGraph.h"
+#include "TrackMetadataBase.h"
+#include "VideoSegment.h"
+
+namespace mozilla {
+
+class AbstractThread;
+class DriftCompensator;
+class TrackEncoder;
+
+class TrackEncoderListener {  // Interface for receiving Started/Initialized/Error events from a TrackEncoder.
+ public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener)
+
+ /**
+ * Called when the TrackEncoder has received its first real data.
+ */
+ virtual void Started(TrackEncoder* aEncoder) = 0;
+
+ /**
+ * Called when the TrackEncoder's underlying encoder has been successfully
+ * initialized and there's non-null data ready to be encoded.
+ */
+ virtual void Initialized(TrackEncoder* aEncoder) = 0;
+
+ /**
+ * Called after the TrackEncoder hit an unexpected error, causing it to
+ * abort operation (see TrackEncoder::OnError()).
+ */
+ virtual void Error(TrackEncoder* aEncoder) = 0;
+
+ protected:
+ virtual ~TrackEncoderListener() = default;  // Protected: destroyed through refcounting only.
+};
+
+/**
+ * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by
+ * MediaEncoder. All methods are to be called only on the worker thread.
+ *
+ * The control APIs are all called by MediaEncoder on its dedicated thread. Data
+ * is encoded as soon as it has been appended (and time has advanced past its
+ * end in case of video) and pushed to mEncodedDataQueue.
+ */
+class TrackEncoder {
+ public:
+ TrackEncoder(TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue);
+
+ /**
+ * Called by MediaEncoder to cancel the encoding.
+ */
+ virtual void Cancel() = 0;
+
+ /**
+ * Notifies us that we have reached the end of the stream and no more data
+ * will be appended.
+ */
+ virtual void NotifyEndOfStream() = 0;
+
+ /**
+ * Creates and sets up meta data for a specific codec, called on the worker
+ * thread.
+ */
+ virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0;
+
+ /**
+ * MediaQueue containing encoded data, that is pushed as soon as it's ready.
+ */
+ MediaQueue<EncodedFrame>& EncodedDataQueue() { return mEncodedDataQueue; }
+
+ /**
+ * Returns true once this TrackEncoder is initialized.
+ */
+ bool IsInitialized();
+
+ /**
+ * Returns true once this TrackEncoder has received some data.
+ */
+ bool IsStarted();
+
+ /**
+ * True if the track encoder has encoded all source segments coming from
+ * MediaTrackGraph. Call on the worker thread.
+ */
+ bool IsEncodingComplete() const;
+
+ /**
+ * Registers a listener to events from this TrackEncoder.
+ * We hold a strong reference to the listener.
+ */
+ void RegisterListener(TrackEncoderListener* aListener);
+
+ /**
+ * Unregisters a listener from events from this TrackEncoder.
+ * The listener will stop receiving events synchronously.
+ */
+ bool UnregisterListener(TrackEncoderListener* aListener);  // NOTE(review): presumably returns whether aListener was registered — confirm.
+
+ virtual void SetBitrate(const uint32_t aBitrate) = 0;  // Sets the target bitrate; NOTE(review): unit not visible here — confirm.
+
+ /**
+ * It's optional to set the worker thread, but if you do we'll assert that
+ * we are in the worker thread in every method that gets called.
+ */
+ void SetWorkerThread(AbstractThread* aWorkerThread);
+
+ /**
+ * Measure size of internal buffers.
+ */
+ virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0;
+
+ protected:
+ virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); }  // All listeners must be unregistered by now.
+
+ /**
+ * If this TrackEncoder was not already initialized, it is set to initialized
+ * and listeners are notified.
+ */
+ void SetInitialized();
+
+ /**
+ * If this TrackEncoder was not already marked started, its started state is
+ * set and listeners are notified.
+ */
+ void SetStarted();
+
+ /**
+ * Called after an error. Cancels the encoding and notifies listeners.
+ */
+ void OnError();
+
+ /**
+ * True if the track encoder has been initialized successfully.
+ */
+ bool mInitialized;
+
+ /**
+ * True if the track encoder has received data.
+ */
+ bool mStarted;
+
+ /**
+ * True once all data until the end of the input track has been received.
+ */
+ bool mEndOfStream;
+
+ /**
+ * True once this encoding has been cancelled.
+ */
+ bool mCanceled;
+
+ // How many times we have tried to initialize the encoder.
+ uint32_t mInitCounter;
+
+ /**
+ * True if this TrackEncoder is currently suspended.
+ */
+ bool mSuspended;
+
+ /**
+ * The track rate of source media.
+ */
+ const TrackRate mTrackRate;
+
+ /**
+ * If set we assert that all methods are called on this thread.
+ */
+ RefPtr<AbstractThread> mWorkerThread;
+
+ /**
+ * MediaQueue where encoded data ends up. Note that metadata goes out of band.
+ */
+ MediaQueue<EncodedFrame>& mEncodedDataQueue;
+
+ nsTArray<RefPtr<TrackEncoderListener>> mListeners;  // Registered listeners, held by strong reference.
+};
+
+class AudioTrackEncoder : public TrackEncoder {
+ public:
+ AudioTrackEncoder(TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue)
+ : TrackEncoder(aTrackRate, aEncodedDataQueue),
+ mChannels(0),
+ mNotInitDuration(0),
+ mAudioBitrate(0) {}
+
+ /**
+ * Suspends encoding from now, i.e., all future audio data received through
+ * AppendAudioSegment() until the next Resume() will be dropped.
+ */
+ void Suspend();
+
+ /**
+ * Resumes encoding starting now, i.e., data from the next
+ * AppendAudioSegment() will get encoded.
+ */
+ void Resume();
+
+ /**
+ * Appends and consumes track data from aSegment.
+ */
+ void AppendAudioSegment(AudioSegment&& aSegment);
+
+ template <typename T>
+ static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput, float aVolume) {
+ if (aInput.Length() < aOutputChannels) {
+ // Up-mix. This might leave aInput with more channels than aOutputChannels.
+ AudioChannelsUpMix(&aInput, aOutputChannels,
+ SilentChannel::ZeroChannel<T>());
+ }
+
+ if (aInput.Length() > aOutputChannels) {
+ DownmixAndInterleave(aInput, aDuration, aVolume, aOutputChannels,
+ aOutput);
+ } else {
+ InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume,
+ aOutputChannels, aOutput);
+ }
+ }
+
+ /**
+ * Interleaves the track data and stores the result into aOutput. Might need
+ * to up-mix or down-mix the channel data if the channels number of this chunk
+ * is different from aOutputChannels. The channel data from aChunk might be
+ * modified by up-mixing.
+ */
+ static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration,
+ uint32_t aOutputChannels,
+ AudioDataValue* aOutput);
+
+ /**
+ * De-interleaves the aInput data and stores the result into aOutput.
+ * No up-mix or down-mix operations inside.
+ */
+ static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration,
+ int32_t aChannels, AudioDataValue* aOutput);
+
+ /**
+ * Measure size of internal buffers.
+ */
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
+
+ void SetBitrate(const uint32_t aBitrate) override {
+ mAudioBitrate = aBitrate;
+ }
+
+ /**
+ * Tries to initiate the AudioEncoder based on data in aSegment.
+ * This can be re-called often, as it will exit early should we already be
+ * initiated. mInitialized will only be set if there was enough data in
+ * aSegment to infer metadata. If mInitialized gets set, listeners are notified.
+ *
+ * Not having enough data in aSegment to initiate the encoder for an
+ * accumulated aDuration of one second will make us initiate with a default
+ * number of channels.
+ *
+ * If we attempt to initiate the underlying encoder but fail, we Cancel() and
+ * notify listeners.
+ */
+ void TryInit(const AudioSegment& aSegment, TrackTime aDuration);
+
+ void Cancel() override;
+
+ /**
+ * Dispatched from MediaTrackGraph when we have finished feeding data to
+ * mOutgoingBuffer.
+ */
+ void NotifyEndOfStream() override;
+
+ protected:
+ /**
+ * Number of samples per channel in a pcm buffer. This is also the value of
+ * frame size required by audio encoder, and listeners will be notified when
+ * at least this much data has been added to mOutgoingBuffer.
+ */
+ virtual int NumInputFramesPerPacket() const { return 0; }
+
+ /**
+ * Initializes the audio encoder. The call of this method is delayed until we
+ * have received the first valid track from MediaTrackGraph.
+ */
+ virtual nsresult Init(int aChannels) = 0;
+
+ /**
+ * Encodes buffered data and pushes it to mEncodedDataQueue.
+ */
+ virtual nsresult Encode(AudioSegment* aSegment) = 0;
+
+ /**
+ * The number of channels are used for processing PCM data in the audio
+ * encoder. This value comes from the first valid audio chunk. If encoder
+ * can't support the channels in the chunk, downmix PCM stream can be
+ * performed. This value is also used to initialize the audio encoder.
+ */
+ int mChannels;
+
+ /**
+ * A segment queue of outgoing audio track data to the encoder.
+ * The contents of mOutgoingBuffer will always be what has been appended on
+ * the encoder thread but not yet consumed by the encoder sub class.
+ */
+ AudioSegment mOutgoingBuffer;
+
+ TrackTime mNotInitDuration;  // NOTE(review): presumably the duration accumulated while uninitialized (see TryInit) — confirm.
+
+ uint32_t mAudioBitrate;  // Set through SetBitrate(); 0 until then.
+};
+
+enum class FrameDroppingMode {  // Whether a VideoTrackEncoder may drop frames; passed to its constructor.
+ ALLOW, // Allowed to drop frames to keep up under load
+ DISALLOW, // Must not drop any frames, even if it means we will OOM
+};
+
+class VideoTrackEncoder : public TrackEncoder {
+ public:
+ VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+ TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue,
+ FrameDroppingMode aFrameDroppingMode);
+
+ /**
+ * Suspends encoding from aTime, i.e., all video frames with a timestamp
+ * between aTime and the timestamp of the next Resume() will be dropped.
+ */
+ void Suspend(const TimeStamp& aTime);
+
+ /**
+ * Resumes encoding starting at aTime.
+ */
+ void Resume(const TimeStamp& aTime);
+
+ /**
+ * Makes the video black from aTime.
+ */
+ void Disable(const TimeStamp& aTime);
+
+ /**
+ * Makes the video non-black from aTime.
+ *
+ * NB that it could still be forced black for other reasons, like principals.
+ */
+ void Enable(const TimeStamp& aTime);
+
+ /**
+ * Appends the non-null frames of aSegment to mIncomingBuffer. A null chunk,
+ * or a frame older than the last buffered one, first clears mIncomingBuffer.
+ * Consumes aSegment. Asserts the worker thread if one is set.
+ */
+ void AppendVideoSegment(VideoSegment&& aSegment);
+
+ /**
+ * Measure size of internal buffers.
+ */
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
+
+ void SetBitrate(const uint32_t aBitrate) override {
+ mVideoBitrate = aBitrate;
+ }
+
+ /**
+ * Tries to initiate the VideoEncoder based on data in aSegment.
+ * This can be re-called often, as it will exit early should we already be
+ * initiated. mInitialized will only be set if there was enough data in
+ * aSegment to infer metadata. If mInitialized gets set, listeners are notified.
+ * The amount of chunks needed can be controlled by
+ * aFrameRateDetectionMinChunks which denotes the minimum number of chunks
+ * needed to infer the framerate.
+ *
+ * Still not being initialized when aTime is more than VIDEO_INIT_FAILED_DURATION
+ * seconds past mStartTime is seen as an error and will cancel the current encoding.
+ */
+ void Init(const VideoSegment& aSegment, const TimeStamp& aTime,
+ size_t aFrameRateDetectionMinChunks);
+
+ TrackTime SecondsToMediaTime(double aS) const {  // Converts seconds to ticks of mTrackRate.
+ NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX,
+ "Bad seconds");
+ return mTrackRate * aS;
+ }
+
+ /**
+ * MediaTrackGraph notifies us about the time of the track's start.
+ * This gets called on the MediaEncoder thread after a dispatch.
+ */
+ void SetStartOffset(const TimeStamp& aStartOffset);
+
+ void Cancel() override;
+
+ /**
+ * Notifies us that we have reached the end of the stream and no more data
+ * will be appended to mIncomingBuffer.
+ */
+ void NotifyEndOfStream() override;
+
+ /**
+ * Dispatched from MediaTrackGraph when it has run an iteration so we can
+ * hand more data to the encoder.
+ */
+ void AdvanceCurrentTime(const TimeStamp& aTime);
+
+ protected:
+ /**
+ * Initialize the video encoder. In order to collect the value of width and
+ * height of source frames, this initialization is delayed until we have
+ * received the first valid video frame from MediaTrackGraph.
+ * Listeners will be notified after it has been successfully initialized.
+ */
+ virtual nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
+ int32_t aDisplayHeight, float aEstimatedFrameRate) = 0;
+
+ /**
+ * Encodes data in the outgoing buffer and pushes it to mEncodedDataQueue.
+ */
+ virtual nsresult Encode(VideoSegment* aSegment) = 0;
+
+ /**
+ * Drift compensator for re-clocking incoming video frame wall-clock
+ * timestamps to audio time.
+ */
+ const RefPtr<DriftCompensator> mDriftCompensator;
+
+ /**
+ * The last unique frame and duration so far handled by
+ * AdvanceCurrentTime. When a new frame is detected, mLastChunk is added
+ * to mOutgoingBuffer.
+ */
+ VideoChunk mLastChunk;
+
+ /**
+ * A segment queue of incoming video track data, from listeners.
+ * The duration of mIncomingBuffer is irrelevant as we only look at TimeStamps
+ * of frames. Consumed data is replaced by null data.
+ */
+ VideoSegment mIncomingBuffer;
+
+ /**
+ * A segment queue of outgoing video track data to the encoder.
+ * The contents of mOutgoingBuffer will always be what has been consumed from
+ * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder
+ * sub class. There won't be any null data at the beginning of mOutgoingBuffer
+ * unless explicitly pushed by the producer.
+ */
+ VideoSegment mOutgoingBuffer;
+
+ /**
+ * The number of mTrackRate ticks we have passed to mOutgoingBuffer.
+ */
+ TrackTime mEncodedTicks;
+
+ /**
+ * The time up to which we have forwarded data from mIncomingBuffer to
+ * mOutgoingBuffer.
+ */
+ TimeStamp mCurrentTime;
+
+ /**
+ * The time the video track started, so the start of the video track can be
+ * synced to the start of the audio track.
+ *
+ * Note that this time will progress during suspension, to make sure the
+ * incoming frames stay in sync with the output.
+ */
+ TimeStamp mStartTime;
+
+ /**
+ * The time Suspend was called on the MediaRecorder, so we can calculate the
+ * duration on the next Resume().
+ */
+ TimeStamp mSuspendTime;
+
+ uint32_t mVideoBitrate;  // Set through SetBitrate().
+
+ /**
+ * ALLOW to drop frames under load.
+ * DISALLOW to encode all frames, mainly for testing.
+ */
+ FrameDroppingMode mFrameDroppingMode;
+
+ /**
+ * True if the video track this VideoTrackEncoder is attached to is
+ * currently enabled. While false, we encode all frames as black.
+ */
+ bool mEnabled;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/TrackMetadataBase.h b/dom/media/encoder/TrackMetadataBase.h
new file mode 100644
index 0000000000..503b52e5ec
--- /dev/null
+++ b/dom/media/encoder/TrackMetadataBase.h
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TrackMetadataBase_h_
+#define TrackMetadataBase_h_
+
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+namespace mozilla {
+
+// A class representing metadata for various codec formats. Only supports the
+// information of one track.
+class TrackMetadataBase {
+ public:
+ NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackMetadataBase)
+ enum MetadataKind {
+ METADATA_OPUS, // Represent the Opus metadata
+ METADATA_VP8,
+ METADATA_VORBIS,
+ METADATA_AVC,
+ METADATA_AAC,
+ METADATA_AMR,
+ METADATA_EVRC,
+ METADATA_UNKNOWN // Metadata Kind not set
+ };
+ // Return the specific metadata kind
+ virtual MetadataKind GetKind() const = 0;
+
+ protected:
+ // Protected destructor, to discourage deletion outside of Release():
+ virtual ~TrackMetadataBase() = default;
+};
+
+// The base class for audio metadata.
+class AudioTrackMetadata : public TrackMetadataBase {
+ public:
+ // The duration of each sample set generated by the encoder (counted in samples).
+ // If the duration varies, this should return 0.
+ virtual uint32_t GetAudioFrameDuration() = 0;
+
+ // The size of each sample set generated by the encoder (counted in bytes).
+ // If the size varies, this should return 0.
+ virtual uint32_t GetAudioFrameSize() = 0;
+
+ // AudioSampleRate is the number of audio samples per second.
+ virtual uint32_t GetAudioSampleRate() = 0;
+
+ virtual uint32_t GetAudioChannels() = 0;
+};
+
+// The base class for video metadata.
+class VideoTrackMetadata : public TrackMetadataBase {
+ public:
+ // VideoHeight and VideoWidth are the frame size of the elementary stream.
+ virtual uint32_t GetVideoHeight() = 0;
+ virtual uint32_t GetVideoWidth() = 0;
+
+ // VideoDisplayHeight and VideoDisplayWidth are the display frame size.
+ virtual uint32_t GetVideoDisplayHeight() = 0;
+ virtual uint32_t GetVideoDisplayWidth() = 0;
+
+ // VideoClockRate is the number of samples per second in video frame's
+ // timestamp.
+ // For example, if VideoClockRate is 90k Hz and VideoFrameRate is
+ // 30 fps, each frame's sample duration will be 3000 ticks.
+ virtual uint32_t GetVideoClockRate() = 0;
+
+ // VideoFrameRate is the number of frames per second.
+ virtual uint32_t GetVideoFrameRate() = 0;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/VP8TrackEncoder.cpp b/dom/media/encoder/VP8TrackEncoder.cpp
new file mode 100644
index 0000000000..6412592ed1
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.cpp
@@ -0,0 +1,720 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "VP8TrackEncoder.h"
+
+#include "DriftCompensation.h"
+#include "ImageToI420.h"
+#include "mozilla/gfx/2D.h"
+#include "prsystem.h"
+#include "VideoSegment.h"
+#include "VideoUtils.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+#include "WebMWriter.h"
+#include "mozilla/media/MediaUtils.h"
+#include "mozilla/dom/ImageUtils.h"
+#include "mozilla/dom/ImageBitmapBinding.h"
+#include "mozilla/ProfilerLabels.h"
+
+namespace mozilla {
+
+LazyLogModule gVP8TrackEncoderLog("VP8TrackEncoder");
+#define VP8LOG(level, msg, ...) \
+ MOZ_LOG(gVP8TrackEncoderLog, level, (msg, ##__VA_ARGS__))
+
+// Bitrate in bits per second used by CreateEncoderConfig() when the caller
+// passes a video bitrate of 0.
+constexpr int DEFAULT_BITRATE_BPS = 2500000;
+// Initial value of mKeyFrameInterval, in milliseconds.
+constexpr int DEFAULT_KEYFRAME_INTERVAL_MS = 10000;
+// Bucket size, in seconds of extracted duration, that ExtractEncodedData()
+// uses to decide when to re-evaluate the max keyframe distance.
+constexpr int DYNAMIC_MAXKFDIST_CHECK_INTERVAL = 5;
+// Relative change between the new and the current max keyframe distance
+// that must be exceeded before ExtractEncodedData() applies the new value.
+constexpr float DYNAMIC_MAXKFDIST_DIFFACTOR = 0.4;
+// Default for mKeyFrameIntervalFactor when the constructor is not given one.
+constexpr float DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR = 0.75;
+// Stride alignment used for the I420 buffer and passed to vpx_img_wrap().
+constexpr int I420_STRIDE_ALIGN = 16;
+
+using namespace mozilla::gfx;
+using namespace mozilla::layers;
+using namespace mozilla::media;
+using namespace mozilla::dom;
+
+namespace {
+
+// Rounds aValue up to the nearest multiple of N. Any value below N
+// (including zero and negative values) yields N itself, so the result is
+// never smaller than N.
+template <int N>
+static int Aligned(int aValue) {
+  if (aValue >= N) {
+    // Subtracting one before dividing keeps exact multiples of N unchanged.
+    const int blocks = (aValue - 1) / N + 1;
+    return blocks * N;
+  }
+
+  return N;
+}
+
+// Returns the number of bytes needed to hold an I420 image of the given
+// dimensions when every plane's stride is rounded up to a multiple of
+// Alignment: one full-resolution Y plane followed by two chroma planes that
+// are half the width and half the height (both rounded up).
+//
+// aWidth and aHeight are expected to be positive; callers validate them.
+template <int Alignment>
+size_t I420Size(int aWidth, int aHeight) {
+  // Widen to size_t *before* multiplying. Multiplying two ints first can
+  // overflow (undefined behaviour) for large frames even though the result
+  // fits comfortably in size_t.
+  const size_t yStride = static_cast<size_t>(Aligned<Alignment>(aWidth));
+  const size_t yHeight = static_cast<size_t>(aHeight);
+  const size_t yPlaneSize = yStride * yHeight;
+
+  const size_t uvStride =
+      static_cast<size_t>(Aligned<Alignment>((aWidth + 1) / 2));
+  const size_t uvHeight = static_cast<size_t>((aHeight + 1) / 2);
+  const size_t uvPlaneSize = uvStride * uvHeight;
+
+  return yPlaneSize + uvPlaneSize * 2;
+}
+
+// Fills *config with the VP8 encoder settings used by this file.
+//
+// aWidth, aHeight:       frame size in pixels, written to g_w / g_h.
+// aVideoBitrate:         target bitrate in bits per second; 0 selects
+//                        DEFAULT_BITRATE_BPS.
+// aTrackRate:            denominator of the codec timebase (numerator is 1).
+// aMaxKeyFrameDistance:  value written to kf_max_dist.
+// config:                out-param; zeroed, filled with libvpx defaults and
+//                        then overridden with the settings below.
+//
+// Returns NS_ERROR_FAILURE if libvpx cannot provide its default
+// configuration, NS_OK otherwise.
+nsresult CreateEncoderConfig(int32_t aWidth, int32_t aHeight,
+                             uint32_t aVideoBitrate, TrackRate aTrackRate,
+                             int32_t aMaxKeyFrameDistance,
+                             vpx_codec_enc_cfg_t* config) {
+  // Encoder configuration structure.
+  memset(config, 0, sizeof(vpx_codec_enc_cfg_t));
+  if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), config, 0)) {
+    VP8LOG(LogLevel::Error, "Failed to get default configuration");
+    return NS_ERROR_FAILURE;
+  }
+
+  config->g_w = aWidth;
+  config->g_h = aHeight;
+  // TODO: Maybe we should have various aFrameRate bitrate pair for each
+  // devices? or for different platform
+
+  // rc_target_bitrate needs kbit/s; never go below 1 kbit/s.
+  config->rc_target_bitrate = std::max(
+      1U, (aVideoBitrate != 0 ? aVideoBitrate : DEFAULT_BITRATE_BPS) / 1000);
+
+  // Setting the time base of the codec
+  config->g_timebase.num = 1;
+  config->g_timebase.den = aTrackRate;
+
+  // No error resilience as this is not intended for UDP transports
+  config->g_error_resilient = 0;
+
+  // Frame lagging is currently disabled. A previous idea was to allow some
+  // lag for large timeslices (when low latency is not needed), e.g.
+  // std::min(10U, mKeyFrameInterval / 200).
+  config->g_lag_in_frames = 0;
+
+  // Compute the pixel count in 64 bits: the product of two int32_t values
+  // can overflow a 32-bit int for very large frames.
+  const int64_t pixelCount = static_cast<int64_t>(aWidth) * aHeight;
+  const int32_t number_of_cores = PR_GetNumberOfProcessors();
+  if (pixelCount > 1920 * 1080 && number_of_cores >= 8) {
+    config->g_threads = 4;  // 4 threads for > 1080p.
+  } else if (pixelCount > 1280 * 960 && number_of_cores >= 6) {
+    config->g_threads = 3;  // 3 threads for 1080p.
+  } else if (pixelCount > 640 * 480 && number_of_cores >= 3) {
+    config->g_threads = 2;  // 2 threads for qHD/HD.
+  } else {
+    config->g_threads = 1;  // 1 thread for VGA or less
+  }
+
+  // rate control settings
+
+  // No frame dropping
+  config->rc_dropframe_thresh = 0;
+  // Variable bitrate
+  config->rc_end_usage = VPX_VBR;
+  // Single pass encoding
+  config->g_pass = VPX_RC_ONE_PASS;
+  // ffmpeg doesn't currently support streams that use resize.
+  // Therefore, for safety, we should turn it off until it does.
+  config->rc_resize_allowed = 0;
+  // Allows 100% under target bitrate to compensate for prior overshoot
+  config->rc_undershoot_pct = 100;
+  // Allows 15% over target bitrate to compensate for prior undershoot
+  config->rc_overshoot_pct = 15;
+  // Tells the decoding application to buffer 500ms before beginning playback
+  config->rc_buf_initial_sz = 500;
+  // The decoding application will try to keep 600ms of buffer during playback
+  config->rc_buf_optimal_sz = 600;
+  // The decoding application may buffer 1000ms worth of encoded data
+  config->rc_buf_sz = 1000;
+
+  // We set key frame interval to automatic and try to set kf_max_dist so that
+  // the encoder chooses to put keyframes slightly more often than
+  // mKeyFrameInterval (which will encode with VPX_EFLAG_FORCE_KF when reached).
+  config->kf_mode = VPX_KF_AUTO;
+  config->kf_max_dist = aMaxKeyFrameDistance;
+
+  return NS_OK;
+}
+} // namespace
+
+// Constructs a VP8 track encoder. The drift compensator, track rate, output
+// queue and frame dropping mode are forwarded to VideoTrackEncoder.
+// mKeyFrameInterval starts at DEFAULT_KEYFRAME_INTERVAL_MS, and
+// aKeyFrameIntervalFactor falls back to DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR
+// when not provided.
+VP8TrackEncoder::VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+ TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue,
+ FrameDroppingMode aFrameDroppingMode,
+ Maybe<float> aKeyFrameIntervalFactor)
+ : VideoTrackEncoder(std::move(aDriftCompensator), aTrackRate,
+ aEncodedDataQueue, aFrameDroppingMode),
+ mKeyFrameInterval(
+ TimeDuration::FromMilliseconds(DEFAULT_KEYFRAME_INTERVAL_MS)),
+ mKeyFrameIntervalFactor(aKeyFrameIntervalFactor.valueOr(
+ DYNAMIC_MAXKFDIST_KFINTERVAL_FACTOR)) {
+ MOZ_COUNT_CTOR(VP8TrackEncoder);
+ // Seed the max keyframe distance if one can be calculated. No frame rate
+ // estimate is passed here, so this only has an effect if
+ // mMeanFrameDuration already holds data (presumably it is still empty at
+ // construction time, making this a no-op -- confirm against RollingMean).
+ CalculateMaxKeyFrameDistance().apply(
+ [&](auto aKfd) { SetMaxKeyFrameDistance(aKfd); });
+}
+
+// Releases the vpx codec context (if one is initialized) via Destroy().
+VP8TrackEncoder::~VP8TrackEncoder() {
+ Destroy();
+ MOZ_COUNT_DTOR(VP8TrackEncoder);
+}
+
+// Tears down the vpx codec context if one is currently initialized and marks
+// the encoder as uninitialized. Calling this on an uninitialized encoder is a
+// no-op.
+void VP8TrackEncoder::Destroy() {
+  if (!mInitialized) {
+    // Nothing to release; the flag is already false.
+    return;
+  }
+
+  vpx_codec_destroy(&mVPXContext);
+  mInitialized = false;
+}
+
+// Computes the kf_max_dist value (in frames) that corresponds to
+// mKeyFrameInterval scaled by mKeyFrameIntervalFactor at the current frame
+// rate. The frame rate is aEstimatedFrameRate when given, otherwise the
+// inverse of the rolling mean frame duration. Returns Nothing() when neither
+// source is available. The result is never below 1.
+Maybe<int32_t> VP8TrackEncoder::CalculateMaxKeyFrameDistance(
+    Maybe<float> aEstimatedFrameRate /* = Nothing() */) const {
+  const bool haveEstimate = aEstimatedFrameRate.isSome();
+  if (!haveEstimate && mMeanFrameDuration.empty()) {
+    // Not enough data to make a new calculation.
+    return Nothing();
+  }
+
+  // Prefer the caller's estimate; only fall back to the measured mean (which
+  // is known to be non-empty here) when no estimate was supplied.
+  float estimatedFrameRate;
+  if (haveEstimate) {
+    estimatedFrameRate = *aEstimatedFrameRate;
+  } else {
+    estimatedFrameRate =
+        static_cast<float>(1.0f / mMeanFrameDuration.mean().ToSeconds());
+  }
+
+  // Aim for a kf_max_dist that should avoid triggering the
+  // VPX_EFLAG_FORCE_KF flag.
+  const int32_t framesPerInterval =
+      static_cast<int32_t>(estimatedFrameRate * mKeyFrameIntervalFactor *
+                           mKeyFrameInterval.ToSeconds());
+  return Some(std::max(1, framesPerInterval));
+}
+
+// Applies a new max keyframe distance. Before the encoder is initialized the
+// value is only remembered in mMaxKeyFrameDistance; afterwards the running
+// encoder is reconfigured with it at the current frame size.
+void VP8TrackEncoder::SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance) {
+  if (!mInitialized) {
+    VP8LOG(LogLevel::Debug, "%p SetMaxKeyFrameDistance() distance=%d", this,
+           aMaxKeyFrameDistance);
+    mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+    return;
+  }
+
+  VP8LOG(
+      LogLevel::Debug,
+      "%p SetMaxKeyFrameDistance() set kf_max_dist to %d based on estimated "
+      "framerate %.2ffps keyframe-factor %.2f and keyframe-interval %.2fs",
+      this, aMaxKeyFrameDistance, 1 / mMeanFrameDuration.mean().ToSeconds(),
+      mKeyFrameIntervalFactor, mKeyFrameInterval.ToSeconds());
+  // The frame size is unchanged, so only the keyframe distance differs. The
+  // result is checked in debug builds only.
+  DebugOnly<nsresult> rv =
+      Reconfigure(mFrameWidth, mFrameHeight, aMaxKeyFrameDistance);
+  MOZ_ASSERT(
+      NS_SUCCEEDED(rv),
+      "Reconfig for new key frame distance with proven size should succeed");
+}
+
+// Initializes the encoder for frames of aWidth x aHeight.
+//
+// aWidth, aHeight:               encoded frame size; validated by
+//                                InitInternal().
+// aDisplayWidth, aDisplayHeight: display size stored in the metadata; both
+//                                must be at least 1.
+// aEstimatedFrameRate:           frame rate used to derive the initial max
+//                                keyframe distance; must be positive.
+//
+// Returns NS_OK on success. Returns NS_ERROR_FAILURE on invalid arguments or
+// when the I420 conversion buffer cannot be allocated, and otherwise
+// propagates the InitInternal() error.
+nsresult VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight,
+                               int32_t aDisplayWidth, int32_t aDisplayHeight,
+                               float aEstimatedFrameRate) {
+  if (aDisplayWidth < 1 || aDisplayHeight < 1) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (aEstimatedFrameRate <= 0) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // An explicit estimate is passed, so a value is always returned.
+  int32_t maxKeyFrameDistance =
+      *CalculateMaxKeyFrameDistance(Some(aEstimatedFrameRate));
+
+  nsresult rv = InitInternal(aWidth, aHeight, maxKeyFrameDistance);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  MOZ_ASSERT(!mI420Frame);
+  MOZ_ASSERT(mI420FrameSize == 0);
+  const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight);
+  mI420Frame.reset(new (fallible) uint8_t[neededSize]);
+  mI420FrameSize = mI420Frame ? neededSize : 0;
+  if (!mI420Frame) {
+    VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed",
+           neededSize);
+    // InitInternal() already initialized the codec context, but mInitialized
+    // is still false, so Destroy() would never release it. Release it here
+    // to avoid leaking the context on this failure path.
+    vpx_codec_destroy(&mVPXContext);
+    return NS_ERROR_FAILURE;
+  }
+  vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight,
+               I420_STRIDE_ALIGN, mI420Frame.get());
+
+  if (!mMetadata) {
+    mMetadata = MakeAndAddRef<VP8Metadata>();
+    mMetadata->mWidth = aWidth;
+    mMetadata->mHeight = aHeight;
+    mMetadata->mDisplayWidth = aDisplayWidth;
+    mMetadata->mDisplayHeight = aDisplayHeight;
+
+    VP8LOG(LogLevel::Info,
+           "%p Init() created metadata. width=%d, height=%d, displayWidth=%d, "
+           "displayHeight=%d, framerate=%.2f",
+           this, mMetadata->mWidth, mMetadata->mHeight,
+           mMetadata->mDisplayWidth, mMetadata->mDisplayHeight,
+           aEstimatedFrameRate);
+
+    SetInitialized();
+  }
+
+  return NS_OK;
+}
+
+// Creates the underlying vpx VP8 encoder context for the given frame size and
+// max keyframe distance, and records those values in mFrameWidth,
+// mFrameHeight and mMaxKeyFrameDistance. Must only be called while the
+// encoder is not initialized. Returns NS_ERROR_FAILURE on invalid size, on
+// configuration failure, or if libvpx refuses to initialize.
+nsresult VP8TrackEncoder::InitInternal(int32_t aWidth, int32_t aHeight,
+                                       int32_t aMaxKeyFrameDistance) {
+  const bool sizeIsValid = aWidth >= 1 && aHeight >= 1;
+  if (!sizeIsValid) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (mInitialized) {
+    MOZ_ASSERT(false);
+    return NS_ERROR_FAILURE;
+  }
+
+  VP8LOG(LogLevel::Debug,
+         "%p InitInternal(). width=%d, height=%d, kf_max_dist=%d", this, aWidth,
+         aHeight, aMaxKeyFrameDistance);
+
+  // Build the encoder configuration.
+  vpx_codec_enc_cfg_t encoderConfig;
+  nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate,
+                                    aMaxKeyFrameDistance, &encoderConfig);
+  NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+
+  // Output partitions are the only init flag requested.
+  const vpx_codec_flags_t initFlags = VPX_CODEC_USE_OUTPUT_PARTITION;
+  if (vpx_codec_enc_init(&mVPXContext, vpx_codec_vp8_cx(), &encoderConfig,
+                         initFlags) != VPX_CODEC_OK) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Codec controls; their return values are not checked.
+  vpx_codec_control(&mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1);
+  vpx_codec_control(&mVPXContext, VP8E_SET_CPUUSED, 15);
+  vpx_codec_control(&mVPXContext, VP8E_SET_TOKEN_PARTITIONS,
+                    VP8_TWO_TOKENPARTITION);
+
+  // Remember what the context was configured with.
+  mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+  mFrameHeight = aHeight;
+  mFrameWidth = aWidth;
+
+  return NS_OK;
+}
+
+// Reconfigures an already initialized encoder for a new frame size and/or a
+// new max keyframe distance.
+//
+// The encoder is torn down and re-created when the keyframe distance changes
+// or when the new size needs a larger I420 buffer than is currently
+// allocated. Otherwise the existing context is updated in place with
+// vpx_codec_enc_config_set().
+//
+// Returns NS_ERROR_FAILURE on invalid arguments, when called on an
+// uninitialized encoder, on allocation failure, or when libvpx rejects the
+// new configuration.
+nsresult VP8TrackEncoder::Reconfigure(int32_t aWidth, int32_t aHeight,
+ int32_t aMaxKeyFrameDistance) {
+ if (aWidth <= 0 || aHeight <= 0) {
+ MOZ_ASSERT(false);
+ return NS_ERROR_FAILURE;
+ }
+
+ if (!mInitialized) {
+ MOZ_ASSERT(false);
+ return NS_ERROR_FAILURE;
+ }
+
+ // A changed keyframe distance always forces a full re-init.
+ bool needsReInit = aMaxKeyFrameDistance != *mMaxKeyFrameDistance;
+
+ if (aWidth != mFrameWidth || aHeight != mFrameHeight) {
+ VP8LOG(LogLevel::Info, "Dynamic resolution change (%dx%d -> %dx%d).",
+ mFrameWidth, mFrameHeight, aWidth, aHeight);
+ const size_t neededSize = I420Size<I420_STRIDE_ALIGN>(aWidth, aHeight);
+ if (neededSize > mI420FrameSize) {
+ // Growing the buffer also forces a full re-init. Smaller sizes reuse the
+ // existing allocation.
+ needsReInit = true;
+ mI420Frame.reset(new (fallible) uint8_t[neededSize]);
+ mI420FrameSize = mI420Frame ? neededSize : 0;
+ }
+ if (!mI420Frame) {
+ VP8LOG(LogLevel::Warning, "Allocating I420 frame of size %zu failed",
+ neededSize);
+ return NS_ERROR_FAILURE;
+ }
+ // Re-wrap the (possibly new) buffer with the new dimensions.
+ vpx_img_wrap(&mVPXImageWrapper, VPX_IMG_FMT_I420, aWidth, aHeight,
+ I420_STRIDE_ALIGN, mI420Frame.get());
+ }
+
+ if (needsReInit) {
+ // Destroy() clears mInitialized, which InitInternal() requires. The flag
+ // is only set again once InitInternal() has succeeded.
+ Destroy();
+ mMaxKeyFrameDistance = Some(aMaxKeyFrameDistance);
+ nsresult rv = InitInternal(aWidth, aHeight, aMaxKeyFrameDistance);
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+ mInitialized = true;
+ return NS_OK;
+ }
+
+ // In-place update of the existing context.
+ // Encoder configuration structure.
+ vpx_codec_enc_cfg_t config;
+ nsresult rv = CreateEncoderConfig(aWidth, aHeight, mVideoBitrate, mTrackRate,
+ aMaxKeyFrameDistance, &config);
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+ // Set new configuration
+ if (vpx_codec_enc_config_set(&mVPXContext, &config) != VPX_CODEC_OK) {
+ VP8LOG(LogLevel::Error, "Failed to set new configuration");
+ return NS_ERROR_FAILURE;
+ }
+
+ mFrameWidth = aWidth;
+ mFrameHeight = aHeight;
+
+ return NS_OK;
+}
+
+// Returns a new reference to the metadata created by Init(). Must only be
+// called on an initialized encoder: this asserts in debug builds and returns
+// nullptr otherwise.
+already_AddRefed<TrackMetadataBase> VP8TrackEncoder::GetMetadata() {
+ AUTO_PROFILER_LABEL("VP8TrackEncoder::GetMetadata", OTHER);
+
+ MOZ_ASSERT(mInitialized);
+
+ // Release-build fallback for the assertion above.
+ if (!mInitialized) {
+ return nullptr;
+ }
+
+ MOZ_ASSERT(mMetadata);
+ return do_AddRef(mMetadata);
+}
+
+// Pulls the next encoded frame out of the vpx context.
+//
+// Returns an EncodedFrame when data was available, a null RefPtr when the
+// encoder produced nothing, or NS_ERROR_DOM_MEDIA_OVERFLOW_ERR when a
+// timestamp or duration computation overflows.
+//
+// Side effects: advances mExtractedDuration / mExtractedDurationUs, may clamp
+// mDurationSinceLastKeyframe, and periodically re-evaluates the max keyframe
+// distance (which can reconfigure the encoder).
+Result<RefPtr<EncodedFrame>, nsresult> VP8TrackEncoder::ExtractEncodedData() {
+ vpx_codec_iter_t iter = nullptr;
+ EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME;
+ auto frameData = MakeRefPtr<EncodedFrame::FrameData>();
+ const vpx_codec_cx_pkt_t* pkt = nullptr;
+ // Collect packets until one is seen that is not flagged as a fragment, i.e.
+ // the frame is complete.
+ while ((pkt = vpx_codec_get_cx_data(&mVPXContext, &iter)) != nullptr) {
+ switch (pkt->kind) {
+ case VPX_CODEC_CX_FRAME_PKT: {
+ // Copy the encoded data from libvpx to frameData
+ frameData->AppendElements((uint8_t*)pkt->data.frame.buf,
+ pkt->data.frame.sz);
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+ // NOTE(review): the frame flags are read here regardless of pkt->kind;
+ // this looks like it assumes only frame packets are produced -- confirm.
+ // End of frame
+ if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ frameType = EncodedFrame::VP8_I_FRAME;
+ }
+ break;
+ }
+ }
+
+ // Nothing was extracted; report that with a null frame rather than an error.
+ if (frameData->IsEmpty()) {
+ return RefPtr<EncodedFrame>(nullptr);
+ }
+
+ if (!pkt) {
+ // This check silences a coverity warning about accessing a null pkt below.
+ return RefPtr<EncodedFrame>(nullptr);
+ }
+
+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+ // Update the since-last-keyframe counter, and account for this frame's
+ // time.
+ TrackTime frameTime = pkt->data.frame.pts;
+ DebugOnly<TrackTime> frameDuration = pkt->data.frame.duration;
+ MOZ_ASSERT(frameTime + frameDuration <= mEncodedTimestamp);
+ // Never let the counter exceed the time elapsed since this keyframe's pts.
+ mDurationSinceLastKeyframe =
+ std::min(mDurationSinceLastKeyframe, mEncodedTimestamp - frameTime);
+ }
+
+ // Convert the timestamp and duration to Usecs.
+ media::TimeUnit timestamp = media::TimeUnit(pkt->data.frame.pts, mTrackRate);
+ if (!timestamp.IsValid()) {
+ NS_ERROR("Microsecond timestamp overflow");
+ return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+ }
+
+ // Accumulate the total extracted duration in track-rate units.
+ mExtractedDuration += pkt->data.frame.duration;
+ if (!mExtractedDuration.isValid()) {
+ NS_ERROR("Duration overflow");
+ return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+ }
+
+ media::TimeUnit totalDuration =
+ media::TimeUnit(mExtractedDuration.value(), mTrackRate);
+ if (!totalDuration.IsValid()) {
+ NS_ERROR("Duration overflow");
+ return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+ }
+
+ // This frame's duration is the difference between the new and the previous
+ // cumulative totals (presumably to keep rounding error from accumulating
+ // across frames).
+ media::TimeUnit duration = totalDuration - mExtractedDurationUs;
+ if (!duration.IsValid()) {
+ NS_ERROR("Duration overflow");
+ return Err(NS_ERROR_DOM_MEDIA_OVERFLOW_ERR);
+ }
+
+ mExtractedDurationUs = totalDuration;
+
+ VP8LOG(LogLevel::Verbose,
+ "ExtractEncodedData TimeStamp %.2f, Duration %.2f, FrameType %d",
+ timestamp.ToSeconds(), duration.ToSeconds(), frameType);
+
+ // Integer-divide both times by the check interval: the condition is true
+ // once the total duration has moved into a later interval bucket than the
+ // last update.
+ if (static_cast<int>(totalDuration.ToSeconds()) /
+ DYNAMIC_MAXKFDIST_CHECK_INTERVAL >
+ static_cast<int>(mLastKeyFrameDistanceUpdate.ToSeconds()) /
+ DYNAMIC_MAXKFDIST_CHECK_INTERVAL) {
+ // The interval has passed since the last keyframe update. Update again.
+ mLastKeyFrameDistanceUpdate = totalDuration;
+ const int32_t maxKfDistance =
+ CalculateMaxKeyFrameDistance().valueOr(*mMaxKeyFrameDistance);
+ const float diffFactor =
+ static_cast<float>(maxKfDistance) / *mMaxKeyFrameDistance;
+ VP8LOG(LogLevel::Debug, "maxKfDistance: %d, factor: %.2f", maxKfDistance,
+ diffFactor);
+ // Only reconfigure when the new distance differs enough from the current
+ // one.
+ if (std::abs(1.0 - diffFactor) > DYNAMIC_MAXKFDIST_DIFFACTOR) {
+ SetMaxKeyFrameDistance(maxKfDistance);
+ }
+ }
+
+ return MakeRefPtr<EncodedFrame>(timestamp, duration.ToMicroseconds(),
+ PR_USEC_PER_SEC, frameType,
+ std::move(frameData));
+}
+
+/**
+ * Encoding flow in Encode():
+ * 1: Assert valid state.
+ * 2: Encode the video chunks in mSourceSegment in a for-loop.
+ * 2.1: The duration is taken straight from the video chunk's duration.
+ * 2.2: Setup the video chunk with mVPXImageWrapper by PrepareRawFrame().
+ * 2.3: Pass frame to vp8 encoder by vpx_codec_encode().
+ * 2.4: Extract the encoded frame from encoder by ExtractEncodedData().
+ * 2.5: Set the nextEncodeOperation for the next frame.
+ * 2.6: If we are not skipping the next frame, add the encoded frame to
+ * mEncodedDataQueue. If we are skipping the next frame, extend the encoded
+ * frame's duration in the next run of the loop.
+ * 3. Clear aSegment.
+ */
+// Encodes every chunk in aSegment and pushes the resulting frames to
+// mEncodedDataQueue, then clears aSegment. When mEndOfStream is set, also
+// flushes the encoder and finishes the queue. Returns NS_ERROR_FAILURE if
+// preparing, encoding or extracting a frame fails, and
+// NS_ERROR_DOM_MEDIA_OVERFLOW_ERR if a skipped duration overflows.
+nsresult VP8TrackEncoder::Encode(VideoSegment* aSegment) {
+ MOZ_ASSERT(mInitialized);
+ MOZ_ASSERT(!IsEncodingComplete());
+
+ AUTO_PROFILER_LABEL("VP8TrackEncoder::Encode", OTHER);
+
+ EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME;
+
+ // Holds the most recently encoded frame while following chunks are being
+ // skipped, so its duration can be extended before it is pushed.
+ RefPtr<EncodedFrame> encodedFrame;
+ for (VideoSegment::ChunkIterator iter(*aSegment); !iter.IsEnded();
+ iter.Next()) {
+ VideoChunk& chunk = *iter;
+
+ VP8LOG(LogLevel::Verbose,
+ "nextEncodeOperation is %d for frame of duration %" PRId64,
+ nextEncodeOperation, chunk.GetDuration());
+
+ // Wall-clock start, used below to measure how long this chunk took.
+ TimeStamp timebase = TimeStamp::Now();
+
+ // Encode frame.
+ if (nextEncodeOperation != SKIP_FRAME) {
+ MOZ_ASSERT(!encodedFrame);
+ nsresult rv = PrepareRawFrame(chunk);
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);
+
+ // Encode the data with VP8 encoder
+ int flags = 0;
+ if (nextEncodeOperation == ENCODE_I_FRAME) {
+ VP8LOG(LogLevel::Warning,
+ "MediaRecorder lagging behind. Encoding keyframe.");
+ flags |= VPX_EFLAG_FORCE_KF;
+ }
+
+ // Sum duration of non-key frames and force keyframe if exceeded the
+ // given keyframe interval
+ if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) {
+ if (media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate)
+ .ToTimeDuration() >= mKeyFrameInterval) {
+ VP8LOG(LogLevel::Warning,
+ "Reached mKeyFrameInterval without seeing a keyframe. Forcing "
+ "one. time: %.2f, interval: %.2f",
+ media::TimeUnit(mDurationSinceLastKeyframe, mTrackRate)
+ .ToSeconds(),
+ mKeyFrameInterval.ToSeconds());
+ mDurationSinceLastKeyframe = 0;
+ flags |= VPX_EFLAG_FORCE_KF;
+ }
+ mDurationSinceLastKeyframe += chunk.GetDuration();
+ }
+
+ // The presentation timestamp is the running total of encoded durations.
+ if (vpx_codec_encode(&mVPXContext, &mVPXImageWrapper, mEncodedTimestamp,
+ (unsigned long)chunk.GetDuration(), flags,
+ VPX_DL_REALTIME)) {
+ VP8LOG(LogLevel::Error, "vpx_codec_encode failed to encode the frame.");
+ return NS_ERROR_FAILURE;
+ }
+
+ // Move forward the mEncodedTimestamp.
+ mEncodedTimestamp += chunk.GetDuration();
+
+ // Extract the encoded data from the underlying encoder and push it to
+ // mEncodedDataQueue.
+ auto result = ExtractEncodedData();
+ if (result.isErr()) {
+ VP8LOG(LogLevel::Error, "ExtractEncodedData failed.");
+ return NS_ERROR_FAILURE;
+ }
+
+ MOZ_ASSERT(result.inspect(),
+ "We expected a frame here. EOS is handled explicitly later");
+ encodedFrame = result.unwrap();
+ } else {
+ // SKIP_FRAME
+
+ // A skip always follows an encoded frame that was held back below.
+ MOZ_DIAGNOSTIC_ASSERT(encodedFrame);
+
+ if (mKeyFrameInterval > TimeDuration::FromSeconds(0)) {
+ mDurationSinceLastKeyframe += chunk.GetDuration();
+ }
+
+ // Move forward the mEncodedTimestamp.
+ mEncodedTimestamp += chunk.GetDuration();
+
+ // Extend the duration of the last encoded frame in mEncodedDataQueue
+ // because this frame will be skipped.
+ VP8LOG(LogLevel::Warning,
+ "MediaRecorder lagging behind. Skipping a frame.");
+
+ mExtractedDuration += chunk.mDuration;
+ if (!mExtractedDuration.isValid()) {
+ NS_ERROR("skipped duration overflow");
+ return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+ }
+
+ // Same cumulative-difference scheme as in ExtractEncodedData().
+ media::TimeUnit totalDuration =
+ media::TimeUnit(mExtractedDuration.value(), mTrackRate);
+ media::TimeUnit skippedDuration = totalDuration - mExtractedDurationUs;
+ mExtractedDurationUs = totalDuration;
+ if (!skippedDuration.IsValid()) {
+ NS_ERROR("skipped duration overflow");
+ return NS_ERROR_DOM_MEDIA_OVERFLOW_ERR;
+ }
+
+ // Replace the held frame with a copy whose duration also covers the
+ // skipped chunk; the encoded data itself is shared.
+ encodedFrame = MakeRefPtr<EncodedFrame>(
+ encodedFrame->mTime,
+ encodedFrame->mDuration + skippedDuration.ToMicroseconds(),
+ encodedFrame->mDurationBase, encodedFrame->mFrameType,
+ encodedFrame->mFrameData);
+ }
+
+ // Update the rolling means and decide what to do with the next chunk.
+ mMeanFrameEncodeDuration.insert(TimeStamp::Now() - timebase);
+ mMeanFrameDuration.insert(
+ media::TimeUnit(chunk.GetDuration(), mTrackRate).ToTimeDuration());
+ nextEncodeOperation = GetNextEncodeOperation(
+ mMeanFrameEncodeDuration.mean(), mMeanFrameDuration.mean());
+
+ if (nextEncodeOperation != SKIP_FRAME) {
+ // Note that the next operation might be SKIP_FRAME even if there is no
+ // next frame.
+ mEncodedDataQueue.Push(encodedFrame.forget());
+ }
+ }
+
+ if (encodedFrame) {
+ // Push now if we ended on a SKIP_FRAME before.
+ mEncodedDataQueue.Push(encodedFrame.forget());
+ }
+
+ // Remove the chunks we have processed.
+ aSegment->Clear();
+
+ if (mEndOfStream) {
+ // EOS: Extract the remaining frames from the underlying encoder.
+ VP8LOG(LogLevel::Debug, "mEndOfStream is true");
+ // No more frames will be encoded. Clearing temporary frames saves some
+ // memory.
+ if (mI420Frame) {
+ mI420Frame = nullptr;
+ mI420FrameSize = 0;
+ }
+ // mMuteFrame must be released before gfx shutdown. We do it now since it
+ // may be too late when this VP8TrackEncoder gets destroyed.
+ mMuteFrame = nullptr;
+ // Bug 1243611, keep calling vpx_codec_encode and vpx_codec_get_cx_data
+ // until vpx_codec_get_cx_data return null.
+ while (true) {
+ // A null image asks the encoder to flush.
+ if (vpx_codec_encode(&mVPXContext, nullptr, mEncodedTimestamp, 0, 0,
+ VPX_DL_REALTIME)) {
+ return NS_ERROR_FAILURE;
+ }
+ auto result = ExtractEncodedData();
+ if (result.isErr()) {
+ return NS_ERROR_FAILURE;
+ }
+ if (!result.inspect()) {
+ // Null means end-of-stream.
+ break;
+ }
+ mEncodedDataQueue.Push(result.unwrap().forget());
+ }
+ mEncodedDataQueue.Finish();
+ }
+
+ return NS_OK;
+}
+
+// Converts the image carried by aChunk to I420 inside the buffer wrapped by
+// mVPXImageWrapper. Null or force-black chunks use a cached black image of
+// the chunk's intrinsic size. If the image size differs from the configured
+// frame size, the encoder is reconfigured first. Returns the Reconfigure() or
+// ConvertToI420() error on failure. Failing to create the black image is
+// logged and reported as NS_OK, leaving the buffer untouched.
+nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk& aChunk) {
+  const gfx::IntSize intrinsicSize = aChunk.mFrame.GetIntrinsicSize();
+  const bool useBlackFrame = aChunk.mFrame.GetForceBlack() || aChunk.IsNull();
+
+  RefPtr<Image> img;
+  if (!useBlackFrame) {
+    img = aChunk.mFrame.GetImage();
+  } else {
+    // (Re)create the cached black frame when it is missing or has the wrong
+    // size.
+    const bool cacheIsStale =
+        !mMuteFrame || mMuteFrame->GetSize() != intrinsicSize;
+    if (cacheIsStale) {
+      mMuteFrame = mozilla::VideoFrame::CreateBlackImage(intrinsicSize);
+    }
+    if (!mMuteFrame) {
+      VP8LOG(LogLevel::Warning, "Failed to allocate black image of size %dx%d",
+             intrinsicSize.width, intrinsicSize.height);
+      return NS_OK;
+    }
+    img = mMuteFrame;
+  }
+
+  // Make sure the encoder is configured for this image's dimensions.
+  const gfx::IntSize imgSize = img->GetSize();
+  const bool sizeMatches = imgSize == IntSize(mFrameWidth, mFrameHeight);
+  if (!sizeMatches) {
+    nsresult rv =
+        Reconfigure(imgSize.width, imgSize.height, *mMaxKeyFrameDistance);
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  MOZ_ASSERT(mFrameWidth == imgSize.width);
+  MOZ_ASSERT(mFrameHeight == imgSize.height);
+
+  // Write the three planes straight into the wrapped I420 buffer.
+  const nsresult rv = ConvertToI420(img, mVPXImageWrapper.planes[VPX_PLANE_Y],
+                                    mVPXImageWrapper.stride[VPX_PLANE_Y],
+                                    mVPXImageWrapper.planes[VPX_PLANE_U],
+                                    mVPXImageWrapper.stride[VPX_PLANE_U],
+                                    mVPXImageWrapper.planes[VPX_PLANE_V],
+                                    mVPXImageWrapper.stride[VPX_PLANE_V]);
+  if (NS_SUCCEEDED(rv)) {
+    return NS_OK;
+  }
+
+  VP8LOG(LogLevel::Error, "Converting to I420 failed");
+  return rv;
+}
+
+// These two ratios are used by GetNextEncodeOperation to pick the
+// EncodeOperation for the next frame. Each is the fraction of the processed
+// frame duration that encoding may take before the matching action kicks in.
+// GetNextEncodeOperation tests SKIP_FRAME_RATIO first, so with both ratios
+// equal the I-frame branch can never be taken.
+#define I_FRAME_RATIO (0.85) // Effectively disabled, because perceived quality
+#define SKIP_FRAME_RATIO (0.85)
+
+/**
+ * Decides how the next frame should be handled by comparing the time spent
+ * encoding (aTimeElapsed) with the duration of the media that was processed
+ * (aProcessedDuration). Always returns ENCODE_NORMAL_FRAME when frame
+ * dropping is disallowed.
+ */
+VP8TrackEncoder::EncodeOperation VP8TrackEncoder::GetNextEncodeOperation(
+    TimeDuration aTimeElapsed, TimeDuration aProcessedDuration) {
+  if (mFrameDroppingMode == FrameDroppingMode::DISALLOW) {
+    return ENCODE_NORMAL_FRAME;
+  }
+
+  const double elapsedSeconds = aTimeElapsed.ToSeconds();
+  const double processedSeconds = aProcessedDuration.ToSeconds();
+
+  // The encoder is too slow: skip the next frame to catch up with the
+  // source segment.
+  if (elapsedSeconds > processedSeconds * SKIP_FRAME_RATIO) {
+    return SKIP_FRAME;
+  }
+
+  // The encoder is a little slow: force an I-frame to accelerate.
+  if (elapsedSeconds > processedSeconds * I_FRAME_RATIO) {
+    return ENCODE_I_FRAME;
+  }
+
+  return ENCODE_NORMAL_FRAME;
+}
+
+} // namespace mozilla
+
+#undef VP8LOG
diff --git a/dom/media/encoder/VP8TrackEncoder.h b/dom/media/encoder/VP8TrackEncoder.h
new file mode 100644
index 0000000000..c0e0d3a929
--- /dev/null
+++ b/dom/media/encoder/VP8TrackEncoder.h
@@ -0,0 +1,167 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VP8TrackEncoder_h_
+#define VP8TrackEncoder_h_
+
+#include "TrackEncoder.h"
+
+#include "mozilla/RollingMean.h"
+#include "TimeUnits.h"
+#include "vpx/vpx_codec.h"
+
+namespace mozilla {
+
+typedef struct vpx_codec_ctx vpx_codec_ctx_t;
+typedef struct vpx_codec_enc_cfg vpx_codec_enc_cfg_t;
+typedef struct vpx_image vpx_image_t;
+
+class VP8Metadata;
+
+/**
+ * VP8TrackEncoder implements VideoTrackEncoder by using the libvpx library.
+ * We implement a realtime and variable frame rate encoder. In order to achieve
+ * that, there is a frame-drop encoding policy implemented in Encode().
+ */
+class VP8TrackEncoder : public VideoTrackEncoder {
+ // What Encode() should do with the next frame, as decided by
+ // GetNextEncodeOperation().
+ enum EncodeOperation {
+ ENCODE_NORMAL_FRAME, // VP8 track encoder works normally.
+ ENCODE_I_FRAME, // The next frame will be encoded as I-Frame.
+ SKIP_FRAME, // Skip the next frame.
+ };
+
+ public:
+ // aKeyFrameIntervalFactor overrides the default factor applied to the key
+ // frame interval when computing kf_max_dist (see mKeyFrameIntervalFactor).
+ VP8TrackEncoder(RefPtr<DriftCompensator> aDriftCompensator,
+ TrackRate aTrackRate,
+ MediaQueue<EncodedFrame>& aEncodedDataQueue,
+ FrameDroppingMode aFrameDroppingMode,
+ Maybe<float> aKeyFrameIntervalFactor = Nothing());
+ virtual ~VP8TrackEncoder();
+
+ // Returns the metadata created by Init(), or nullptr if not initialized.
+ already_AddRefed<TrackMetadataBase> GetMetadata() final;
+
+ protected:
+ // Initializes the encoder, the I420 conversion buffer and the metadata.
+ nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
+ int32_t aDisplayHeight, float aEstimatedFrameRate) final;
+
+ private:
+ // Initiates the underlying vpx encoder.
+ nsresult InitInternal(int32_t aWidth, int32_t aHeight,
+ int32_t aMaxKeyFrameDistance);
+
+ // Get the EncodeOperation for next target frame.
+ EncodeOperation GetNextEncodeOperation(TimeDuration aTimeElapsed,
+ TimeDuration aProcessedDuration);
+
+ // Extracts the encoded data from the underlying encoder and returns it.
+ // Return value: An EncodedFrame if a frame was extracted.
+ // nullptr if we reached end-of-stream or nothing was available
+ // from the underlying encoder.
+ // An error nsresult otherwise.
+ Result<RefPtr<EncodedFrame>, nsresult> ExtractEncodedData();
+
+ // Takes the data in aSegment, encodes it, extracts it, and pushes it to
+ // mEncodedDataQueue.
+ nsresult Encode(VideoSegment* aSegment) final;
+
+ // Prepare the input data to the mVPXImageWrapper for encoding.
+ nsresult PrepareRawFrame(VideoChunk& aChunk);
+
+ // Re-configures an existing encoder with a new frame size and/or a new max
+ // keyframe distance.
+ nsresult Reconfigure(int32_t aWidth, int32_t aHeight,
+ int32_t aMaxKeyFrameDistance);
+
+ // Destroys the codec context if the encoder is initialized and marks the
+ // encoder as uninitialized. Does not de-allocate the structs.
+ void Destroy();
+
+ // Helper that calculates the desired max keyframe distance (vp8 config's
+ // kf_max_dist) based on configured key frame interval and recent framerate.
+ // Returns Nothing if not enough input data is available.
+ Maybe<int32_t> CalculateMaxKeyFrameDistance(
+ Maybe<float> aEstimatedFrameRate = Nothing()) const;
+
+ // Stores the new max keyframe distance, reconfiguring the encoder if it is
+ // already initialized.
+ void SetMaxKeyFrameDistance(int32_t aMaxKeyFrameDistance);
+
+ // VP8 Metadata, set on successful Init and never modified again.
+ RefPtr<VP8Metadata> mMetadata;
+
+ // The width the encoder is currently configured with. The input frames to the
+ // underlying encoder must match this width, i.e., the underlying encoder will
+ // not do any resampling.
+ int mFrameWidth = 0;
+
+ // The height the encoder is currently configured with. The input frames to
+ // the underlying encoder must match this height, i.e., the underlying encoder
+ // will not do any resampling.
+ int mFrameHeight = 0;
+
+ // Running timestamp, in mTrackRate units, of the data passed to the
+ // underlying encoder so far.
+ TrackTime mEncodedTimestamp = 0;
+
+ // Total duration in mTrackRate extracted from the underlying encoder.
+ CheckedInt64 mExtractedDuration;
+
+ // Total duration extracted from the underlying encoder.
+ media::TimeUnit mExtractedDurationUs;
+
+ // Muted (black) frame, cached and only re-created when the size changes.
+ RefPtr<layers::Image> mMuteFrame;
+
+ // I420 frame, for converting to I420.
+ UniquePtr<uint8_t[]> mI420Frame;
+ // Allocated size of mI420Frame in bytes; 0 when there is no buffer.
+ size_t mI420FrameSize = 0;
+
+ /**
+ * A duration of non-key frames in mTrackRate.
+ */
+ TrackTime mDurationSinceLastKeyframe = 0;
+
+ /**
+ * The max interval at which a keyframe gets forced (causing video quality
+ * degradation). The encoder is configured to encode keyframes more often than
+ * this, though it can vary based on frame rate.
+ */
+ const TimeDuration mKeyFrameInterval;
+
+ /**
+ * A factor used to multiply the estimated key-frame-interval based on
+ * mKeyFrameInterval (ms) with when configuring kf_max_dist in the encoder.
+ * The goal is to set it a bit below 1.0 to avoid falling back to forcing
+ * keyframes.
+ * NB that for purposes of testing the mKeyFrameInterval fallback this may be
+ * set to values higher than 1.0.
+ */
+ float mKeyFrameIntervalFactor;
+
+ /**
+ * Time when we last updated the key-frame-distance.
+ */
+ media::TimeUnit mLastKeyFrameDistanceUpdate;
+
+ /**
+ * The max keyframe distance (in frames) last used to configure kf_max_dist.
+ */
+ Maybe<int32_t> mMaxKeyFrameDistance;
+
+ /**
+ * The mean duration of recent frames.
+ */
+ RollingMean<TimeDuration, TimeDuration> mMeanFrameDuration{30};
+
+ /**
+ * The mean wall-clock time it took to encode recent frames.
+ */
+ RollingMean<TimeDuration, TimeDuration> mMeanFrameEncodeDuration{30};
+
+ // VP8-related members.
+ // Codec context structure.
+ vpx_codec_ctx_t mVPXContext;
+ // Image Descriptor.
+ vpx_image_t mVPXImageWrapper;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/encoder/moz.build b/dom/media/encoder/moz.build
new file mode 100644
index 0000000000..f995ecdc1c
--- /dev/null
+++ b/dom/media/encoder/moz.build
@@ -0,0 +1,42 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files("*"):
+ BUG_COMPONENT = ("Core", "Audio/Video: Recording")
+
+EXPORTS += [
+ "ContainerWriter.h",
+ "EncodedFrame.h",
+ "MediaEncoder.h",
+ "OpusTrackEncoder.h",
+ "TrackEncoder.h",
+ "TrackMetadataBase.h",
+ "VP8TrackEncoder.h",
+]
+
+UNIFIED_SOURCES += [
+ "MediaEncoder.cpp",
+ "Muxer.cpp",
+ "OpusTrackEncoder.cpp",
+ "TrackEncoder.cpp",
+ "VP8TrackEncoder.cpp",
+]
+
+FINAL_LIBRARY = "xul"
+
+LOCAL_INCLUDES += [
+ "/dom/media",
+ "/ipc/chromium/src",
+ "/media/libyuv/libyuv/include",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+# Suppress some GCC warnings being treated as errors:
+# - about attributes on forward declarations for types that are already
+# defined, which complains about an important MOZ_EXPORT for android::AString
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+ CXXFLAGS += ["-Wno-error=attributes"]