/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef VideoFrameConverter_h
#define VideoFrameConverter_h

#include "ImageContainer.h"
#include "ImageConversion.h"
#include "Pacer.h"
#include "PerformanceRecorder.h"
#include "VideoSegment.h"
#include "nsISupportsImpl.h"
#include "nsThreadUtils.h"
#include "jsapi/RTCStatsReport.h"
#include "mozilla/dom/ImageBitmapBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "api/video/video_frame.h"
#include "common_video/include/video_frame_buffer_pool.h"
#include "common_video/include/video_frame_buffer.h"
#include "media/base/adapted_video_track_source.h"

// The number of frame buffers VideoFrameConverter may create before returning
// errors. Sometimes these are released synchronously, but they can also be
// forwarded all the way to the encoder for asynchronous encoding. With a pool
// size of 5, we allow 1 buffer for the current conversion, and 4 buffers to
// be queued at the encoder.
#define CONVERTER_BUFFER_POOL_SIZE 5

extern mozilla::LazyLogModule gMediaPipelineLog;
#define LOG(level, msg, ...) \
  MOZ_LOG(gMediaPipelineLog, level, (msg, ##__VA_ARGS__))

namespace mozilla {

enum class FrameDroppingPolicy {
  Allowed,
  Disabled,
};

// An async video frame format converter.
//
// Input is typically a MediaTrackListener driven by MediaTrackGraph.
//
// Output is exposed through rtc::AdaptedVideoTrackSource, which implements
// rtc::VideoSourceInterface<webrtc::VideoFrame>.
template <FrameDroppingPolicy DropPolicy>
class VideoFrameConverterImpl : public rtc::AdaptedVideoTrackSource {
 protected:
  explicit VideoFrameConverterImpl(
      already_AddRefed<nsISerialEventTarget> aTarget,
      const dom::RTCStatsTimestampMaker& aTimestampMaker, bool aLockScaling)
      : mTimestampMaker(aTimestampMaker),
        mTarget(aTarget),
        mLockScaling(aLockScaling),
        mPacer(MakeAndAddRef<Pacer<FrameToProcess>>(
            do_AddRef(mTarget), mIdleFrameDuplicationInterval)),
        mScalingPool(false, CONVERTER_BUFFER_POOL_SIZE),
        mConversionPool(false, CONVERTER_BUFFER_POOL_SIZE) {
    MOZ_COUNT_CTOR(VideoFrameConverterImpl);
  }
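
  // Illustrative usage sketch (not part of this header): a typical client
  // creates the converter via VideoFrameConverter::Create() below, activates
  // it, and feeds it VideoChunks. `callThread`, `timestampMaker` and `chunk`
  // are assumed to come from the surrounding MediaPipeline code.
  //
  //   RefPtr<VideoFrameConverter> converter = VideoFrameConverter::Create(
  //       do_AddRef(callThread), timestampMaker, /* aLockScaling = */ false);
  //   converter->SetActive(true);
  //   converter->QueueVideoChunk(chunk, /* aForceBlack = */ false);
  //   ...
  //   converter->Shutdown();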

  // AdaptedVideoTrackSource impl -- we don't expect any of these to be
  // called. They exist in libwebrtc because blink uses them to communicate
  // properties from a video track source to its libwebrtc integration layer.
  // We signal this elsewhere.
  void GenerateKeyFrame() override {
    MOZ_CRASH("Unexpected VideoFrameConverterImpl::GenerateKeyFrame");
  }
  SourceState state() const override {
    MOZ_CRASH("Unexpected VideoFrameConverterImpl::state");
  }
  bool remote() const override {
    MOZ_CRASH("Unexpected VideoFrameConverterImpl::remote");
  }
  bool is_screencast() const override {
    MOZ_CRASH("Unexpected VideoFrameConverterImpl::is_screencast");
  }
  std::optional<bool> needs_denoising() const override {
    MOZ_CRASH("Unexpected VideoFrameConverterImpl::needs_denoising");
  }

  void RegisterListener() {
    mPacingListener = mPacer->PacedItemEvent().Connect(
        mTarget,
        [self = RefPtr(this)](FrameToProcess&& aFrame, TimeStamp aTime) {
          self->QueueForProcessing(std::move(aFrame.mImage), aTime,
                                   aFrame.mSize, aFrame.mForceBlack);
        });
  }

 public:
  using rtc::VideoSourceInterface<webrtc::VideoFrame>::AddOrUpdateSink;
  using rtc::VideoSourceInterface<webrtc::VideoFrame>::RemoveSink;

  void QueueVideoChunk(const VideoChunk& aChunk, bool aForceBlack) {
    gfx::IntSize size = aChunk.mFrame.GetIntrinsicSize();
    if (size.width == 0 || size.height == 0) {
      return;
    }
    TimeStamp t = aChunk.mTimeStamp;
    MOZ_ASSERT(!t.IsNull());
    mPacer->Enqueue(
        FrameToProcess(aChunk.mFrame.GetImage(), t, size, aForceBlack), t);
  }

  /**
   * An active VideoFrameConverter actively converts queued video frames.
   * While inactive, we keep track of the frame most recently queued for
   * processing, so it can be immediately sent out once activated.
   */
  void SetActive(bool aActive) {
    MOZ_ALWAYS_SUCCEEDS(mTarget->Dispatch(NS_NewRunnableFunction(
        __func__,
        [self = RefPtr(this), this, aActive, time = TimeStamp::Now()] {
          if (mActive == aActive) {
            return;
          }
          LOG(LogLevel::Debug, "VideoFrameConverter %p is now %s", this,
              aActive ? "active" : "inactive");
          mActive = aActive;
          if (aActive && mLastFrameQueuedForProcessing.Serial() != -2) {
            // After activating, we re-process the last image that was queued
            // for processing so it can be sent immediately. Moving the image
            // out resets the stored frame's serial, so the re-queued frame
            // doesn't get dropped as a duplicate within the duplication
            // interval.
            QueueForProcessing(
                std::move(mLastFrameQueuedForProcessing.mImage),
                std::max(mLastFrameQueuedForProcessing.mTime +
                             TimeDuration::FromMicroseconds(1),
                         time),
                mLastFrameQueuedForProcessing.mSize,
                mLastFrameQueuedForProcessing.mForceBlack);
          }
        })));
  }
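
  // Note on the one-microsecond bump used by SetActive() above and
  // SetTrackEnabled() below (a sketch of the invariant, not normative code;
  // `last` stands for mLastFrameQueuedForProcessing): QueueForProcessing()
  // drops any frame whose time does not progress, so the re-queued frame's
  // time must strictly exceed the stored one even if TimeStamp::Now() has
  // not advanced past it.
  //
  //   TimeStamp next = std::max(
  //       last.mTime + TimeDuration::FromMicroseconds(1), time);
  //   MOZ_ASSERT(next > last.mTime);  // never dropped for going backwards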

  void SetTrackEnabled(bool aTrackEnabled) {
    MOZ_ALWAYS_SUCCEEDS(mTarget->Dispatch(NS_NewRunnableFunction(
        __func__,
        [self = RefPtr(this), this, aTrackEnabled, time = TimeStamp::Now()] {
          if (mTrackEnabled == aTrackEnabled) {
            return;
          }
          LOG(LogLevel::Debug, "VideoFrameConverterImpl %p Track is now %s",
              this, aTrackEnabled ? "enabled" : "disabled");
          mTrackEnabled = aTrackEnabled;
          if (!aTrackEnabled) {
            // After disabling, we immediately send a frame as black, so it
            // can be seen quickly, even if no frames are flowing. If no frame
            // has been queued for processing yet, we use the FrameToProcess
            // default size (640x480).
            QueueForProcessing(
                /* aImage= */ nullptr,
                std::max(mLastFrameQueuedForProcessing.mTime +
                             TimeDuration::FromMicroseconds(1),
                         time),
                mLastFrameQueuedForProcessing.mSize,
                /* aForceBlack= */ true);
          }
        })));
  }

  void SetTrackingId(TrackingId aTrackingId) {
    MOZ_ALWAYS_SUCCEEDS(mTarget->Dispatch(NS_NewRunnableFunction(
        __func__,
        [self = RefPtr(this), this, id = std::move(aTrackingId)]() mutable {
          mTrackingId = Some(std::move(id));
        })));
  }

  void SetIdleFrameDuplicationInterval(TimeDuration aInterval) {
    MOZ_ALWAYS_SUCCEEDS(mTarget->Dispatch(NS_NewRunnableFunction(
        __func__, [self = RefPtr(this), this, aInterval] {
          mIdleFrameDuplicationInterval = aInterval;
        })));
    mPacer->SetDuplicationInterval(aInterval);
  }

  void Shutdown() {
    mPacer->Shutdown()->Then(
        mTarget, __func__, [self = RefPtr(this), this] {
          mPacingListener.DisconnectIfExists();
          mScalingPool.Release();
          mConversionPool.Release();
          mLastFrameQueuedForProcessing = FrameToProcess();
          mLastFrameConverted = Nothing();
        });
  }

 protected:
  struct FrameToProcess {
    FrameToProcess() = default;

    FrameToProcess(RefPtr<layers::Image> aImage, TimeStamp aTime,
                   gfx::IntSize aSize, bool aForceBlack)
        : mImage(std::move(aImage)),
          mTime(aTime),
          mSize(aSize),
          mForceBlack(aForceBlack) {}

    RefPtr<layers::Image> mImage;
    TimeStamp mTime = TimeStamp::Now();
    gfx::IntSize mSize = gfx::IntSize(640, 480);
    bool mForceBlack = false;

    int32_t Serial() const {
      if (mForceBlack) {
        // Set the last-img check to indicate black.
        // -1 is not a guaranteed invalid serial. See bug 1262134.
        return -1;
      }
      if (!mImage) {
        // Set the last-img check to indicate reset.
        // -2 is not a guaranteed invalid serial. See bug 1262134.
        return -2;
      }
      return mImage->GetSerial();
    }
  };

  struct FrameConverted {
    FrameConverted(webrtc::VideoFrame aFrame, gfx::IntSize aOriginalSize,
                   int32_t aSerial)
        : mFrame(std::move(aFrame)),
          mOriginalSize(aOriginalSize),
          mSerial(aSerial) {}

    webrtc::VideoFrame mFrame;
    gfx::IntSize mOriginalSize;
    int32_t mSerial;
  };

  MOZ_COUNTED_DTOR_VIRTUAL(VideoFrameConverterImpl)

  void VideoFrameConverted(const webrtc::VideoFrame& aVideoFrame,
                           gfx::IntSize aOriginalSize, int32_t aSerial) {
    MOZ_ASSERT(mTarget->IsOnCurrentThread());

    LOG(LogLevel::Verbose,
        "VideoFrameConverterImpl %p: Converted a frame. Diff from last: "
        "%.3fms",
        this,
        static_cast<double>(
            aVideoFrame.timestamp_us() -
            (mLastFrameConverted ? mLastFrameConverted->mFrame.timestamp_us()
                                 : aVideoFrame.timestamp_us())) /
            1000);

    // Check that time doesn't go backwards.
    MOZ_ASSERT_IF(mLastFrameConverted,
                  aVideoFrame.timestamp_us() >
                      mLastFrameConverted->mFrame.timestamp_us());

    mLastFrameConverted =
        Some(FrameConverted(aVideoFrame, aOriginalSize, aSerial));

    OnFrame(aVideoFrame);
  }
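
  // Worked example of the duplicate-frame restamping in QueueForProcessing()
  // below, assuming a 1-second idle duplication interval: a duplicate
  // arriving 2.3 s after the previous frame is restamped to exactly 2 s
  // after it,
  //
  //   multiples = diff_us / idle_interval_us;  // 2300000 / 1000000 == 2
  //   frame.mTime = last.mTime + mIdleFrameDuplicationInterval * multiples;
  //
  // while a duplicate arriving after only 0.4 s is dropped outright.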

  void QueueForProcessing(RefPtr<layers::Image> aImage, TimeStamp aTime,
                          gfx::IntSize aSize, bool aForceBlack) {
    MOZ_ASSERT(mTarget->IsOnCurrentThread());

    FrameToProcess frame{std::move(aImage), aTime, aSize,
                         aForceBlack || !mTrackEnabled};

    if (frame.mTime <= mLastFrameQueuedForProcessing.mTime) {
      LOG(LogLevel::Debug,
          "VideoFrameConverterImpl %p: Dropping a frame because time did not "
          "progress (%.3fs)",
          this,
          (mLastFrameQueuedForProcessing.mTime - frame.mTime).ToSeconds());
      return;
    }

    if (frame.Serial() == mLastFrameQueuedForProcessing.Serial()) {
      // This is the same frame as the last one. We limit the same-frame rate,
      // and rewrite the time so the frame-gap is in multiples of the
      // duplication interval.
      //
      // The pacer only starts duplicating frames if there is no flow of
      // frames into it. There are other reasons the same frame could repeat
      // here, and at a shorter interval than the duplication interval. For
      // instance after the sender is disabled (SetTrackEnabled) but there is
      // still a flow of frames into the pacer. All disabled frames have the
      // same serial.
      if (auto diff = frame.mTime - mLastFrameQueuedForProcessing.mTime;
          diff >= mIdleFrameDuplicationInterval) {
        auto diff_us = static_cast<int64_t>(diff.ToMicroseconds());
        auto idle_interval_us = static_cast<int64_t>(
            mIdleFrameDuplicationInterval.ToMicroseconds());
        auto multiples = diff_us / idle_interval_us;
        MOZ_ASSERT(multiples > 0);
        LOG(LogLevel::Verbose,
            "VideoFrameConverterImpl %p: Rewrote time interval for a "
            "duplicate frame from %.3fs to %.3fs",
            this,
            (frame.mTime - mLastFrameQueuedForProcessing.mTime).ToSeconds(),
            (mIdleFrameDuplicationInterval * multiples).ToSeconds());
        frame.mTime = mLastFrameQueuedForProcessing.mTime +
                      (mIdleFrameDuplicationInterval * multiples);
      } else {
        LOG(LogLevel::Verbose,
            "VideoFrameConverterImpl %p: Dropping a duplicate frame because "
            "the duplication interval (%.3fs) hasn't passed (%.3fs)",
            this, mIdleFrameDuplicationInterval.ToSeconds(),
            (frame.mTime - mLastFrameQueuedForProcessing.mTime).ToSeconds());
        return;
      }
    }

    mLastFrameQueuedForProcessing = std::move(frame);

    if (!mActive) {
      LOG(LogLevel::Debug,
          "VideoFrameConverterImpl %p: Ignoring a frame because we're "
          "inactive",
          this);
      return;
    }

    MOZ_ALWAYS_SUCCEEDS(mTarget->Dispatch(NewRunnableMethod<FrameToProcess>(
        "VideoFrameConverterImpl::ProcessVideoFrame", this,
        &VideoFrameConverterImpl::ProcessVideoFrame,
        mLastFrameQueuedForProcessing)));
  }

  void ProcessVideoFrame(const FrameToProcess& aFrame) {
    MOZ_ASSERT(mTarget->IsOnCurrentThread());

    auto convert =
        [this, &aFrame]() -> rtc::scoped_refptr<webrtc::VideoFrameBuffer> {
      rtc::scoped_refptr<webrtc::I420Buffer> buffer =
          mConversionPool.CreateI420Buffer(aFrame.mSize.width,
                                           aFrame.mSize.height);
      if (!buffer) {
#ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED
        ++mConversionFramesDropped;
#endif
        MOZ_DIAGNOSTIC_ASSERT(mConversionFramesDropped <= 100,
                              "Conversion buffers must be leaking");
        LOG(LogLevel::Warning,
            "VideoFrameConverterImpl %p: Creating a conversion buffer failed",
            this);
        return nullptr;
      }
#ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED
      mConversionFramesDropped = 0;
#endif
      PerformanceRecorder<CopyVideoStage> rec(
          "VideoFrameConverterImpl::ConvertToI420"_ns, *mTrackingId,
          buffer->width(), buffer->height());
      nsresult rv = ConvertToI420(aFrame.mImage, buffer->MutableDataY(),
                                  buffer->StrideY(), buffer->MutableDataU(),
                                  buffer->StrideU(), buffer->MutableDataV(),
                                  buffer->StrideV());
      if (NS_FAILED(rv)) {
        LOG(LogLevel::Warning,
            "VideoFrameConverterImpl %p: Image conversion failed", this);
        return nullptr;
      }
      rec.Record();
      return buffer;
    };

    auto cropAndScale =
        [this, &aFrame](
            const rtc::scoped_refptr<webrtc::I420BufferInterface>& aSrc,
            int aCrop_x, int aCrop_y, int aCrop_w, int aCrop_h, int aOut_width,
            int aOut_height) -> rtc::scoped_refptr<webrtc::VideoFrameBuffer> {
      rtc::scoped_refptr<webrtc::I420Buffer> buffer =
          mScalingPool.CreateI420Buffer(aOut_width, aOut_height);
      if (!buffer) {
#ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED
        ++mScalingFramesDropped;
        MOZ_DIAGNOSTIC_ASSERT(mScalingFramesDropped <= 100,
                              "Scaling buffers must be leaking");
#endif
        LOG(LogLevel::Warning,
            "VideoFrameConverterImpl %p: Creating a scaling buffer failed",
            this);
        return nullptr;
      }
#ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED
      mScalingFramesDropped = 0;
#endif
      PerformanceRecorder<CopyVideoStage> rec(
          "VideoFrameConverterImpl::CropAndScale"_ns, *mTrackingId,
          aSrc->width(), aSrc->height());
      LOG(LogLevel::Verbose,
          "VideoFrameConverterImpl %p: Scaling image %d, %dx%d -> %dx%d",
          this, aFrame.Serial(), aFrame.mSize.Width(), aFrame.mSize.Height(),
          aOut_width, aOut_height);
      buffer->CropAndScaleFrom(*aSrc, aCrop_x, aCrop_y, aCrop_w, aCrop_h);
      rec.Record();
      return buffer;
    };
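
    // AdaptFrame() below is inherited from rtc::AdaptedVideoTrackSource. It
    // applies the aggregated rtc::VideoSinkWants of all registered sinks
    // (max pixel count, target resolution, frame rate) and returns false
    // when the frame should be dropped for adaptation. Rough shape of the
    // call, for orientation (variable names are illustrative):
    //
    //   int outW, outH, cropW, cropH, cropX, cropY;
    //   bool keep = AdaptFrame(inW, inH, timeUs, &outW, &outH,
    //                          &cropW, &cropH, &cropX, &cropY);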
    const webrtc::Timestamp time =
        dom::RTCStatsTimestamp::FromMozTime(mTimestampMaker, aFrame.mTime)
            .ToRealtime();

    const bool sameAsLastConverted =
        mLastFrameConverted && aFrame.Serial() == mLastFrameConverted->mSerial;

    const gfx::IntSize inSize = sameAsLastConverted
                                    ? mLastFrameConverted->mOriginalSize
                                    : aFrame.mSize;

    int crop_x{}, crop_y{}, crop_width{}, crop_height{}, out_width{},
        out_height{};
    bool keep =
        AdaptFrame(inSize.Width(), inSize.Height(), time.us(), &out_width,
                   &out_height, &crop_width, &crop_height, &crop_x, &crop_y);
    if (mLockScaling) {
      crop_x = crop_y = 0;
      crop_width = out_width = inSize.Width();
      crop_height = out_height = inSize.Height();
    }

    if (out_width == 0 || out_height == 0) {
      LOG(LogLevel::Verbose,
          "VideoFrameConverterImpl %p: Skipping a frame because it has no "
          "pixels",
          this);
      OnFrameDropped();
      return;
    }

    if constexpr (DropPolicy == FrameDroppingPolicy::Allowed) {
      if (!keep) {
        LOG(LogLevel::Verbose,
            "VideoFrameConverterImpl %p: Dropping a frame because of "
            "SinkWants",
            this);
        // AdaptFrame has already called OnFrameDropped.
        return;
      }

      if (aFrame.mTime < mLastFrameQueuedForProcessing.mTime) {
        LOG(LogLevel::Verbose,
            "VideoFrameConverterImpl %p: Dropping a frame that is %.3f "
            "seconds before latest",
            this,
            (mLastFrameQueuedForProcessing.mTime - aFrame.mTime).ToSeconds());
        OnFrameDropped();
        return;
      }
    }

    if (sameAsLastConverted &&
        out_width == mLastFrameConverted->mFrame.width() &&
        out_height == mLastFrameConverted->mFrame.height()) {
      // This is the same input frame as last time, at the same output
      // resolution. Avoid a conversion and re-use the last converted frame.
      LOG(LogLevel::Verbose,
          "VideoFrameConverterImpl %p: Re-converting last frame %d. "
          "Re-using with same resolution.",
          this, aFrame.Serial());
      webrtc::VideoFrame frame = mLastFrameConverted->mFrame;
      frame.set_timestamp_us(time.us());
      VideoFrameConverted(frame, mLastFrameConverted->mOriginalSize,
                          mLastFrameConverted->mSerial);
      return;
    }

    if (aFrame.mForceBlack) {
      // Send a black image.
      rtc::scoped_refptr<webrtc::I420Buffer> buffer =
          mScalingPool.CreateI420Buffer(out_width, out_height);
      if (!buffer) {
        MOZ_DIAGNOSTIC_CRASH(
            "Buffers not leaving scope except for reconfig, should never "
            "leak");
        LOG(LogLevel::Warning,
            "VideoFrameConverterImpl %p: Creating a buffer for a black video "
            "frame failed",
            this);
        OnFrameDropped();
        return;
      }

      LOG(LogLevel::Verbose,
          "VideoFrameConverterImpl %p: Sending a black video frame. "
          "CropAndScale: %dx%d -> %dx%d",
          this, aFrame.mSize.Width(), aFrame.mSize.Height(), out_width,
          out_height);
      webrtc::I420Buffer::SetBlack(buffer.get());

      VideoFrameConverted(webrtc::VideoFrame::Builder()
                              .set_video_frame_buffer(buffer)
                              .set_timestamp_us(time.us())
                              .build(),
                          inSize, aFrame.Serial());
      return;
    }
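
    // Lifetime note for the zero-copy path below: webrtc::WrapI420Buffer()
    // does not copy plane data, so the backing image must outlive the
    // returned buffer. The capture in the final callback argument is what
    // guarantees that; a minimal sketch of the pattern (w/h and plane
    // pointers stand in for the PlanarYCbCrData fields):
    //
    //   RefPtr<layers::PlanarYCbCrImage> img = ...;
    //   auto wrapped = webrtc::WrapI420Buffer(
    //       w, h, y, yStride, cb, cbCrStride, cr, cbCrStride,
    //       [img] { /* runs when webrtc drops its last reference */ });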
    if (!aFrame.mImage) {
      // Don't send anything for null images.
      return;
    }

    MOZ_ASSERT(aFrame.mImage->GetSize() == aFrame.mSize);

    rtc::scoped_refptr<webrtc::I420BufferInterface> srcFrame;
    RefPtr<layers::PlanarYCbCrImage> image =
        aFrame.mImage->AsPlanarYCbCrImage();
    if (image) {
      dom::ImageUtils utils(image);
      Maybe<dom::ImageBitmapFormat> format = utils.GetFormat();
      if (format.isSome() &&
          format.value() == dom::ImageBitmapFormat::YUV420P &&
          image->GetData()) {
        const layers::PlanarYCbCrData* data = image->GetData();
        srcFrame = webrtc::WrapI420Buffer(
            aFrame.mImage->GetSize().width, aFrame.mImage->GetSize().height,
            data->mYChannel, data->mYStride, data->mCbChannel,
            data->mCbCrStride, data->mCrChannel, data->mCbCrStride,
            [image] { /* keep reference alive */ });
        LOG(LogLevel::Verbose,
            "VideoFrameConverterImpl %p: Avoiding a conversion for image %d",
            this, aFrame.Serial());
      }
    }

    if (!srcFrame) {
      srcFrame = convert();
    }
    if (!srcFrame) {
      OnFrameDropped();
      return;
    }

    if (srcFrame->width() == out_width && srcFrame->height() == out_height) {
      LOG(LogLevel::Verbose,
          "VideoFrameConverterImpl %p: Avoiding scaling for image %d, "
          "Dimensions: %dx%d",
          this, aFrame.Serial(), out_width, out_height);
      VideoFrameConverted(webrtc::VideoFrame::Builder()
                              .set_video_frame_buffer(srcFrame)
                              .set_timestamp_us(time.us())
                              .build(),
                          inSize, aFrame.Serial());
      return;
    }

    if (rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
            cropAndScale(srcFrame, crop_x, crop_y, crop_width, crop_height,
                         out_width, out_height)) {
      VideoFrameConverted(webrtc::VideoFrame::Builder()
                              .set_video_frame_buffer(buffer)
                              .set_timestamp_us(time.us())
                              .build(),
                          inSize, aFrame.Serial());
    }
  }

 public:
  const dom::RTCStatsTimestampMaker mTimestampMaker;
  const nsCOMPtr<nsISerialEventTarget> mTarget;
  const bool mLockScaling;

 protected:
  TimeDuration mIdleFrameDuplicationInterval = TimeDuration::Forever();

  // Used to pace future frames close to their rendering-time. Thread-safe.
  const RefPtr<Pacer<FrameToProcess>> mPacer;

  // Accessed only from mTarget.
  MediaEventListener mPacingListener;
  webrtc::VideoFrameBufferPool mScalingPool;
  webrtc::VideoFrameBufferPool mConversionPool;
  FrameToProcess mLastFrameQueuedForProcessing;
  Maybe<FrameConverted> mLastFrameConverted;
  bool mActive = false;
  bool mTrackEnabled = true;
  Maybe<TrackingId> mTrackingId;
#ifdef MOZ_DIAGNOSTIC_ASSERT_ENABLED
  size_t mConversionFramesDropped = 0;
  size_t mScalingFramesDropped = 0;
#endif
};

class VideoFrameConverter
    : public rtc::RefCountedObject<
          VideoFrameConverterImpl<FrameDroppingPolicy::Allowed>> {
 protected:
  VideoFrameConverter(already_AddRefed<nsISerialEventTarget> aTarget,
                      const dom::RTCStatsTimestampMaker& aTimestampMaker,
                      bool aLockScaling)
      : rtc::RefCountedObject<
            VideoFrameConverterImpl<FrameDroppingPolicy::Allowed>>(
            std::move(aTarget), aTimestampMaker, aLockScaling) {}

 public:
  static already_AddRefed<VideoFrameConverter> Create(
      already_AddRefed<nsISerialEventTarget> aTarget,
      const dom::RTCStatsTimestampMaker& aTimestampMaker, bool aLockScaling) {
    RefPtr<VideoFrameConverter> converter = new VideoFrameConverter(
        std::move(aTarget), aTimestampMaker, aLockScaling);
    converter->RegisterListener();
    return converter.forget();
  }
};

}  // namespace mozilla

#undef LOG

#endif  // VideoFrameConverter_h