/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "FFmpegVideoEncoder.h" #include #include #include "BufferReader.h" #include "EncoderConfig.h" #include "FFmpegLog.h" #include "FFmpegRuntimeLinker.h" #include "FFmpegUtils.h" #include "H264.h" #include "ImageContainer.h" #include "ImageConversion.h" #include "libavutil/error.h" #include "libavutil/pixfmt.h" #include "libyuv.h" #include "mozilla/StaticPrefs_media.h" #include "mozilla/dom/ImageBitmapBinding.h" #include "mozilla/dom/ImageUtils.h" #include "mozilla/dom/VideoFrameBinding.h" #include "nsPrintfCString.h" // The ffmpeg namespace is introduced to avoid the PixelFormat's name conflicts // with MediaDataEncoder::PixelFormat in MediaDataEncoder class scope. namespace ffmpeg { // TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then // to AV_PIX_FMT_YUVA420P here. 
// NOTE(review): this copy of the file appears to have lost all text inside
// angle brackets during extraction (e.g. `Maybe`, `static_cast`, `nsTArray`
// and the `FFmpegVideoEncoder` template arguments are missing their `<...>`
// parameters). Tokens are preserved as-is below; verify every generic against
// upstream mozilla-central before building.

// Map our FFMPEG_PIX_FMT_* aliases onto the enumerators of the libavcodec in
// use: versions before 54 spell the pixel-format enum without the AV_ prefix.
#if LIBAVCODEC_VERSION_MAJOR < 54
using FFmpegPixelFormat = enum PixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE = FFmpegPixelFormat::PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA = FFmpegPixelFormat::PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA = FFmpegPixelFormat::PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 = FFmpegPixelFormat::PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 = FFmpegPixelFormat::PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::PIX_FMT_NV21;
#else
using FFmpegPixelFormat = enum AVPixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE =
    FFmpegPixelFormat::AV_PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA =
    FFmpegPixelFormat::AV_PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA =
    FFmpegPixelFormat::AV_PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 =
    FFmpegPixelFormat::AV_PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 =
    FFmpegPixelFormat::AV_PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::AV_PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::AV_PIX_FMT_NV21;
#endif

// Human-readable description of a pixel format, used only for logging.
static const char* GetPixelFormatString(FFmpegPixelFormat aFormat) {
  switch (aFormat) {
    case FFMPEG_PIX_FMT_NONE:
      return "none";
    case FFMPEG_PIX_FMT_RGBA:
      return "packed RGBA 8:8:8:8 (32bpp, RGBARGBA...)";
    case FFMPEG_PIX_FMT_BGRA:
      return "packed BGRA 8:8:8:8 (32bpp, BGRABGRA...)";
    case FFMPEG_PIX_FMT_RGB24:
      return "packed RGB 8:8:8 (24bpp, RGBRGB...)";
    case FFMPEG_PIX_FMT_BGR24:
      return "packed RGB 8:8:8 (24bpp, BGRBGR...)";
    case FFMPEG_PIX_FMT_YUV444P:
      return "planar YUV 4:4:4 (24bpp, 1 Cr & Cb sample per 1x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV422P:
      return "planar YUV 4:2:2 (16bpp, 1 Cr & Cb sample per 2x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV420P:
      return "planar YUV 4:2:0 (12bpp, 1 Cr & Cb sample per 2x2 Y samples)";
    case FFMPEG_PIX_FMT_NV12:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved UV components per 1x1 Y "
             "samples)";
    case FFMPEG_PIX_FMT_NV21:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved VU components per 1x1 Y "
             "samples)";
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("Unsupported pixel format");
  return "unsupported";
}

};  // namespace ffmpeg

namespace mozilla {

// A codec option value paired with its textual spelling (e.g. profile id and
// the libx264 "profile" string).
struct H264Setting {
  int mValue;
  nsCString mString;
};

// Compile-time variant of H264Setting; get() materializes the runtime form.
struct H264LiteralSetting {
  int mValue;
  nsLiteralCString mString;
  H264Setting get() const { return {mValue, mString.AsString()}; }
};

// Order matters: indexed by the GetH264Profile() switch below. EXTENDED has no
// libx264 profile string, hence the empty literal.
static constexpr H264LiteralSetting H264Profiles[]{
    {FF_PROFILE_H264_BASELINE, "baseline"_ns},
    {FF_PROFILE_H264_MAIN, "main"_ns},
    {FF_PROFILE_H264_EXTENDED, ""_ns},
    {FF_PROFILE_H264_HIGH, "high"_ns}};

// Map an H264_PROFILE enum onto the matching H264Profiles entry; Nothing() for
// unknown profiles.
static Maybe GetH264Profile(const H264_PROFILE& aProfile) {
  switch (aProfile) {
    case H264_PROFILE::H264_PROFILE_UNKNOWN:
      return Nothing();
    case H264_PROFILE::H264_PROFILE_BASE:
      return Some(H264Profiles[0].get());
    case H264_PROFILE::H264_PROFILE_MAIN:
      return Some(H264Profiles[1].get());
    case H264_PROFILE::H264_PROFILE_EXTENDED:
      return Some(H264Profiles[2].get());
    case H264_PROFILE::H264_PROFILE_HIGH:
      return Some(H264Profiles[3].get());
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("undefined profile");
  return Nothing();
}

// Format an H264_LEVEL as its numeric value plus the dotted string libx264
// expects (e.g. 31 -> "3.1", by inserting a '.' after the first digit).
static Maybe GetH264Level(const H264_LEVEL& aLevel) {
  int val = static_cast(aLevel);
  nsPrintfCString str("%d", val);
  str.Insert('.', 1);
  return Some(H264Setting{val, str});
}

// VP8/VP9-specific SVC knob: the vpx temporal layering mode id.
struct VPXSVCAppendix {
  uint8_t mLayeringMode;
};

// Per-layer SVC configuration produced by GetSVCLayerSettings() and turned
// into a codec option string by GetSVCSettings().
struct SVCLayerSettings {
  using CodecAppendix = Variant;
  size_t mNumberSpatialLayers;
  size_t mNumberTemporalLayers;
  // Number of frames in one temporal pattern period.
  uint8_t mPeriodicity;
  // Temporal layer id assigned to each frame slot in the period.
  nsTArray mLayerIds;
  // libvpx: ts_rate_decimator, libaom: framerate_factor
  nsTArray mRateDecimators;
  // Cumulative per-layer target bitrates in kbps.
  nsTArray mTargetBitrates;
  Maybe mCodecAppendix;
};

// Build the temporal-layer pattern, rate decimators and bitrate split for the
// requested scalability mode (L1T2 or L1T3 only).
static SVCLayerSettings GetSVCLayerSettings(CodecType aCodec,
                                            const ScalabilityMode& aMode,
                                            uint32_t aBitPerSec) {
  // TODO: Apply more sophisticated bitrate allocation, like SvcRateAllocator:
  // https://searchfox.org/mozilla-central/rev/3bd65516eb9b3a9568806d846ba8c81a9402a885/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h#26

  size_t layers = 0;
  const uint32_t kbps = aBitPerSec / 1000;  // ts_target_bitrate requires kbps.

  uint8_t periodicity;
  nsTArray layerIds;
  nsTArray rateDecimators;
  nsTArray bitrates;
  Maybe appendix;

  if (aMode == ScalabilityMode::L1T2) {
    // Two temporal layers. 0-1...
    //
    // Frame pattern:
    // Layer 0: |0| |2| |4| |6| |8|
    // Layer 1: | |1| |3| |5| |7| |
    layers = 2;

    // 2 frames per period.
    periodicity = 2;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(1);

    // Set rate decimators.
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

    // Bitrate allocation: L0 - 60%, L1 - 40%.
    // (Values are cumulative: the last entry is the full target.)
    bitrates.AppendElement(kbps * 3 / 5);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 2 /* VP9E_TEMPORAL_LAYERING_MODE_0101 */
      });
    }
  } else {
    MOZ_ASSERT(aMode == ScalabilityMode::L1T3);
    // Three temporal layers. 0-2-1-2...
    //
    // Frame pattern:
    // Layer 0: |0| | | |4| | | |8| |  |  |12|
    // Layer 1: | | |2| | | |6| | | |10|  |  |
    // Layer 2: | |1| |3| |5| |7| |9|  |11|  |
    layers = 3;

    // 4 frames per period
    periodicity = 4;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(2);
    layerIds.AppendElement(1);
    layerIds.AppendElement(2);

    // Set rate decimators.
    rateDecimators.AppendElement(4);
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

    // Bitrate allocation: L0 - 50%, L1 - 20%, L2 - 30%.
    bitrates.AppendElement(kbps / 2);
    bitrates.AppendElement(kbps * 7 / 10);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 3 /* VP9E_TEMPORAL_LAYERING_MODE_0212 */
      });
    }
  }

  MOZ_ASSERT(layers == bitrates.Length(),
             "Bitrate must be assigned to each layer");
  // First field is the (single) spatial layer count.
  return SVCLayerSettings{1,           layers,
                          periodicity, std::move(layerIds),
                          std::move(rateDecimators), std::move(bitrates),
                          appendix};
}

// Advance to the next slot in the temporal layer id pattern (wraps around).
void FFmpegVideoEncoder::SVCInfo::UpdateTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = (mCurrentIndex + 1) % mTemporalLayerIds.Length();
}

// Temporal layer id for the current frame slot.
uint8_t FFmpegVideoEncoder::SVCInfo::CurrentTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  return mTemporalLayerIds[mCurrentIndex];
}

// Restart the pattern from its first slot (used at keyframes).
void FFmpegVideoEncoder::SVCInfo::ResetTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = 0;
}

FFmpegVideoEncoder::FFmpegVideoEncoder(const FFmpegLibWrapper* aLib,
                                       AVCodecID aCodecID,
                                       const RefPtr& aTaskQueue,
                                       const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}

// Kick off asynchronous initialization on the task queue; resolves once
// InitEncoder() succeeds, rejects with its MediaResult otherwise.
RefPtr FFmpegVideoEncoder::Init() {
  FFMPEGV_LOG("Init");
  return InvokeAsync(mTaskQueue, __func__, [self = RefPtr(this)]() {
    MediaResult r = self->InitEncoder();
    if (NS_FAILED(r.Code())) {
      FFMPEGV_LOG("%s", r.Description().get());
      return InitPromise::CreateAndReject(r, __func__);
    }
    return InitPromise::CreateAndResolve(true, __func__);
  });
}

// Name for diagnostics; includes the linked library name for system ffmpeg.
nsCString FFmpegVideoEncoder::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
  return "ffvpx video encoder"_ns;
#else
  const char* lib =
#  if defined(MOZ_FFMPEG)
      FFmpegRuntimeLinker::LinkStatusLibraryName();
#  else
      "no library: ffmpeg disabled during build";
#  endif
  return nsPrintfCString("ffmpeg video encoder (%s)", lib);
#endif
}

// True when the config requests any scalability (SVC) mode.
bool FFmpegVideoEncoder::SvcEnabled() const {
  return mConfig.mScalabilityMode != ScalabilityMode::None;
}

// Allocate and configure the AVCodecContext from mConfig, then open the
// codec. Must run on the task queue. Returns NS_OK or a detailed error.
MediaResult FFmpegVideoEncoder::InitEncoder() {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());

  ForceEnablingFFmpegDebugLogs();

  FFMPEGV_LOG("FFmpegVideoEncoder::InitEncoder");

  // Initialize the common members of the encoder instance
  auto r = AllocateCodecContext(mLib, mCodecID);
  if (r.isErr()) {
    return r.inspectErr();
  }
  mCodecContext = r.unwrap();
  mCodecName = mCodecContext->codec->name;

  // And now the video-specific part. Input is always converted to I420 before
  // being handed to the encoder (see EncodeInputWithModernAPIs).
  mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  //
  // // TODO: do this properly, based on the colorspace of the frame. Setting
  // this like that crashes encoders. if (mConfig.mCodec != CodecType::AV1) {
  //   if (mConfig.mPixelFormat == dom::ImageBitmapFormat::RGBA32 ||
  //       mConfig.mPixelFormat == dom::ImageBitmapFormat::BGRA32) {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_RGB;
  // #ifdef FFVPX_VERSION
  //     mCodecContext->color_trc = AVCOL_TRC_IEC61966_2_1;
  // #endif
  //   } else {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_BT709;
  //     mCodecContext->color_trc = AVCOL_TRC_BT709;
  //   }
  // }
  mCodecContext->width = static_cast(mConfig.mSize.width);
  mCodecContext->height = static_cast(mConfig.mSize.height);
  // Reasonable default for the quantization range.
  mCodecContext->qmin =
      static_cast(StaticPrefs::media_ffmpeg_encoder_quantizer_min());
  mCodecContext->qmax =
      static_cast(StaticPrefs::media_ffmpeg_encoder_quantizer_max());
  if (mConfig.mUsage == Usage::Realtime) {
    // Single-threaded for realtime to minimize latency.
    mCodecContext->thread_count = 1;
  } else {
    int64_t pixels = mCodecContext->width * mCodecContext->height;
    int threads = 1;
    // Select a thread count that depends on the frame size, and cap to the
    // number of available threads minus one
    if (pixels >= 3840 * 2160) {
      threads = 16;
    } else if (pixels >= 1920 * 1080) {
      threads = 8;
    } else if (pixels >= 1280 * 720) {
      threads = 4;
    } else if (pixels >= 640 * 480) {
      threads = 2;
    }
    mCodecContext->thread_count =
        std::clamp(threads, 1, GetNumberOfProcessors() - 1);
  }
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the frame
  // rate, but we set it to microsecond for now.
  mCodecContext->time_base =
      AVRational{.num = 1, .den = static_cast(USECS_PER_S)};
#if LIBAVCODEC_VERSION_MAJOR >= 57
  // Note that sometimes framerate can be zero (from webcodecs).
  mCodecContext->framerate =
      AVRational{.num = static_cast(mConfig.mFramerate), .den = 1};
#endif

#if LIBAVCODEC_VERSION_MAJOR >= 60
  mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION;
#endif
  // Setting 0 here disable inter-frames: all frames are keyframes
  mCodecContext->gop_size = mConfig.mKeyframeInterval
                                ? static_cast(mConfig.mKeyframeInterval)
                                : 10000;
  mCodecContext->keyint_min = 0;

  // When either real-time or SVC is enabled via config, the general settings of
  // the encoder are set to be more appropriate for real-time usage
  if (mConfig.mUsage == Usage::Realtime || SvcEnabled()) {
    if (mConfig.mUsage != Usage::Realtime) {
      FFMPEGV_LOG(
          "SVC enabled but low latency encoding mode not enabled, forcing low "
          "latency mode");
    }
    mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0);
    // Explicitly ask encoder do not keep in flight at any one time for
    // lookahead purposes.
    mLib->av_opt_set(mCodecContext->priv_data, "lag-in-frames", "0", 0);
    if (mConfig.mCodec == CodecType::VP8 || mConfig.mCodec == CodecType::VP9) {
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilient", "1", 0);
    }
    if (mConfig.mCodec == CodecType::AV1) {
      // NOTE: libaom spells the option differently from libvpx.
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilience", "1", 0);
      // This sets usage to AOM_USAGE_REALTIME
      mLib->av_opt_set(mCodecContext->priv_data, "usage", "1", 0);
      // Allow the bitrate to swing 50% up and down the target
      mLib->av_opt_set(mCodecContext->priv_data, "rc_undershoot_percent", "50",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "rc_overshoot_percent", "50",
                       0);
      // Row multithreading -- note that we do single threaded encoding for now,
      // so this doesn't do much
      mLib->av_opt_set(mCodecContext->priv_data, "row_mt", "1", 0);
      // Cyclic refresh adaptive quantization
      mLib->av_opt_set(mCodecContext->priv_data, "aq-mode", "3", 0);
      // optimized for real-time, 7 for regular, lower: more cpu use -> higher
      // compression ratio
      mLib->av_opt_set(mCodecContext->priv_data, "cpu-used", "9", 0);
      // disable, this is to handle camera motion, unlikely for our use case
      mLib->av_opt_set(mCodecContext->priv_data, "enable-global-motion", "0",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "enable-cfl-intra", "0", 0);
      // TODO: Set a number of tiles appropriate for the number of threads used
      // -- disable tiling if using a single thread.
      mLib->av_opt_set(mCodecContext->priv_data, "tile-columns", "0", 0);
      mLib->av_opt_set(mCodecContext->priv_data, "tile-rows", "0", 0);
    }
  } else {
    // Non-realtime AV1: speed/quality trade-off comes from a pref.
    if (mConfig.mCodec == CodecType::AV1) {
      mLib->av_opt_set_int(
          mCodecContext->priv_data, "cpu-used",
          static_cast(StaticPrefs::media_ffmpeg_encoder_cpu_used()), 0);
    }
  }

  if (SvcEnabled()) {
    if (Maybe settings = GetSVCSettings()) {
      if (mCodecName == "libaom-av1") {
        if (mConfig.mBitrateMode != BitrateMode::Constant) {
          return MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                             "AV1 with SVC only supports constant bitrate"_ns);
        }
      }
      SVCSettings s = settings.extract();
      FFMPEGV_LOG("SVC options string: %s=%s", s.mSettingKeyValue.first.get(),
                  s.mSettingKeyValue.second.get());
      mLib->av_opt_set(mCodecContext->priv_data, s.mSettingKeyValue.first.get(),
                       s.mSettingKeyValue.second.get(), 0);

      // FFmpegVideoEncoder is reset after Drain(), so mSVCInfo should be
      // reset() before emplace().
      mSVCInfo.reset();
      mSVCInfo.emplace(std::move(s.mTemporalLayerIds));

      // TODO: layer settings should be changed dynamically when the frame's
      // color space changed.
    }
  }

  nsAutoCString h264Log;
  if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is()) {
    // TODO: Set profile, level, avcc/annexb for openh264 and others.
    if (mCodecName == "libx264") {
      const H264Specific& h264Specific = mConfig.mCodecSpecific->as();
      H264Settings s = GetH264Settings(h264Specific);
      mCodecContext->profile = s.mProfile;
      mCodecContext->level = s.mLevel;
      for (const auto& pair : s.mSettingKeyValuePairs) {
        mLib->av_opt_set(mCodecContext->priv_data, pair.first.get(),
                         pair.second.get(), 0);
      }

      // Log the settings.
      // When using profile other than EXTENDED, the profile string is in the
      // first element of mSettingKeyValuePairs, while EXTENDED profile has no
      // profile string.
      MOZ_ASSERT_IF(
          s.mSettingKeyValuePairs.Length() != 3,
          h264Specific.mProfile == H264_PROFILE::H264_PROFILE_EXTENDED);
      const char* profileStr = s.mSettingKeyValuePairs.Length() == 3
                                   ? s.mSettingKeyValuePairs[0].second.get()
                                   : "extended";
      const char* levelStr = s.mSettingKeyValuePairs.Length() == 3
                                 ? s.mSettingKeyValuePairs[1].second.get()
                                 : s.mSettingKeyValuePairs[0].second.get();
      const char* formatStr =
          h264Specific.mFormat == H264BitStreamFormat::AVC ? "AVCC" : "AnnexB";
      h264Log.AppendPrintf(", H264: profile - %d (%s), level %d (%s), %s",
                           mCodecContext->profile, profileStr,
                           mCodecContext->level, levelStr, formatStr);
    }
  }

  // Possible future knobs (not wired up yet):
  // - if mConfig.mDenoising is set: av_opt_set_int(mCodecContext->priv_data,
  // "noise_sensitivity", x, 0), where the x is from 0(disabled) to 6.
  // - if mConfig.mAdaptiveQp is set: av_opt_set_int(mCodecContext->priv_data,
  // "aq_mode", x, 0), where x is from 0 to 3: 0 - Disabled, 1 - Variance
  // AQ(default), 2 - Complexity AQ, 3 - Cycle AQ.

  // Our old version of libaom-av1 is considered experimental by the recent
  // ffmpeg we use. Allow experimental codecs for now until we decide on an AV1
  // encoder.
  mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

  SetContextBitrate();

  AVDictionary* options = nullptr;
  if (int ret = OpenCodecContext(mCodecContext->codec, &options); ret < 0) {
    return MediaResult(
        NS_ERROR_DOM_MEDIA_FATAL_ERR,
        RESULT_DETAIL("failed to open %s avcodec: %s", mCodecName.get(),
                      MakeErrorString(mLib, ret).get()));
  }
  mLib->av_dict_free(&options);

  FFMPEGV_LOG(
      "%s has been initialized with format: %s, bitrate: %" PRIi64
      ", width: %d, height: %d, quantizer: [%d, %d], time_base: %d/%d%s",
      mCodecName.get(), ffmpeg::GetPixelFormatString(mCodecContext->pix_fmt),
      static_cast(mCodecContext->bit_rate), mCodecContext->width,
      mCodecContext->height, mCodecContext->qmin, mCodecContext->qmax,
      mCodecContext->time_base.num, mCodecContext->time_base.den,
      h264Log.IsEmpty() ? "" : h264Log.get());

  return NS_OK;
}

// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
// NOTE(review): as above, angle-bracket contents (template arguments such as
// Result<...>, RefPtr<...>, Span<...>, is<H264Specific>(), ReadSpan<...>) were
// stripped from this copy during extraction. Tokens are kept as-is; restore
// the generics from upstream mozilla-central before building.
#if LIBAVCODEC_VERSION_MAJOR >= 58
// Convert one input sample to I420 in a freshly-allocated AVFrame, stamp
// pts/duration (and, for SVC, the temporal_id metadata), then hand the frame
// to the shared FFmpegDataEncoder send/receive path. Task-queue only.
Result FFmpegVideoEncoder<
    LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr aSample) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(mCodecContext);
  MOZ_ASSERT(aSample);

  RefPtr sample(aSample->As());

  // Validate input.
  if (!sample->mImage) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No image"_ns));
  }
  if (sample->mImage->GetSize().IsEmpty()) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "image width or height is invalid"_ns));
  }

  // Allocate AVFrame.
  if (!PrepareFrame()) {
    return Err(
        MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate frame"_ns));
  }

  // Set AVFrame properties for its internal data allocation. For now, we always
  // convert into ffmpeg's buffer.
  mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  mFrame->width = static_cast(mConfig.mSize.width);
  mFrame->height = static_cast(mConfig.mSize.height);
  // Force an I-frame when the caller flagged this sample as a keyframe.
  mFrame->pict_type =
      sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;

  // Allocate AVFrame data.
  if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to allocate frame data: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  // Make sure AVFrame is writable.
  if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to make frame writable: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  MediaResult rv = ConvertToI420(
      sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1],
      mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2], mConfig.mSize);
  if (NS_FAILED(rv)) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "failed to convert format to I420"_ns));
  }

  // Set presentation timestamp and duration of the AVFrame. The unit of pts is
  // time_base.
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the frame
  // rate, but we set it to microsecond for now.
#  if LIBAVCODEC_VERSION_MAJOR >= 59
  mFrame->time_base =
      AVRational{.num = 1, .den = static_cast(USECS_PER_S)};
#  endif
  // Provide fake pts, see header file.
  if (mConfig.mCodec == CodecType::AV1) {
    // AV1 path: feed monotonically increasing fake pts and remember the real
    // timestamp in mPtsMap; ToMediaRawData() restores it from the packet pts.
    mFrame->pts = mFakePts;
    mPtsMap.Insert(mFakePts, aSample->mTime.ToMicroseconds());
    mFakePts += aSample->mDuration.ToMicroseconds();
    mCurrentFramePts = aSample->mTime.ToMicroseconds();
  } else {
    mFrame->pts = aSample->mTime.ToMicroseconds();
  }
#  if LIBAVCODEC_VERSION_MAJOR >= 60
  mFrame->duration = aSample->mDuration.ToMicroseconds();
#  else
  // Save duration in the time_base unit.
  mDurationMap.Insert(mFrame->pts, aSample->mDuration.ToMicroseconds());
#  endif
  Duration(mFrame) = aSample->mDuration.ToMicroseconds();

  AVDictionary* dict = nullptr;
  // VP8/VP9 use a mode that handles the temporal layer id sequence internally,
  // and don't require setting explicitly setting the metadata. Other codecs
  // such as AV1 via libaom however requires manual frame tagging.
  if (SvcEnabled() && mConfig.mCodec != CodecType::VP8 &&
      mConfig.mCodec != CodecType::VP9) {
    if (aSample->mKeyframe) {
      FFMPEGV_LOG("Key frame requested, reseting temporal layer id");
      mSVCInfo->ResetTemporalLayerId();
    }
    nsPrintfCString str("%d", mSVCInfo->CurrentTemporalLayerId());
    mLib->av_dict_set(&dict, "temporal_id", str.get(), 0);
    mFrame->metadata = dict;
  }

  // Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
  return FFmpegDataEncoder::EncodeWithModernAPIs();
}
#endif  // if LIBAVCODEC_VERSION_MAJOR >= 58

// Wrap an encoded AVPacket into a MediaRawData: keyframe flag, extra data
// (H264/AVCC only), timestamps/duration, and the SVC temporal layer id.
Result, MediaResult> FFmpegVideoEncoder::ToMediaRawData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  auto creationResult = CreateMediaRawData(aPacket);
  if (creationResult.isErr()) {
    return Err(creationResult.unwrapErr());
  }
  RefPtr data = creationResult.unwrap();

  data->mKeyframe = (aPacket->flags & AV_PKT_FLAG_KEY) != 0;

  auto extradataResult = GetExtraData(aPacket);
  if (extradataResult.isOk()) {
    data->mExtraData = extradataResult.unwrap();
  } else if (extradataResult.isErr()) {
    MediaResult e = extradataResult.unwrapErr();
    // NOT_AVAILABLE / NOT_IMPLEMENTED are expected (non-H264, non-keyframe,
    // AnnexB...) and deliberately non-fatal; anything else propagates.
    if (e.Code() != NS_ERROR_NOT_AVAILABLE &&
        e.Code() != NS_ERROR_NOT_IMPLEMENTED) {
      return Err(e);
    }
    FFMPEGV_LOG("GetExtraData failed with %s, but we can ignore it for now",
                e.Description().get());
  }

  // TODO(bug 1869560): The unit of pts, dts, and duration is time_base, which
  // is recommended to be the reciprocal of the frame rate, but we set it to
  // microsecond for now.
  data->mTime = media::TimeUnit::FromMicroseconds(aPacket->pts);
#if LIBAVCODEC_VERSION_MAJOR >= 60
  data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration);
#else
  int64_t duration;
  if (mDurationMap.Find(aPacket->pts, duration)) {
    data->mDuration = media::TimeUnit::FromMicroseconds(duration);
  } else {
    data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration);
  }
#endif
  data->mTimecode = media::TimeUnit::FromMicroseconds(aPacket->dts);

  if (mConfig.mCodec == CodecType::AV1) {
    // Undo the fake-pts scheme set up in EncodeInputWithModernAPIs.
    auto found = mPtsMap.Take(aPacket->pts);
    data->mTime = media::TimeUnit::FromMicroseconds(found.value());
  }

  if (mSVCInfo) {
    if (data->mKeyframe) {
      FFMPEGV_LOG(
          "Encoded packet is key frame, reseting temporal layer id sequence");
      mSVCInfo->ResetTemporalLayerId();
    }
    uint8_t temporalLayerId = mSVCInfo->CurrentTemporalLayerId();
    data->mTemporalLayerId.emplace(temporalLayerId);
    mSVCInfo->UpdateTemporalLayerId();
  }

  return data;
}

// Extract AVCC extradata (SPS + PPS) from a libx264 keyframe packet, or from
// the codec context when global headers are enabled. Returns
// NS_ERROR_NOT_AVAILABLE / NS_ERROR_NOT_IMPLEMENTED for the benign "nothing to
// extract" cases handled by ToMediaRawData().
Result, MediaResult> FFmpegVideoEncoder::GetExtraData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  // H264 Extra data comes with the key frame and we only extract it when
  // encoding into AVCC format.
  if (mCodecID != AV_CODEC_ID_H264 || !mConfig.mCodecSpecific ||
      !mConfig.mCodecSpecific->is() ||
      mConfig.mCodecSpecific->as().mFormat != H264BitStreamFormat::AVC ||
      !(aPacket->flags & AV_PKT_FLAG_KEY)) {
    return Err(
        MediaResult(NS_ERROR_NOT_AVAILABLE, "No available extra data"_ns));
  }

  if (mCodecName != "libx264") {
    return Err(MediaResult(
        NS_ERROR_NOT_IMPLEMENTED,
        RESULT_DETAIL(
            "Get extra data from codec %s has not been implemented yet",
            mCodecName.get())));
  }

  bool useGlobalHeader =
#if LIBAVCODEC_VERSION_MAJOR >= 57
      mCodecContext->flags & AV_CODEC_FLAG_GLOBAL_HEADER;
#else
      false;
#endif

  Span buf;
  if (useGlobalHeader) {
    buf = Span(mCodecContext->extradata,
               static_cast(mCodecContext->extradata_size));
  } else {
    buf = Span(aPacket->data, static_cast(aPacket->size));
  }
  if (buf.empty()) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED,
                           "fail to get H264 AVCC header in key frame!"_ns));
  }

  BufferReader reader(buf);

  // The first part is sps.
  uint32_t spsSize;
  MOZ_TRY_VAR(spsSize, reader.ReadU32());
  Span spsData;
  MOZ_TRY_VAR(spsData, reader.ReadSpan(static_cast(spsSize)));

  // The second part is pps.
  uint32_t ppsSize;
  MOZ_TRY_VAR(ppsSize, reader.ReadU32());
  Span ppsData;
  MOZ_TRY_VAR(ppsData, reader.ReadSpan(static_cast(ppsSize)));

  // Ensure we have profile, constraints and level needed to create the extra
  // data.
  if (spsData.Length() < 4) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED, "spsData is too short"_ns));
  }

  FFMPEGV_LOG(
      "Generate extra data: profile - %u, constraints: %u, level: %u for pts @ "
      "%" PRId64,
      spsData[1], spsData[2], spsData[3], aPacket->pts);

  // Create extra data.
  auto extraData = MakeRefPtr();
  H264::WriteExtraData(extraData, spsData[1], spsData[2], spsData[3], spsData,
                       ppsData);
  MOZ_ASSERT(extraData);
  return extraData.forget();
}

// Raise libav's log level to DEBUG in debug builds when our own video log is
// at Debug and no MOZ_AV_LOG_LEVEL override is present.
void FFmpegVideoEncoder::ForceEnablingFFmpegDebugLogs() {
#if DEBUG
  if (!getenv("MOZ_AV_LOG_LEVEL") &&
      MOZ_LOG_TEST(sFFmpegVideoLog, LogLevel::Debug)) {
    mLib->av_log_set_level(AV_LOG_DEBUG);
  }
#endif  // DEBUG
}

// Translate the configured scalability mode into the codec-private option
// string (libvpx "ts-parameters" or libaom "svc-parameters"); Nothing() when
// SVC is not implemented for the active codec.
Maybe::SVCSettings> FFmpegVideoEncoder::GetSVCSettings() {
  MOZ_ASSERT(!mCodecName.IsEmpty());
  MOZ_ASSERT(SvcEnabled());

  CodecType codecType = CodecType::Unknown;
  if (mCodecName == "libvpx") {
    codecType = CodecType::VP8;
  } else if (mCodecName == "libvpx-vp9") {
    codecType = CodecType::VP9;
  } else if (mCodecName == "libaom-av1") {
    codecType = CodecType::AV1;
  }
  if (codecType == CodecType::Unknown) {
    FFMPEGV_LOG("SVC setting is not implemented for %s codec",
                mCodecName.get());
    return Nothing();
  }

  SVCLayerSettings svc = GetSVCLayerSettings(codecType, mConfig.mScalabilityMode,
                                             mConfig.mBitrate);

  nsAutoCString name;
  nsAutoCString parameters;
  if (codecType == CodecType::VP8 || codecType == CodecType::VP9) {
    // Check if the number of temporal layers in codec specific settings
    // matches
    // the number of layers for the given scalability mode.
    if (mConfig.mCodecSpecific) {
      if (mConfig.mCodecSpecific->is()) {
        MOZ_ASSERT(mConfig.mCodecSpecific->as().mNumTemporalLayers ==
                   svc.mNumberTemporalLayers);
      } else if (mConfig.mCodecSpecific->is()) {
        MOZ_ASSERT(mConfig.mCodecSpecific->as().mNumTemporalLayers ==
                   svc.mNumberTemporalLayers);
      }
    }
    // Form an SVC setting string for libvpx.
    name = "ts-parameters"_ns;
    parameters.Append("ts_target_bitrate=");
    for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
    }
    parameters.AppendPrintf(
        ":ts_layering_mode=%u",
        svc.mCodecAppendix->as().mLayeringMode);
  }
  if (codecType == CodecType::AV1) {
    // Form an SVC setting string for libaom.
    name = "svc-parameters"_ns;
    parameters.AppendPrintf("number_spatial_layers=%zu",
                            svc.mNumberSpatialLayers);
    parameters.AppendPrintf(":number_temporal_layers=%zu",
                            svc.mNumberTemporalLayers);
    parameters.Append(":framerate_factor=");
    for (size_t i = 0; i < svc.mRateDecimators.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mRateDecimators[i]);
    }
    parameters.Append(":layer_target_bitrate=");
    for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
    }
  }

  return Some(
      SVCSettings{std::move(svc.mLayerIds),
                  std::make_pair(std::move(name), std::move(parameters))});
}

// Translate the H264Specific config into libx264 profile/level values plus the
// AVOption key/value pairs (profile, level, x264-params annexb flag).
FFmpegVideoEncoder::H264Settings FFmpegVideoEncoder<
    LIBAV_VER>::GetH264Settings(const H264Specific& aH264Specific) {
  MOZ_ASSERT(mCodecName == "libx264",
             "GetH264Settings is libx264-only for now");
  nsTArray> keyValuePairs;

  Maybe profile = GetH264Profile(aH264Specific.mProfile);
  MOZ_RELEASE_ASSERT(profile.isSome());
  if (!profile->mString.IsEmpty()) {
    keyValuePairs.AppendElement(std::make_pair("profile"_ns, profile->mString));
  } else {
    // EXTENDED is the only profile with no libx264 profile string.
    MOZ_RELEASE_ASSERT(aH264Specific.mProfile ==
                       H264_PROFILE::H264_PROFILE_EXTENDED);
  }

  Maybe level = GetH264Level(aH264Specific.mLevel);
  MOZ_RELEASE_ASSERT(level.isSome());
  MOZ_RELEASE_ASSERT(!level->mString.IsEmpty());
  keyValuePairs.AppendElement(std::make_pair("level"_ns, level->mString));

  // Set format: libx264's default format is annexb.
  if (aH264Specific.mFormat == H264BitStreamFormat::AVC) {
    keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=0"));
    // mCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER
    // if we don't want to append SPS/PPS data in all keyframe
    // (LIBAVCODEC_VERSION_MAJOR >= 57 only).
  } else {
    // Set annexb explicitly even if it's default format.
    keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=1"));
  }

  return H264Settings{.mProfile = profile->mValue,
                      .mLevel = level->mValue,
                      .mSettingKeyValuePairs = std::move(keyValuePairs)};
}

}  // namespace mozilla