/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "FFmpegVideoEncoder.h"

#include <algorithm>

#include <aom/aomcx.h>

#include "BufferReader.h"
#include "EncoderConfig.h"
#include "FFmpegLog.h"
#include "FFmpegRuntimeLinker.h"
#include "FFmpegUtils.h"
#include "H264.h"
#include "ImageContainer.h"
#include "ImageConversion.h"
#include "libavutil/error.h"
#include "libavutil/pixfmt.h"
#include "libyuv.h"
#include "mozilla/StaticPrefs_media.h"
#include "mozilla/dom/ImageBitmapBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "mozilla/dom/VideoFrameBinding.h"
#include "nsPrintfCString.h"

// The ffmpeg namespace is introduced to avoid the PixelFormat's name conflicts
// with MediaDataEncoder::PixelFormat in MediaDataEncoder class scope.
namespace ffmpeg {

// TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then
// to AV_PIX_FMT_YUVA420P here.
#if LIBAVCODEC_VERSION_MAJOR < 54
using FFmpegPixelFormat = enum PixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE = FFmpegPixelFormat::PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA = FFmpegPixelFormat::PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA = FFmpegPixelFormat::PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 = FFmpegPixelFormat::PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 = FFmpegPixelFormat::PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::PIX_FMT_NV21;
#else
using FFmpegPixelFormat = enum AVPixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE =
    FFmpegPixelFormat::AV_PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA =
    FFmpegPixelFormat::AV_PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA =
    FFmpegPixelFormat::AV_PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 =
    FFmpegPixelFormat::AV_PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 =
    FFmpegPixelFormat::AV_PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
    FFmpegPixelFormat::AV_PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 =
    FFmpegPixelFormat::AV_PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 =
    FFmpegPixelFormat::AV_PIX_FMT_NV21;
#endif

static const char* GetPixelFormatString(FFmpegPixelFormat aFormat) {
  switch (aFormat) {
    case FFMPEG_PIX_FMT_NONE:
      return "none";
    case FFMPEG_PIX_FMT_RGBA:
      return "packed RGBA 8:8:8:8 (32bpp, RGBARGBA...)";
    case FFMPEG_PIX_FMT_BGRA:
      return "packed BGRA 8:8:8:8 (32bpp, BGRABGRA...)";
    case FFMPEG_PIX_FMT_RGB24:
      return "packed RGB 8:8:8 (24bpp, RGBRGB...)";
    case FFMPEG_PIX_FMT_BGR24:
      return "packed RGB 8:8:8 (24bpp, BGRBGR...)";
    case FFMPEG_PIX_FMT_YUV444P:
      return "planar YUV 4:4:4 (24bpp, 1 Cr & Cb sample per 1x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV422P:
      return "planar YUV 4:2:2 (16bpp, 1 Cr & Cb sample per 2x1 Y samples)";
    case FFMPEG_PIX_FMT_YUV420P:
      return "planar YUV 4:2:0 (12bpp, 1 Cr & Cb sample per 2x2 Y samples)";
    case FFMPEG_PIX_FMT_NV12:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved UV components per 1x1 Y "
             "samples)";
    case FFMPEG_PIX_FMT_NV21:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved VU components per 1x1 Y "
             "samples)";
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("Unsupported pixel format");
  return "unsupported";
}

}  // namespace ffmpeg

namespace mozilla {

struct H264Setting {
  int mValue;
  nsCString mString;
};

struct H264LiteralSetting {
  int mValue;
  nsLiteralCString mString;
  H264Setting get() const { return {mValue, mString.AsString()}; }
};

static constexpr H264LiteralSetting H264Profiles[]{
    {FF_PROFILE_H264_BASELINE, "baseline"_ns},
    {FF_PROFILE_H264_MAIN, "main"_ns},
    {FF_PROFILE_H264_EXTENDED, ""_ns},
    {FF_PROFILE_H264_HIGH, "high"_ns}};

static Maybe<H264Setting> GetH264Profile(const H264_PROFILE& aProfile) {
  switch (aProfile) {
    case H264_PROFILE::H264_PROFILE_UNKNOWN:
      return Nothing();
    case H264_PROFILE::H264_PROFILE_BASE:
      return Some(H264Profiles[0].get());
    case H264_PROFILE::H264_PROFILE_MAIN:
      return Some(H264Profiles[1].get());
    case H264_PROFILE::H264_PROFILE_EXTENDED:
      return Some(H264Profiles[2].get());
    case H264_PROFILE::H264_PROFILE_HIGH:
      return Some(H264Profiles[3].get());
    default:
      break;
  }
  MOZ_ASSERT_UNREACHABLE("undefined profile");
  return Nothing();
}

static Maybe<H264Setting> GetH264Level(const H264_LEVEL& aLevel) {
int val = static_cast<int>(aLevel);
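  // The H264_LEVEL enum values follow H.264's level_idc numbering (level x.y
  // is the integer xy), so formatting the value and inserting a '.' after the
  // leading digit below turns e.g. 31 into the "3.1" string the encoder
  // option expects. This assumes the two-digit levels used in practice.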
  nsPrintfCString str("%d", val);
  str.Insert('.', 1);
  return Some(H264Setting{val, str});
}

struct VPXSVCAppendix {
  uint8_t mLayeringMode;
};

struct SVCLayerSettings {
  using CodecAppendix = Variant<VPXSVCAppendix, aom_svc_params_t>;
  size_t mNumberSpatialLayers;
  size_t mNumberTemporalLayers;
  uint8_t mPeriodicity;
  nsTArray<uint8_t> mLayerIds;
  // libvpx: ts_rate_decimator, libaom: framerate_factor
  nsTArray<uint8_t> mRateDecimators;
  nsTArray<uint32_t> mTargetBitrates;
  Maybe<CodecAppendix> mCodecAppendix;
};

static SVCLayerSettings GetSVCLayerSettings(CodecType aCodec,
                                            const ScalabilityMode& aMode,
                                            uint32_t aBitPerSec) {
  // TODO: Apply more sophisticated bitrate allocation, like SvcRateAllocator:
  // https://searchfox.org/mozilla-central/rev/3bd65516eb9b3a9568806d846ba8c81a9402a885/third_party/libwebrtc/modules/video_coding/svc/svc_rate_allocator.h#26

  size_t layers = 0;
  const uint32_t kbps = aBitPerSec / 1000;  // ts_target_bitrate requires kbps.

  uint8_t periodicity;
  nsTArray<uint8_t> layerIds;
  nsTArray<uint8_t> rateDecimators;
  nsTArray<uint32_t> bitrates;

  Maybe<SVCLayerSettings::CodecAppendix> appendix;

  if (aMode == ScalabilityMode::L1T2) {
    // Two temporal layers. 0-1...
    //
    // Frame pattern:
    // Layer 0: |0| |2| |4| |6| |8|
    // Layer 1: | |1| |3| |5| |7| |

    layers = 2;

    // 2 frames per period.
    periodicity = 2;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(1);

    // Set rate decimators.
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

// Bitrate allocation: L0 - 60%, L1 - 40%.
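    // Note these entries are cumulative, following libvpx's ts_target_bitrate
    // convention (and libaom's layer_target_bitrate below): each value is the
    // total rate for that layer plus all layers beneath it, which is why L1's
    // entry is the full target.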
    bitrates.AppendElement(kbps * 3 / 5);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 2 /* VP9E_TEMPORAL_LAYERING_MODE_0101 */
      });
    }
  } else {
    MOZ_ASSERT(aMode == ScalabilityMode::L1T3);
    // Three temporal layers. 0-2-1-2...
    //
    // Frame pattern:
    // Layer 0: |0| | | |4| | | |8| | | |12|
    // Layer 1: | | |2| | | |6| | | |10| | |
    // Layer 2: | |1| |3| |5| |7| |9| |11| |

    layers = 3;

    // 4 frames per period
    periodicity = 4;

    // Assign layer ids.
    layerIds.AppendElement(0);
    layerIds.AppendElement(2);
    layerIds.AppendElement(1);
    layerIds.AppendElement(2);

// Set rate decimators.
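    // The decimators divide the full framerate per layer subset: with the
    // 0-2-1-2 pattern above, layer 0 alone runs at 1/4 of the input rate,
    // layers 0+1 at 1/2, and all three layers together at the full rate.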
    rateDecimators.AppendElement(4);
    rateDecimators.AppendElement(2);
    rateDecimators.AppendElement(1);

    // Bitrate allocation: L0 - 50%, L1 - 20%, L2 - 30%.
    bitrates.AppendElement(kbps / 2);
    bitrates.AppendElement(kbps * 7 / 10);
    bitrates.AppendElement(kbps);

    if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
      appendix.emplace(VPXSVCAppendix{
          .mLayeringMode = 3 /* VP9E_TEMPORAL_LAYERING_MODE_0212 */
      });
    }
  }

  MOZ_ASSERT(layers == bitrates.Length(),
"Bitrate must be assigned to each layer");
  return SVCLayerSettings{1,
                          layers,
                          periodicity,
                          std::move(layerIds),
                          std::move(rateDecimators),
                          std::move(bitrates),
                          appendix};
}
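
// mTemporalLayerIds holds one layer id per frame of the periodic pattern
// built above (e.g. {0, 2, 1, 2} for L1T3). The SVCInfo methods below step
// through that table one encoded frame at a time and restart it whenever a
// keyframe starts a new pattern.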
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::UpdateTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = (mCurrentIndex + 1) % mTemporalLayerIds.Length();
}

uint8_t FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::CurrentTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  return mTemporalLayerIds[mCurrentIndex];
}

void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::ResetTemporalLayerId() {
  MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
  mCurrentIndex = 0;
}

FFmpegVideoEncoder<LIBAV_VER>::FFmpegVideoEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}

RefPtr<MediaDataEncoder::InitPromise> FFmpegVideoEncoder<LIBAV_VER>::Init() {
  FFMPEGV_LOG("Init");
  return InvokeAsync(mTaskQueue, __func__, [self = RefPtr(this)]() {
    MediaResult r = self->InitEncoder();
    if (NS_FAILED(r.Code())) {
      FFMPEGV_LOG("%s", r.Description().get());
      return InitPromise::CreateAndReject(r, __func__);
    }
    return InitPromise::CreateAndResolve(true, __func__);
  });
}

nsCString FFmpegVideoEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
  return "ffvpx video encoder"_ns;
#else
  const char* lib =
# if defined(MOZ_FFMPEG)
      FFmpegRuntimeLinker::LinkStatusLibraryName();
# else
      "no library: ffmpeg disabled during build";
# endif
  return nsPrintfCString("ffmpeg video encoder (%s)", lib);
#endif
}

bool FFmpegVideoEncoder<LIBAV_VER>::SvcEnabled() const {
  return mConfig.mScalabilityMode != ScalabilityMode::None;
}

MediaResult FFmpegVideoEncoder<LIBAV_VER>::InitEncoder() {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());

  ForceEnablingFFmpegDebugLogs();

  FFMPEGV_LOG("FFmpegVideoEncoder::InitEncoder");

  // Initialize the common members of the encoder instance.
  auto r = AllocateCodecContext(mLib, mCodecID);
  if (r.isErr()) {
    return r.inspectErr();
  }
  mCodecContext = r.unwrap();
  mCodecName = mCodecContext->codec->name;

  // And now the video-specific part.
  mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  // TODO: Set the color properties based on the colorspace of the frame
  // instead; setting them unconditionally like the commented-out block below
  // crashes encoders.
  // if (mConfig.mCodec != CodecType::AV1) {
  //   if (mConfig.mPixelFormat == dom::ImageBitmapFormat::RGBA32 ||
  //       mConfig.mPixelFormat == dom::ImageBitmapFormat::BGRA32) {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_RGB;
  // #ifdef FFVPX_VERSION
  //     mCodecContext->color_trc = AVCOL_TRC_IEC61966_2_1;
  // #endif
  //   } else {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_BT709;
  //     mCodecContext->color_trc = AVCOL_TRC_BT709;
  //   }
  // }
  mCodecContext->width = static_cast<int>(mConfig.mSize.width);
  mCodecContext->height = static_cast<int>(mConfig.mSize.height);
  // Reasonable defaults for the quantization range.
  mCodecContext->qmin =
      static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_min());
  mCodecContext->qmax =
      static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_max());
if (mConfig.mUsage == Usage::Realtime) {
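    // Presumably a single thread keeps per-frame latency low and predictable
    // here: frame-level multithreading makes the encoder buffer several
    // frames before producing output.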
    mCodecContext->thread_count = 1;
  } else {
    int64_t pixels = mCodecContext->width * mCodecContext->height;
    int threads = 1;
    // Select a thread count that depends on the frame size, and cap it to the
    // number of available threads minus one.
    if (pixels >= 3840 * 2160) {
      threads = 16;
    } else if (pixels >= 1920 * 1080) {
      threads = 8;
    } else if (pixels >= 1280 * 720) {
      threads = 4;
    } else if (pixels >= 640 * 480) {
      threads = 2;
    }
    mCodecContext->thread_count =
        std::clamp<int>(threads, 1, GetNumberOfProcessors() - 1);
  }
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the
  // frame rate, but we set it to microseconds for now.
  mCodecContext->time_base =
      AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
#if LIBAVCODEC_VERSION_MAJOR >= 57
  // Note that sometimes framerate can be zero (from webcodecs).
  mCodecContext->framerate =
      AVRational{.num = static_cast<int>(mConfig.mFramerate), .den = 1};
#endif

#if LIBAVCODEC_VERSION_MAJOR >= 60
  mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION;
#endif

  // Setting gop_size to 0 disables inter-frames entirely (every frame is a
  // keyframe), so fall back to a large interval when none is configured.
  mCodecContext->gop_size = mConfig.mKeyframeInterval
                                ? static_cast<int>(mConfig.mKeyframeInterval)
                                : 10000;
  mCodecContext->keyint_min = 0;

  // When either real-time usage or SVC is enabled via the config, the general
  // settings of the encoder are set to be more appropriate for real-time use.
  if (mConfig.mUsage == Usage::Realtime || SvcEnabled()) {
    if (mConfig.mUsage != Usage::Realtime) {
      FFMPEGV_LOG(
          "SVC enabled but low latency encoding mode not enabled, forcing low "
          "latency mode");
    }
    mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0);
    // Explicitly ask the encoder not to keep any frames in flight for
    // lookahead purposes.
    mLib->av_opt_set(mCodecContext->priv_data, "lag-in-frames", "0", 0);

    if (mConfig.mCodec == CodecType::VP8 || mConfig.mCodec == CodecType::VP9) {
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilient", "1", 0);
    }
    if (mConfig.mCodec == CodecType::AV1) {
      mLib->av_opt_set(mCodecContext->priv_data, "error-resilience", "1", 0);
      // This sets usage to AOM_USAGE_REALTIME.
      mLib->av_opt_set(mCodecContext->priv_data, "usage", "1", 0);
      // Allow the bitrate to swing 50% above and below the target.
      mLib->av_opt_set(mCodecContext->priv_data, "rc_undershoot_percent", "50",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "rc_overshoot_percent", "50",
                       0);
      // Row multithreading -- note that we do single threaded encoding for
      // now, so this doesn't do much.
      mLib->av_opt_set(mCodecContext->priv_data, "row_mt", "1", 0);
      // Cyclic refresh adaptive quantization.
      mLib->av_opt_set(mCodecContext->priv_data, "aq-mode", "3", 0);
      // 9 is optimized for real-time (7 is typical for regular encoding);
      // lower values spend more CPU for a higher compression ratio.
      mLib->av_opt_set(mCodecContext->priv_data, "cpu-used", "9", 0);
      // Disabled: this handles camera motion, which is unlikely in our use
      // case.
      mLib->av_opt_set(mCodecContext->priv_data, "enable-global-motion", "0",
                       0);
      mLib->av_opt_set(mCodecContext->priv_data, "enable-cfl-intra", "0", 0);
      // TODO: Set a number of tiles appropriate for the number of threads used
      // -- disable tiling if using a single thread.
      mLib->av_opt_set(mCodecContext->priv_data, "tile-columns", "0", 0);
      mLib->av_opt_set(mCodecContext->priv_data, "tile-rows", "0", 0);
    }
  } else {
    if (mConfig.mCodec == CodecType::AV1) {
      mLib->av_opt_set_int(
          mCodecContext->priv_data, "cpu-used",
          static_cast<int>(StaticPrefs::media_ffmpeg_encoder_cpu_used()), 0);
    }
  }

  if (SvcEnabled()) {
    if (Maybe<SVCSettings> settings = GetSVCSettings()) {
      if (mCodecName == "libaom-av1") {
        if (mConfig.mBitrateMode != BitrateMode::Constant) {
          return MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                             "AV1 with SVC only supports constant bitrate"_ns);
        }
      }

      SVCSettings s = settings.extract();
      FFMPEGV_LOG("SVC options string: %s=%s", s.mSettingKeyValue.first.get(),
                  s.mSettingKeyValue.second.get());
      mLib->av_opt_set(mCodecContext->priv_data, s.mSettingKeyValue.first.get(),
                       s.mSettingKeyValue.second.get(), 0);

      // FFmpegVideoEncoder is reset after Drain(), so mSVCInfo should be
      // reset() before emplace().
      mSVCInfo.reset();
      mSVCInfo.emplace(std::move(s.mTemporalLayerIds));

      // TODO: layer settings should be changed dynamically when the frame's
      // color space changes.
    }
  }

  nsAutoCString h264Log;
  if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is<H264Specific>()) {
    // TODO: Set profile, level, avcc/annexb for openh264 and others.
    if (mCodecName == "libx264") {
      const H264Specific& h264Specific =
          mConfig.mCodecSpecific->as<H264Specific>();
      H264Settings s = GetH264Settings(h264Specific);
      mCodecContext->profile = s.mProfile;
      mCodecContext->level = s.mLevel;
      for (const auto& pair : s.mSettingKeyValuePairs) {
        mLib->av_opt_set(mCodecContext->priv_data, pair.first.get(),
                         pair.second.get(), 0);
      }

      // Log the settings.
      // When using a profile other than EXTENDED, the profile string is in the
      // first element of mSettingKeyValuePairs, while the EXTENDED profile has
      // no profile string.

      MOZ_ASSERT_IF(
          s.mSettingKeyValuePairs.Length() != 3,
          h264Specific.mProfile == H264_PROFILE::H264_PROFILE_EXTENDED);
      const char* profileStr = s.mSettingKeyValuePairs.Length() == 3
                                   ? s.mSettingKeyValuePairs[0].second.get()
                                   : "extended";
      const char* levelStr = s.mSettingKeyValuePairs.Length() == 3
                                 ? s.mSettingKeyValuePairs[1].second.get()
                                 : s.mSettingKeyValuePairs[0].second.get();
      const char* formatStr =
          h264Specific.mFormat == H264BitStreamFormat::AVC ? "AVCC" : "AnnexB";
      h264Log.AppendPrintf(", H264: profile - %d (%s), level %d (%s), %s",
                           mCodecContext->profile, profileStr,
                           mCodecContext->level, levelStr, formatStr);
    }
  }

  // Settings that could be applied here if the config requests them:
  // - if mConfig.mDenoising is set: av_opt_set_int(mCodecContext->priv_data,
  //   "noise_sensitivity", x, 0), where the x is from 0(disabled) to 6.
  // - if mConfig.mAdaptiveQp is set: av_opt_set_int(mCodecContext->priv_data,
  //   "aq_mode", x, 0), where x is from 0 to 3: 0 - Disabled, 1 - Variance
  //   AQ(default), 2 - Complexity AQ, 3 - Cycle AQ.

  // Our old version of libaom-av1 is considered experimental by the recent
  // ffmpeg we use. Allow experimental codecs for now until we decide on an AV1
  // encoder.
  mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

  SetContextBitrate();

  AVDictionary* options = nullptr;
  if (int ret = OpenCodecContext(mCodecContext->codec, &options); ret < 0) {
    return MediaResult(
        NS_ERROR_DOM_MEDIA_FATAL_ERR,
        RESULT_DETAIL("failed to open %s avcodec: %s", mCodecName.get(),
                      MakeErrorString(mLib, ret).get()));
  }
  mLib->av_dict_free(&options);

  FFMPEGV_LOG(
      "%s has been initialized with format: %s, bitrate: %" PRIi64
      ", width: %d, height: %d, quantizer: [%d, %d], time_base: %d/%d%s",
      mCodecName.get(), ffmpeg::GetPixelFormatString(mCodecContext->pix_fmt),
      static_cast<int64_t>(mCodecContext->bit_rate), mCodecContext->width,
      mCodecContext->height, mCodecContext->qmin, mCodecContext->qmax,
      mCodecContext->time_base.num, mCodecContext->time_base.den,
      h264Log.IsEmpty() ? "" : h264Log.get());

  return NS_OK;
}

// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
Result<MediaDataEncoder::EncodedData, MediaResult> FFmpegVideoEncoder<
    LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(mCodecContext);
  MOZ_ASSERT(aSample);

  RefPtr<const VideoData> sample(aSample->As<VideoData>());

  // Validate input.
  if (!sample->mImage) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No image"_ns));
  }
  if (sample->mImage->GetSize().IsEmpty()) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "image width or height is invalid"_ns));
  }

  // Allocate AVFrame.
  if (!PrepareFrame()) {
    return Err(
        MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate frame"_ns));
  }

  // Set AVFrame properties for its internal data allocation. For now, we
  // always convert into ffmpeg's buffer.
  mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  mFrame->width = static_cast<int>(mConfig.mSize.width);
  mFrame->height = static_cast<int>(mConfig.mSize.height);
  mFrame->pict_type =
      sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;

  // Allocate AVFrame data.
  if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to allocate frame data: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  // Make sure AVFrame is writable.
  if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           RESULT_DETAIL("failed to make frame writable: %s",
                                         MakeErrorString(mLib, ret).get())));
  }

  MediaResult rv = ConvertToI420(
      sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1],
      mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2], mConfig.mSize);
  if (NS_FAILED(rv)) {
    return Err(MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                           "failed to convert format to I420"_ns));
  }

  // Set the presentation timestamp and duration of the AVFrame. The unit of
  // pts is time_base.
  // TODO(bug 1869560): The recommended time_base is the reciprocal of the
  // frame rate, but we set it to microseconds for now.
# if LIBAVCODEC_VERSION_MAJOR >= 59
  mFrame->time_base =
      AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
# endif
// Provide fake pts, see header file.
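  // The synthetic pts simply grows by each sample's duration; mPtsMap
  // remembers the real timestamp keyed by that fake pts so ToMediaRawData()
  // can restore it on the corresponding output packet.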
  if (mConfig.mCodec == CodecType::AV1) {
    mFrame->pts = mFakePts;
    mPtsMap.Insert(mFakePts, aSample->mTime.ToMicroseconds());
    mFakePts += aSample->mDuration.ToMicroseconds();
    mCurrentFramePts = aSample->mTime.ToMicroseconds();
  } else {
    mFrame->pts = aSample->mTime.ToMicroseconds();
  }
# if LIBAVCODEC_VERSION_MAJOR >= 60
  mFrame->duration = aSample->mDuration.ToMicroseconds();
# else
  // Save the duration in the time_base unit.
  mDurationMap.Insert(mFrame->pts, aSample->mDuration.ToMicroseconds());
# endif
  Duration(mFrame) = aSample->mDuration.ToMicroseconds();

  AVDictionary* dict = nullptr;
  // VP8/VP9 use a mode that handles the temporal layer id sequence internally
  // and don't require explicitly setting the metadata. Other codecs, such as
  // AV1 via libaom, however require manual frame tagging.
  if (SvcEnabled() && mConfig.mCodec != CodecType::VP8 &&
      mConfig.mCodec != CodecType::VP9) {
    if (aSample->mKeyframe) {
      FFMPEGV_LOG("Key frame requested, resetting temporal layer id");
      mSVCInfo->ResetTemporalLayerId();
    }
    nsPrintfCString str("%d", mSVCInfo->CurrentTemporalLayerId());
    mLib->av_dict_set(&dict, "temporal_id", str.get(), 0);
    mFrame->metadata = dict;
  }

  // Now send the AVFrame to ffmpeg for encoding; same code for audio and
  // video.
  return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}
#endif  // if LIBAVCODEC_VERSION_MAJOR >= 58

Result<RefPtr<MediaRawData>, MediaResult>
FFmpegVideoEncoder<LIBAV_VER>::ToMediaRawData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  auto creationResult = CreateMediaRawData(aPacket);
  if (creationResult.isErr()) {
    return Err(creationResult.unwrapErr());
  }

  RefPtr<MediaRawData> data = creationResult.unwrap();

  data->mKeyframe = (aPacket->flags & AV_PKT_FLAG_KEY) != 0;

  auto extradataResult = GetExtraData(aPacket);
  if (extradataResult.isOk()) {
    data->mExtraData = extradataResult.unwrap();
  } else if (extradataResult.isErr()) {
    MediaResult e = extradataResult.unwrapErr();
    if (e.Code() != NS_ERROR_NOT_AVAILABLE &&
        e.Code() != NS_ERROR_NOT_IMPLEMENTED) {
      return Err(e);
    }
    FFMPEGV_LOG("GetExtraData failed with %s, but we can ignore it for now",
                e.Description().get());
  }

  // TODO(bug 1869560): The unit of pts, dts, and duration is time_base, which
  // is recommended to be the reciprocal of the frame rate, but we set it to
  // microseconds for now.
  data->mTime = media::TimeUnit::FromMicroseconds(aPacket->pts);
#if LIBAVCODEC_VERSION_MAJOR >= 60
  data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration);
#else
  int64_t duration;
  if (mDurationMap.Find(aPacket->pts, duration)) {
    data->mDuration = media::TimeUnit::FromMicroseconds(duration);
  } else {
    data->mDuration = media::TimeUnit::FromMicroseconds(aPacket->duration);
  }
#endif
  data->mTimecode = media::TimeUnit::FromMicroseconds(aPacket->dts);

if (mConfig.mCodec == CodecType::AV1) {
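    // Restore the real timestamp that EncodeInputWithModernAPIs() stashed
    // under the synthetic pts handed to the encoder.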
    auto found = mPtsMap.Take(aPacket->pts);
    data->mTime = media::TimeUnit::FromMicroseconds(found.value());
  }

  if (mSVCInfo) {
    if (data->mKeyframe) {
      FFMPEGV_LOG(
          "Encoded packet is a key frame, resetting temporal layer id "
          "sequence");
      mSVCInfo->ResetTemporalLayerId();
    }
    uint8_t temporalLayerId = mSVCInfo->CurrentTemporalLayerId();
    data->mTemporalLayerId.emplace(temporalLayerId);
    mSVCInfo->UpdateTemporalLayerId();
  }

  return data;
}

Result<already_AddRefed<MediaByteBuffer>, MediaResult>
FFmpegVideoEncoder<LIBAV_VER>::GetExtraData(AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);

  // H264 extra data comes with the key frame, and we only extract it when
  // encoding into AVCC format.
  if (mCodecID != AV_CODEC_ID_H264 || !mConfig.mCodecSpecific ||
      !mConfig.mCodecSpecific->is<H264Specific>() ||
      mConfig.mCodecSpecific->as<H264Specific>().mFormat !=
          H264BitStreamFormat::AVC ||
      !(aPacket->flags & AV_PKT_FLAG_KEY)) {
    return Err(
        MediaResult(NS_ERROR_NOT_AVAILABLE, "No available extra data"_ns));
  }

  if (mCodecName != "libx264") {
    return Err(MediaResult(
        NS_ERROR_NOT_IMPLEMENTED,
        RESULT_DETAIL(
            "Getting extra data from codec %s has not been implemented yet",
            mCodecName.get())));
  }

  bool useGlobalHeader =
#if LIBAVCODEC_VERSION_MAJOR >= 57
      mCodecContext->flags & AV_CODEC_FLAG_GLOBAL_HEADER;
#else
      false;
#endif

  Span<const uint8_t> buf;
  if (useGlobalHeader) {
    buf =
        Span<const uint8_t>(mCodecContext->extradata,
                            static_cast<size_t>(mCodecContext->extradata_size));
  } else {
    buf =
        Span<const uint8_t>(aPacket->data, static_cast<size_t>(aPacket->size));
  }
  if (buf.empty()) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED,
                           "failed to get H264 AVCC header in key frame!"_ns));
}
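
  // With annexb=0, libx264 produces AVCC-style length-prefixed NAL units: a
  // 32-bit big-endian size followed by the NAL payload, so the header read
  // below is laid out as [size][SPS][size][PPS].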
  BufferReader reader(buf);

  // The first part is the sps.
  uint32_t spsSize;
  MOZ_TRY_VAR(spsSize, reader.ReadU32());
  Span<const uint8_t> spsData;
  MOZ_TRY_VAR(spsData,
              reader.ReadSpan<const uint8_t>(static_cast<size_t>(spsSize)));

  // The second part is the pps.
  uint32_t ppsSize;
  MOZ_TRY_VAR(ppsSize, reader.ReadU32());
  Span<const uint8_t> ppsData;
  MOZ_TRY_VAR(ppsData,
              reader.ReadSpan<const uint8_t>(static_cast<size_t>(ppsSize)));

  // Ensure we have the profile, constraints and level needed to create the
  // extra data: in an SPS NAL unit they are bytes 1 to 3, right after the NAL
  // header byte.
  if (spsData.Length() < 4) {
    return Err(MediaResult(NS_ERROR_UNEXPECTED, "spsData is too short"_ns));
  }

  FFMPEGV_LOG(
      "Generate extra data: profile - %u, constraints: %u, level: %u for pts @ "
      "%" PRId64,
      spsData[1], spsData[2], spsData[3], aPacket->pts);

  // Create extra data.
  auto extraData = MakeRefPtr<MediaByteBuffer>();
  H264::WriteExtraData(extraData, spsData[1], spsData[2], spsData[3], spsData,
                       ppsData);
  MOZ_ASSERT(extraData);
  return extraData.forget();
}

void FFmpegVideoEncoder<LIBAV_VER>::ForceEnablingFFmpegDebugLogs() {
#if DEBUG
  if (!getenv("MOZ_AV_LOG_LEVEL") &&
      MOZ_LOG_TEST(sFFmpegVideoLog, LogLevel::Debug)) {
    mLib->av_log_set_level(AV_LOG_DEBUG);
  }
#endif  // DEBUG
}

Maybe<FFmpegVideoEncoder<LIBAV_VER>::SVCSettings>
FFmpegVideoEncoder<LIBAV_VER>::GetSVCSettings() {
  MOZ_ASSERT(!mCodecName.IsEmpty());
  MOZ_ASSERT(SvcEnabled());

  CodecType codecType = CodecType::Unknown;
  if (mCodecName == "libvpx") {
    codecType = CodecType::VP8;
  } else if (mCodecName == "libvpx-vp9") {
    codecType = CodecType::VP9;
  } else if (mCodecName == "libaom-av1") {
    codecType = CodecType::AV1;
  }

  if (codecType == CodecType::Unknown) {
    FFMPEGV_LOG("SVC setting is not implemented for %s codec",
                mCodecName.get());
    return Nothing();
  }

  SVCLayerSettings svc = GetSVCLayerSettings(
      codecType, mConfig.mScalabilityMode, mConfig.mBitrate);

  nsAutoCString name;
  nsAutoCString parameters;

  if (codecType == CodecType::VP8 || codecType == CodecType::VP9) {
    // Check that the number of temporal layers in the codec-specific settings
    // matches the number of layers for the given scalability mode.
    if (mConfig.mCodecSpecific) {
      if (mConfig.mCodecSpecific->is<VP8Specific>()) {
        MOZ_ASSERT(
            mConfig.mCodecSpecific->as<VP8Specific>().mNumTemporalLayers ==
            svc.mNumberTemporalLayers);
      } else if (mConfig.mCodecSpecific->is<VP9Specific>()) {
        MOZ_ASSERT(
            mConfig.mCodecSpecific->as<VP9Specific>().mNumTemporalLayers ==
            svc.mNumberTemporalLayers);
      }
    }

// Form an SVC setting string for libvpx.
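    // For example, L1T2 at 1 Mbps yields
    // "ts_target_bitrate=600,1000:ts_layering_mode=2".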
    name = "ts-parameters"_ns;
    parameters.Append("ts_target_bitrate=");
    for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
    }
    parameters.AppendPrintf(
        ":ts_layering_mode=%u",
        svc.mCodecAppendix->as<VPXSVCAppendix>().mLayeringMode);
  }

  if (codecType == CodecType::AV1) {
// Form an SVC setting string for libaom.
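    // For example, L1T3 at 1 Mbps yields "number_spatial_layers=1:
    // number_temporal_layers=3:framerate_factor=4,2,1:
    // layer_target_bitrate=500,700,1000" (the real string has no breaks).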
    name = "svc-parameters"_ns;
    parameters.AppendPrintf("number_spatial_layers=%zu",
                            svc.mNumberSpatialLayers);
    parameters.AppendPrintf(":number_temporal_layers=%zu",
                            svc.mNumberTemporalLayers);
    parameters.Append(":framerate_factor=");
    for (size_t i = 0; i < svc.mRateDecimators.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mRateDecimators[i]);
    }
    parameters.Append(":layer_target_bitrate=");
    for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
      if (i > 0) {
        parameters.Append(",");
      }
      parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
    }
  }

  return Some(
      SVCSettings{std::move(svc.mLayerIds),
                  std::make_pair(std::move(name), std::move(parameters))});
}

FFmpegVideoEncoder<LIBAV_VER>::H264Settings FFmpegVideoEncoder<
    LIBAV_VER>::GetH264Settings(const H264Specific& aH264Specific) {
  MOZ_ASSERT(mCodecName == "libx264",
             "GetH264Settings is libx264-only for now");

  nsTArray<std::pair<nsCString, nsCString>> keyValuePairs;

  Maybe<H264Setting> profile = GetH264Profile(aH264Specific.mProfile);
  MOZ_RELEASE_ASSERT(profile.isSome());
  if (!profile->mString.IsEmpty()) {
    keyValuePairs.AppendElement(std::make_pair("profile"_ns, profile->mString));
  } else {
    MOZ_RELEASE_ASSERT(aH264Specific.mProfile ==
                       H264_PROFILE::H264_PROFILE_EXTENDED);
  }

  Maybe<H264Setting> level = GetH264Level(aH264Specific.mLevel);
  MOZ_RELEASE_ASSERT(level.isSome());
  MOZ_RELEASE_ASSERT(!level->mString.IsEmpty());
  keyValuePairs.AppendElement(std::make_pair("level"_ns, level->mString));

  // Set the format: libx264's default format is annexb.
  if (aH264Specific.mFormat == H264BitStreamFormat::AVC) {
    keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=0"));
    // mCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER
    // if we don't want to append SPS/PPS data in all keyframes
    // (LIBAVCODEC_VERSION_MAJOR >= 57 only).
  } else {
    // Set annexb explicitly, even if it's the default format.
    keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=1"));
}
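
  // For e.g. main profile, level 3.1, AVC format, this yields
  // {"profile"="main", "level"="3.1", "x264-params"="annexb=0"}. The EXTENDED
  // profile has no libx264 name and thus omits the "profile" entry, which is
  // why InitEncoder() special-cases a list of fewer than 3 pairs.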
  return H264Settings{.mProfile = profile->mValue,
                      .mLevel = level->mValue,
                      .mSettingKeyValuePairs = std::move(keyValuePairs)};
}

}  // namespace mozilla