Diffstat (limited to 'third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc')
-rw-r--r-- | third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc | 2177 |
1 file changed, 2177 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
new file mode 100644
index 0000000000..5330eb7e8c
--- /dev/null
+++ b/third_party/libwebrtc/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@@ -0,0 +1,2177 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <memory>
+#ifdef RTC_ENABLE_VP9
+
+#include <algorithm>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/match.h"
+#include "absl/types/optional.h"
+#include "api/video/color_space.h"
+#include "api/video/i010_buffer.h"
+#include "api/video_codecs/scalability_mode.h"
+#include "common_video/include/video_frame_buffer.h"
+#include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
+#include "modules/video_coding/svc/scalable_video_controller.h"
+#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
+#include "modules/video_coding/svc/svc_rate_allocator.h"
+#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_list.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "rtc_base/experiments/rate_control_settings.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/time_utils.h"
+#include "rtc_base/trace_event.h"
+#include "third_party/libyuv/include/libyuv/convert.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+#if (defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)) && \
+    (defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS))
+#define MOBILE_ARM
+#endif
+
+namespace webrtc {
+
+namespace {
+// Maps from gof_idx to encoder internal reference frame buffer index. These
+// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames.
+uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
+uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
+
+// Maximum allowed PID difference for different per-layer frame-rate case.
+const int kMaxAllowedPidDiff = 30;
+
+// TODO(ilink): Tune these thresholds further.
+// Selected using ConverenceMotion_1280_720_50.yuv clip.
+// No toggling observed on any link capacity from 100-2000kbps.
+// HD was reached consistently when link capacity was 1500kbps.
+// Set resolutions are a bit more conservative than svc_config.cc sets, e.g.
+// for 300kbps resolution converged to 270p instead of 360p.
+constexpr int kLowVp9QpThreshold = 149; +constexpr int kHighVp9QpThreshold = 205; + +std::pair<size_t, size_t> GetActiveLayers( + const VideoBitrateAllocation& allocation) { + for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) { + if (allocation.GetSpatialLayerSum(sl_idx) > 0) { + size_t last_layer = sl_idx + 1; + while (last_layer < kMaxSpatialLayers && + allocation.GetSpatialLayerSum(last_layer) > 0) { + ++last_layer; + } + return std::make_pair(sl_idx, last_layer); + } + } + return {0, 0}; +} + +using Vp9ScalabilityStructure = + std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>; +absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure( + const VideoCodec& codec) { + int num_spatial_layers = codec.VP9().numberOfSpatialLayers; + int num_temporal_layers = + std::max(1, int{codec.VP9().numberOfTemporalLayers}); + if (num_spatial_layers == 1 && num_temporal_layers == 1) { + return absl::make_optional<Vp9ScalabilityStructure>( + std::make_unique<ScalableVideoControllerNoLayering>(), + ScalabilityMode::kL1T1); + } + + char name[20]; + rtc::SimpleStringBuilder ss(name); + if (codec.mode == VideoCodecMode::kScreensharing) { + // TODO(bugs.webrtc.org/11999): Compose names of the structures when they + // are implemented. + return absl::nullopt; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn || + num_spatial_layers == 1) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers; + } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) { + ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY"; + } else { + RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff); + ss << "S" << num_spatial_layers << "T" << num_temporal_layers; + } + + // Check spatial ratio. + if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) { + if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width || + codec.height != codec.spatialLayers[num_spatial_layers - 1].height) { + RTC_LOG(LS_WARNING) + << "Top layer resolution expected to match overall resolution"; + return absl::nullopt; + } + // Check if the ratio is one of the supported. + int numerator; + int denominator; + if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) { + numerator = 1; + denominator = 2; + // no suffix for 1:2 ratio. + } else if (2 * codec.spatialLayers[1].width == + 3 * codec.spatialLayers[0].width) { + numerator = 2; + denominator = 3; + ss << "h"; + } else { + RTC_LOG(LS_WARNING) << "Unsupported scalability ratio " + << codec.spatialLayers[0].width << ":" + << codec.spatialLayers[1].width; + return absl::nullopt; + } + // Validate ratio is consistent for all spatial layer transitions. 
+ for (int sid = 1; sid < num_spatial_layers; ++sid) { + if (codec.spatialLayers[sid].width * numerator != + codec.spatialLayers[sid - 1].width * denominator || + codec.spatialLayers[sid].height * numerator != + codec.spatialLayers[sid - 1].height * denominator) { + RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator + << ":" << denominator; + return absl::nullopt; + } + } + } + + absl::optional<ScalabilityMode> scalability_mode = + ScalabilityModeFromString(name); + if (!scalability_mode.has_value()) { + RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name; + return absl::nullopt; + } + auto scalability_structure_controller = + CreateScalabilityStructure(*scalability_mode); + if (scalability_structure_controller == nullptr) { + RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name; + } else { + RTC_LOG(LS_INFO) << "Created scalability structure " << name; + } + return absl::make_optional<Vp9ScalabilityStructure>( + std::move(scalability_structure_controller), *scalability_mode); +} + +vpx_svc_ref_frame_config_t Vp9References( + rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) { + vpx_svc_ref_frame_config_t ref_config = {}; + for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) { + const auto& buffers = layer_frame.Buffers(); + RTC_DCHECK_LE(buffers.size(), 3); + int sid = layer_frame.SpatialId(); + if (!buffers.empty()) { + ref_config.lst_fb_idx[sid] = buffers[0].id; + ref_config.reference_last[sid] = buffers[0].referenced; + if (buffers[0].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id); + } + } + if (buffers.size() > 1) { + ref_config.gld_fb_idx[sid] = buffers[1].id; + ref_config.reference_golden[sid] = buffers[1].referenced; + if (buffers[1].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id); + } + } + if (buffers.size() > 2) { + ref_config.alt_fb_idx[sid] = buffers[2].id; + ref_config.reference_alt_ref[sid] = buffers[2].referenced; + if (buffers[2].updated) { + ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id); + } + } + } + // TODO(bugs.webrtc.org/11999): Fill ref_config.duration + return ref_config; +} + +bool AllowDenoising() { +#ifdef MOBILE_ARM + // Keep the denoiser disabled on mobile ARM devices. It increases encode time + // by up to 16%. + return false; +#else + return true; +#endif +} + +} // namespace + +void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, + void* user_data) { + LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data); + enc->GetEncodedLayerFrame(pkt); +} + +LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec, + std::unique_ptr<LibvpxInterface> interface, + const FieldTrialsView& trials) + : libvpx_(std::move(interface)), + encoded_image_(), + encoded_complete_callback_(nullptr), + profile_( + ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)), + inited_(false), + timestamp_(0), + rc_max_intra_target_(0), + encoder_(nullptr), + config_(nullptr), + raw_(nullptr), + input_image_(nullptr), + force_key_frame_(true), + pics_since_key_(0), + num_temporal_layers_(0), + num_spatial_layers_(0), + num_active_spatial_layers_(0), + first_active_layer_(0), + layer_deactivation_requires_key_frame_(absl::StartsWith( + trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"), + "Enabled")), + is_svc_(false), + inter_layer_pred_(InterLayerPredMode::kOn), + external_ref_control_(false), // Set in InitEncode because of tests. 
+ trusted_rate_controller_( + RateControlSettings::ParseFromKeyValueConfig(&trials) + .LibvpxVp9TrustedRateController()), + first_frame_in_picture_(true), + ss_info_needed_(false), + force_all_active_layers_(false), + num_cores_(0), + is_flexible_mode_(false), + variable_framerate_experiment_(ParseVariableFramerateConfig(trials)), + variable_framerate_controller_( + variable_framerate_experiment_.framerate_limit), + quality_scaler_experiment_(ParseQualityScalerConfig(trials)), + external_ref_ctrl_( + !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"), + "Disabled")), + performance_flags_(ParsePerformanceFlagsFromTrials(trials)), + num_steady_state_frames_(0), + config_changed_(true) { + codec_ = {}; + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); +} + +LibvpxVp9Encoder::~LibvpxVp9Encoder() { + Release(); +} + +void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) { + // Ignored. +} + +int LibvpxVp9Encoder::Release() { + int ret_val = WEBRTC_VIDEO_CODEC_OK; + + if (encoder_ != nullptr) { + if (inited_) { + if (libvpx_->codec_destroy(encoder_)) { + ret_val = WEBRTC_VIDEO_CODEC_MEMORY; + } + } + delete encoder_; + encoder_ = nullptr; + } + if (config_ != nullptr) { + delete config_; + config_ = nullptr; + } + if (raw_ != nullptr) { + libvpx_->img_free(raw_); + raw_ = nullptr; + } + inited_ = false; + return ret_val; +} + +bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const { + // We check target_bitrate_bps of the 0th layer to see if the spatial layers + // (i.e. bitrates) were explicitly configured. + return codec_.spatialLayers[0].targetBitrate > 0; +} + +bool LibvpxVp9Encoder::SetSvcRates( + const VideoBitrateAllocation& bitrate_allocation) { + std::pair<size_t, size_t> current_layers = + GetActiveLayers(current_bitrate_allocation_); + std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation); + + const bool layer_activation_requires_key_frame = + inter_layer_pred_ == InterLayerPredMode::kOff || + inter_layer_pred_ == InterLayerPredMode::kOnKeyPic; + const bool lower_layers_enabled = new_layers.first < current_layers.first; + const bool higher_layers_enabled = new_layers.second > current_layers.second; + const bool disabled_layers = new_layers.first > current_layers.first || + new_layers.second < current_layers.second; + + if (lower_layers_enabled || + (higher_layers_enabled && layer_activation_requires_key_frame) || + (disabled_layers && layer_deactivation_requires_key_frame_)) { + force_key_frame_ = true; + } + + if (current_layers != new_layers) { + ss_info_needed_ = true; + } + + config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps(); + + if (ExplicitlyConfiguredSpatialLayers()) { + for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0); + config_->ss_target_bitrate[sl_idx] = + bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000; + + for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) { + config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] = + bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000; + } + + if (!was_layer_active) { + // Reset frame rate controller if layer is resumed after pause. 
+ framerate_controller_[sl_idx].Reset(); + } + + framerate_controller_[sl_idx].SetTargetRate( + codec_.spatialLayers[sl_idx].maxFramerate); + } + } else { + float rate_ratio[VPX_MAX_LAYERS] = {0}; + float total = 0; + for (int i = 0; i < num_spatial_layers_; ++i) { + if (svc_params_.scaling_factor_num[i] <= 0 || + svc_params_.scaling_factor_den[i] <= 0) { + RTC_LOG(LS_ERROR) << "Scaling factors not specified!"; + return false; + } + rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) / + svc_params_.scaling_factor_den[i]; + total += rate_ratio[i]; + } + + for (int i = 0; i < num_spatial_layers_; ++i) { + RTC_CHECK_GT(total, 0); + config_->ss_target_bitrate[i] = static_cast<unsigned int>( + config_->rc_target_bitrate * rate_ratio[i] / total); + if (num_temporal_layers_ == 1) { + config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 2) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] * 2 / 3; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->ss_target_bitrate[i]; + } else if (num_temporal_layers_ == 3) { + config_->layer_target_bitrate[i * num_temporal_layers_] = + config_->ss_target_bitrate[i] / 2; + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] = + config_->layer_target_bitrate[i * num_temporal_layers_] + + (config_->ss_target_bitrate[i] / 4); + config_->layer_target_bitrate[i * num_temporal_layers_ + 2] = + config_->ss_target_bitrate[i]; + } else { + RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: " + << num_temporal_layers_; + return false; + } + + framerate_controller_[i].SetTargetRate(codec_.maxFramerate); + } + } + + num_active_spatial_layers_ = 0; + first_active_layer_ = 0; + bool seen_active_layer = false; + bool expect_no_more_active_layers = false; + for (int i = 0; i < num_spatial_layers_; ++i) { + if (config_->ss_target_bitrate[i] > 0) { + RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is " + "deactivated."; + if (!seen_active_layer) { + first_active_layer_ = i; + } + num_active_spatial_layers_ = i + 1; + seen_active_layer = true; + } else { + expect_no_more_active_layers = seen_active_layer; + } + } + + if (seen_active_layer && performance_flags_.use_per_layer_speed) { + bool denoiser_on = + AllowDenoising() && codec_.VP9()->denoisingOn && + performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + if (higher_layers_enabled && !force_key_frame_) { + // Prohibit drop of all layers for the next frame, so newly enabled + // layer would have a valid spatial reference. + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = 0; + } + force_all_active_layers_ = true; + } + + if (svc_controller_) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Bitrates in `layer_target_bitrate` are accumulated for each temporal + // layer but in `VideoBitrateAllocation` they should be separated. 
+ int previous_bitrate_kbps = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + int accumulated_bitrate_kbps = + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid]; + int single_layer_bitrate_kbps = + accumulated_bitrate_kbps - previous_bitrate_kbps; + RTC_DCHECK_GE(single_layer_bitrate_kbps, 0); + current_bitrate_allocation_.SetBitrate( + sid, tid, single_layer_bitrate_kbps * 1'000); + previous_bitrate_kbps = accumulated_bitrate_kbps; + } + } + svc_controller_->OnRatesUpdated(current_bitrate_allocation_); + } else { + current_bitrate_allocation_ = bitrate_allocation; + } + config_changed_ = true; + return true; +} + +void LibvpxVp9Encoder::DisableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] == 0) { + return; + } + config_->ss_target_bitrate[sid] = 0; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0; + } + config_changed_ = true; +} + +void LibvpxVp9Encoder::EnableSpatialLayer(int sid) { + RTC_DCHECK_LT(sid, num_spatial_layers_); + if (config_->ss_target_bitrate[sid] > 0) { + return; + } + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = + current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000; + } + config_->ss_target_bitrate[sid] = + current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000; + RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0); + config_changed_ = true; +} + +void LibvpxVp9Encoder::SetActiveSpatialLayers() { + // Svc controller may decide to skip a frame at certain spatial layer even + // when bitrate for it is non-zero, however libvpx uses configured bitrate as + // a signal which layers should be produced. + RTC_DCHECK(svc_controller_); + RTC_DCHECK(!layer_frames_.empty()); + RTC_DCHECK(absl::c_is_sorted( + layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs, + const ScalableVideoController::LayerFrameConfig& rhs) { + return lhs.SpatialId() < rhs.SpatialId(); + })); + + auto frame_it = layer_frames_.begin(); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) { + EnableSpatialLayer(sid); + ++frame_it; + } else { + DisableSpatialLayer(sid); + } + } +} + +void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) { + if (!inited_) { + RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized."; + return; + } + if (encoder_->err) { + RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err; + return; + } + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Unsupported framerate: " + << parameters.framerate_fps; + return; + } + + codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5); + + bool res = SetSvcRates(parameters.bitrate); + RTC_DCHECK(res) << "Failed to set new bitrate allocation"; + config_changed_ = true; +} + +// TODO(eladalon): s/inst/codec_settings/g. 
+int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, + const Settings& settings) { + if (inst == nullptr) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->maxFramerate < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // Allow zero to represent an unspecified maxBitRate + if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->width < 1 || inst->height < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (settings.number_of_cores < 1) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (inst->VP9().numberOfTemporalLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + // libvpx probably does not support more than 3 spatial layers. + if (inst->VP9().numberOfSpatialLayers > 3) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + absl::optional<vpx_img_fmt_t> previous_img_fmt = + raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt; + + int ret_val = Release(); + if (ret_val < 0) { + return ret_val; + } + if (encoder_ == nullptr) { + encoder_ = new vpx_codec_ctx_t; + memset(encoder_, 0, sizeof(*encoder_)); + } + if (config_ == nullptr) { + config_ = new vpx_codec_enc_cfg_t; + memset(config_, 0, sizeof(*config_)); + } + timestamp_ = 0; + if (&codec_ != inst) { + codec_ = *inst; + } + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); + + force_key_frame_ = true; + pics_since_key_ = 0; + num_cores_ = settings.number_of_cores; + + scalability_mode_ = inst->GetScalabilityMode(); + if (scalability_mode_.has_value()) { + // Use settings from `ScalabilityMode` identifier. + RTC_LOG(LS_INFO) << "Create scalability structure " + << ScalabilityModeToString(*scalability_mode_); + svc_controller_ = CreateScalabilityStructure(*scalability_mode_); + if (!svc_controller_) { + RTC_LOG(LS_WARNING) << "Failed to create scalability structure."; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + ScalableVideoController::StreamLayersConfig info = + svc_controller_->StreamConfig(); + num_spatial_layers_ = info.num_spatial_layers; + num_temporal_layers_ = info.num_temporal_layers; + inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_); + } else { + num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; + RTC_DCHECK_GT(num_spatial_layers_, 0); + num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; + if (num_temporal_layers_ == 0) { + num_temporal_layers_ = 1; + } + inter_layer_pred_ = inst->VP9().interLayerPred; + auto vp9_scalability = CreateVp9ScalabilityStructure(*inst); + if (vp9_scalability.has_value()) { + std::tie(svc_controller_, scalability_mode_) = + std::move(vp9_scalability.value()); + } else { + svc_controller_ = nullptr; + scalability_mode_ = absl::nullopt; + } + } + + framerate_controller_ = std::vector<FramerateControllerDeprecated>( + num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate)); + + is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1); + + // Populate encoder configuration with default values. + if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE; + unsigned int bits_for_storage = 8; + switch (profile_) { + case VP9Profile::kProfile0: + img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420); + bits_for_storage = 8; + config_->g_bit_depth = VPX_BITS_8; + config_->g_profile = 0; + config_->g_input_bit_depth = 8; + break; + case VP9Profile::kProfile1: + // Encoding of profile 1 is not implemented. 
It would require extended + // support for I444, I422, and I440 buffers. + RTC_DCHECK_NOTREACHED(); + break; + case VP9Profile::kProfile2: + img_fmt = VPX_IMG_FMT_I42016; + bits_for_storage = 16; + config_->g_bit_depth = VPX_BITS_10; + config_->g_profile = 2; + config_->g_input_bit_depth = 10; + break; + case VP9Profile::kProfile3: + // Encoding of profile 3 is not implemented. + RTC_DCHECK_NOTREACHED(); + break; + } + + // Creating a wrapper to the image - setting image data to nullptr. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). + raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1, + nullptr); + raw_->bit_depth = bits_for_storage; + + config_->g_w = codec_.width; + config_->g_h = codec_.height; + config_->rc_target_bitrate = inst->startBitrate; // in kbit/s + config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0; + // Setting the time base of the codec. + config_->g_timebase.num = 1; + config_->g_timebase.den = 90000; + config_->g_lag_in_frames = 0; // 0- no frame lagging + config_->g_threads = 1; + // Rate control settings. + config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0; + config_->rc_end_usage = VPX_CBR; + config_->g_pass = VPX_RC_ONE_PASS; + config_->rc_min_quantizer = + codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2; + config_->rc_max_quantizer = 52; + config_->rc_undershoot_pct = 50; + config_->rc_overshoot_pct = 50; + config_->rc_buf_initial_sz = 500; + config_->rc_buf_optimal_sz = 600; + config_->rc_buf_sz = 1000; + // Set the maximum target size of any key-frame. + rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz); + // Key-frame interval is enforced manually by this wrapper. + config_->kf_mode = VPX_KF_DISABLED; + // TODO(webm:1592): work-around for libvpx issue, as it can still + // put some key-frames at will even in VPX_KF_DISABLED kf_mode. + config_->kf_max_dist = inst->VP9().keyFrameInterval; + config_->kf_min_dist = config_->kf_max_dist; + if (quality_scaler_experiment_.enabled) { + // In that experiment webrtc wide quality scaler is used instead of libvpx + // internal scaler. + config_->rc_resize_allowed = 0; + } else { + config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0; + } + // Determine number of threads based on the image size and #cores. + config_->g_threads = + NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores); + + is_flexible_mode_ = inst->VP9().flexibleMode; + + if (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) { + RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with " + "several spatial layers"; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // External reference control is required for different frame rate on spatial + // layers because libvpx generates rtp incompatible references in this case. 
+ external_ref_control_ = external_ref_ctrl_ || + (num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) || + inter_layer_pred_ == InterLayerPredMode::kOn; + + if (num_temporal_layers_ == 1) { + gof_.SetGofInfoVP9(kTemporalStructureMode1); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; + config_->ts_number_layers = 1; + config_->ts_rate_decimator[0] = 1; + config_->ts_periodicity = 1; + config_->ts_layer_id[0] = 0; + } else if (num_temporal_layers_ == 2) { + gof_.SetGofInfoVP9(kTemporalStructureMode2); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101; + config_->ts_number_layers = 2; + config_->ts_rate_decimator[0] = 2; + config_->ts_rate_decimator[1] = 1; + config_->ts_periodicity = 2; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 1; + } else if (num_temporal_layers_ == 3) { + gof_.SetGofInfoVP9(kTemporalStructureMode3); + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212; + config_->ts_number_layers = 3; + config_->ts_rate_decimator[0] = 4; + config_->ts_rate_decimator[1] = 2; + config_->ts_rate_decimator[2] = 1; + config_->ts_periodicity = 4; + config_->ts_layer_id[0] = 0; + config_->ts_layer_id[1] = 2; + config_->ts_layer_id[2] = 1; + config_->ts_layer_id[3] = 2; + } else { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + if (external_ref_control_) { + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 && + codec_.mode == VideoCodecMode::kScreensharing) { + // External reference control for several temporal layers with different + // frame rates on spatial layers is not implemented yet. + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + } + ref_buf_ = {}; + + return InitAndSetControlSettings(inst); +} + +int LibvpxVp9Encoder::NumberOfThreads(int width, + int height, + int number_of_cores) { + // Keep the number of encoder threads equal to the possible number of column + // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS. + if (width * height >= 1280 * 720 && number_of_cores > 4) { + return 4; + } else if (width * height >= 640 * 360 && number_of_cores > 2) { + return 2; + } else { +// Use 2 threads for low res on mobile ARM. +#ifdef MOBILE_ARM + if (width * height >= 320 * 180 && number_of_cores > 2) { + return 2; + } +#endif + // 1 thread less than VGA. + return 1; + } +} + +int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { + // Set QP-min/max per spatial and temporal layer. + int tot_num_layers = num_spatial_layers_ * num_temporal_layers_; + for (int i = 0; i < tot_num_layers; ++i) { + svc_params_.max_quantizers[i] = config_->rc_max_quantizer; + svc_params_.min_quantizers[i] = config_->rc_min_quantizer; + } + config_->ss_number_layers = num_spatial_layers_; + if (svc_controller_) { + auto stream_config = svc_controller_->StreamConfig(); + for (int i = 0; i < stream_config.num_spatial_layers; ++i) { + svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i]; + svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i]; + } + } else if (ExplicitlyConfiguredSpatialLayers()) { + for (int i = 0; i < num_spatial_layers_; ++i) { + const auto& layer = codec_.spatialLayers[i]; + RTC_CHECK_GT(layer.width, 0); + const int scale_factor = codec_.width / layer.width; + RTC_DCHECK_GT(scale_factor, 0); + + // Ensure scaler factor is integer. 
+ if (scale_factor * layer.width != codec_.width) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is the same in both dimensions. + if (scale_factor * layer.height != codec_.height) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // Ensure scale factor is power of two. + const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0; + if (!is_pow_of_two) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + svc_params_.scaling_factor_num[i] = 1; + svc_params_.scaling_factor_den[i] = scale_factor; + + RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0); + RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate); + if (i > 0) { + // Frame rate of high spatial layer is supposed to be equal or higher + // than frame rate of low spatial layer. + RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate, + codec_.spatialLayers[i - 1].maxFramerate); + } + } + } else { + int scaling_factor_num = 256; + for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + // 1:2 scaling in each dimension. + svc_params_.scaling_factor_num[i] = scaling_factor_num; + svc_params_.scaling_factor_den[i] = 256; + } + } + + UpdatePerformanceFlags(); + RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(), + static_cast<size_t>(num_spatial_layers_)); + + SvcRateAllocator init_allocator(codec_); + current_bitrate_allocation_ = + init_allocator.Allocate(VideoBitrateAllocationParameters( + inst->startBitrate * 1000, inst->maxFramerate)); + if (!SetSvcRates(current_bitrate_allocation_)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + const vpx_codec_err_t rv = libvpx_->codec_enc_init( + encoder_, vpx_codec_vp9_cx(), config_, + config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH); + if (rv != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (performance_flags_.use_per_layer_speed) { + for (int si = 0; si < num_spatial_layers_; ++si) { + svc_params_.speed_per_layer[si] = + performance_flags_by_spatial_index_[si].base_layer_speed; + svc_params_.loopfilter_ctrl[si] = + performance_flags_by_spatial_index_[si].deblock_mode; + } + bool denoiser_on = + AllowDenoising() && inst->VP9().denoisingOn && + performance_flags_by_spatial_index_[num_spatial_layers_ - 1] + .allow_denoising; + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + denoiser_on ? 1 : 0); + } + + libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, + rc_max_intra_target_); + libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE, + inst->VP9().adaptiveQpMode ? 
3 : 0); + + libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0); + + if (is_svc_) { + libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + if (!is_svc_ || !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control( + encoder_, VP8E_SET_CPUUSED, + performance_flags_by_spatial_index_.rbegin()->base_layer_speed); + } + + if (num_spatial_layers_ > 1) { + switch (inter_layer_pred_) { + case InterLayerPredMode::kOn: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0); + break; + case InterLayerPredMode::kOff: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1); + break; + case InterLayerPredMode::kOnKeyPic: + libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_)); + const bool reverse_constrained_drop_mode = + inter_layer_pred_ == InterLayerPredMode::kOn && + codec_.mode == VideoCodecMode::kScreensharing && + num_spatial_layers_ > 1; + if (reverse_constrained_drop_mode) { + // Screenshare dropping mode: drop a layer only together with all lower + // layers. This ensures that drops on lower layers won't reduce frame-rate + // for higher layers and reference structure is RTP-compatible. + svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP; + svc_drop_frame_.max_consec_drop = 5; + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } else { + // Configure encoder to drop entire superframe whenever it needs to drop + // a layer. This mode is preferred over per-layer dropping which causes + // quality flickering and is not compatible with RTP non-flexible mode. + svc_drop_frame_.framedrop_mode = FULL_SUPERFRAME_DROP; + svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max(); + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } + libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + // Register callback for getting each spatial layer. + vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = { + LibvpxVp9Encoder::EncoderOutputCodedPacketCallback, + reinterpret_cast<void*>(this)}; + libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, + reinterpret_cast<void*>(&cbp)); + + // Control function to set the number of column tiles in encoding a frame, in + // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns. + // The number tile columns will be capped by the encoder based on image size + // (minimum width of tile column is 256 pixels, maximum is 4096). + libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS, + static_cast<int>((config_->g_threads >> 1))); + + // Turn on row-based multithreading. + libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1); + + if (AllowDenoising() && !performance_flags_.use_per_layer_speed) { + libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, + inst->VP9().denoisingOn ? 1 : 0); + } + + if (codec_.mode == VideoCodecMode::kScreensharing) { + // Adjust internal parameters to screen content. + libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1); + } + // Enable encoder skip of static/low content blocks. 
+ libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); + inited_ = true; + config_changed_ = true; + return WEBRTC_VIDEO_CODEC_OK; +} + +uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) { + // Set max to the optimal buffer level (normalized by target BR), + // and scaled by a scale_par. + // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps]. + // This value is presented in percentage of perFrameBw: + // perFrameBw = targetBR[Kbps] * 1000 / framerate. + // The target in % is as follows: + float scale_par = 0.5; + uint32_t target_pct = + optimal_buffer_size * scale_par * codec_.maxFramerate / 10; + // Don't go below 3 times the per frame bandwidth. + const uint32_t min_intra_size = 300; + return (target_pct < min_intra_size) ? min_intra_size : target_pct; +} + +int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, + const std::vector<VideoFrameType>* frame_types) { + if (!inited_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (encoded_complete_callback_ == nullptr) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (num_active_spatial_layers_ == 0) { + // All spatial layers are disabled, return without encoding anything. + return WEBRTC_VIDEO_CODEC_OK; + } + + // We only support one stream at the moment. + if (frame_types && !frame_types->empty()) { + if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) { + force_key_frame_ = true; + } + } + + if (pics_since_key_ + 1 == + static_cast<size_t>(codec_.VP9()->keyFrameInterval)) { + force_key_frame_ = true; + } + + if (svc_controller_) { + layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_); + if (layer_frames_.empty()) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (layer_frames_.front().IsKeyframe()) { + force_key_frame_ = true; + } + } + + vpx_svc_layer_id_t layer_id = {0}; + if (!force_key_frame_) { + const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; + layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; + + if (codec_.mode == VideoCodecMode::kScreensharing) { + const uint32_t frame_timestamp_ms = + 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; + + // To ensure that several rate-limiters with different limits don't + // interfere, they must be queried in order of increasing limit. + + bool use_steady_state_limiter = + variable_framerate_experiment_.enabled && + input_image.update_rect().IsEmpty() && + num_steady_state_frames_ >= + variable_framerate_experiment_.frames_before_steady_state; + + // Need to check all frame limiters, even if lower layers are disabled, + // because variable frame-rate limiter should be checked after the first + // layer. It's easier to overwrite active layers after, then check all + // cases. + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + const float layer_fps = + framerate_controller_[layer_id.spatial_layer_id].GetTargetRate(); + // Use steady state rate-limiter at the correct place. + if (use_steady_state_limiter && + layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) { + if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) { + layer_id.spatial_layer_id = num_active_spatial_layers_; + } + // Break always: if rate limiter triggered frame drop, no need to + // continue; otherwise, the rate is less than the next limiters. 
+ break; + } + if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { + ++layer_id.spatial_layer_id; + } else { + break; + } + } + + if (use_steady_state_limiter && + layer_id.spatial_layer_id < num_active_spatial_layers_) { + variable_framerate_controller_.AddFrame(frame_timestamp_ms); + } + } + + if (force_all_active_layers_) { + layer_id.spatial_layer_id = first_active_layer_; + force_all_active_layers_ = false; + } + + RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_); + if (layer_id.spatial_layer_id >= num_active_spatial_layers_) { + // Drop entire picture. + return WEBRTC_VIDEO_CODEC_OK; + } + } + + // Need to set temporal layer id on ALL layers, even disabled ones. + // Otherwise libvpx might produce frames on a disabled layer: + // http://crbug.com/1051476 + for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id; + } + + if (layer_id.spatial_layer_id < first_active_layer_) { + layer_id.spatial_layer_id = first_active_layer_; + } + + if (svc_controller_) { + layer_id.spatial_layer_id = layer_frames_.front().SpatialId(); + layer_id.temporal_layer_id = layer_frames_.front().TemporalId(); + for (const auto& layer : layer_frames_) { + layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] = + layer.TemporalId(); + } + SetActiveSpatialLayers(); + } + + if (is_svc_ && performance_flags_.use_per_layer_speed) { + // Update speed settings that might depend on temporal index. + bool speed_updated = false; + for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const int target_speed = + layer_id.temporal_layer_id_per_spatial[sl_idx] == 0 + ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed + : performance_flags_by_spatial_index_[sl_idx].high_layer_speed; + if (svc_params_.speed_per_layer[sl_idx] != target_speed) { + svc_params_.speed_per_layer[sl_idx] = target_speed; + speed_updated = true; + } + } + if (speed_updated) { + libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + } + + libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); + + if (num_spatial_layers_ > 1) { + // Update frame dropping settings as they may change on per-frame basis. + libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, + &svc_drop_frame_); + } + + if (config_changed_) { + if (libvpx_->codec_enc_config_set(encoder_, config_)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (!performance_flags_.use_per_layer_speed) { + // Not setting individual speeds per layer, find the highest active + // resolution instead and base the speed on that. + for (int i = num_spatial_layers_ - 1; i >= 0; --i) { + if (config_->ss_target_bitrate[i] > 0) { + int width = (svc_params_.scaling_factor_num[i] * config_->g_w) / + svc_params_.scaling_factor_den[i]; + int height = (svc_params_.scaling_factor_num[i] * config_->g_h) / + svc_params_.scaling_factor_den[i]; + int speed = + std::prev(performance_flags_.settings_by_resolution.lower_bound( + width * height)) + ->second.base_layer_speed; + libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed); + break; + } + } + } + config_changed_ = false; + } + + if (input_image.width() != codec_.width || + input_image.height() != codec_.height) { + int ret = UpdateCodecFrameSize(input_image); + if (ret < 0) { + return ret; + } + } + + RTC_DCHECK_EQ(input_image.width(), raw_->d_w); + RTC_DCHECK_EQ(input_image.height(), raw_->d_h); + + // Set input image for use in the callback. 
+ // This was necessary since you need some information from input_image. + // You can save only the necessary information (such as timestamp) instead of + // doing this. + input_image_ = &input_image; + + // In case we need to map the buffer, `mapped_buffer` is used to keep it alive + // through reference counting until after encoding has finished. + rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer; + const I010BufferInterface* i010_buffer; + rtc::scoped_refptr<const I010BufferInterface> i010_copy; + switch (profile_) { + case VP9Profile::kProfile0: { + mapped_buffer = + PrepareBufferForProfile0(input_image.video_frame_buffer()); + if (!mapped_buffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + break; + } + case VP9Profile::kProfile1: { + RTC_DCHECK_NOTREACHED(); + break; + } + case VP9Profile::kProfile2: { + // We can inject kI010 frames directly for encode. All other formats + // should be converted to it. + switch (input_image.video_frame_buffer()->type()) { + case VideoFrameBuffer::Type::kI010: { + i010_buffer = input_image.video_frame_buffer()->GetI010(); + break; + } + default: { + auto i420_buffer = input_image.video_frame_buffer()->ToI420(); + if (!i420_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString( + input_image.video_frame_buffer()->type()) + << " image to I420. Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + i010_copy = I010Buffer::Copy(*i420_buffer); + i010_buffer = i010_copy.get(); + } + } + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataY())); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataU())); + raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>( + reinterpret_cast<const uint8_t*>(i010_buffer->DataV())); + raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2; + raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2; + raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2; + break; + } + case VP9Profile::kProfile3: { + RTC_DCHECK_NOTREACHED(); + break; + } + } + + vpx_enc_frame_flags_t flags = 0; + if (force_key_frame_) { + flags = VPX_EFLAG_FORCE_KF; + } + + if (svc_controller_) { + vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_); + libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_config); + } else if (external_ref_control_) { + vpx_svc_ref_frame_config_t ref_config = + SetReferences(force_key_frame_, layer_id.spatial_layer_id); + + if (VideoCodecMode::kScreensharing == codec_.mode) { + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + ref_config.duration[sl_idx] = static_cast<int64_t>( + 90000 / (std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[sl_idx].GetTargetRate()))); + } + } + + libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_config); + } + + first_frame_in_picture_ = true; + + // TODO(ssilkin): Frame duration should be specified per spatial layer + // since their frame rate can be different. For now calculate frame duration + // based on target frame rate of the highest spatial layer, which frame rate + // is supposed to be equal or higher than frame rate of low spatial layers. + // Also, timestamp should represent actual time passed since previous frame + // (not 'expected' time). Then rate controller can drain buffer more + // accurately. 
+ RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_); + float target_framerate_fps = + (codec_.mode == VideoCodecMode::kScreensharing) + ? std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[num_active_spatial_layers_ - 1] + .GetTargetRate()) + : codec_.maxFramerate; + uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps); + const vpx_codec_err_t rv = libvpx_->codec_encode( + encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME); + if (rv != VPX_CODEC_OK) { + RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv) + << "\n" + "Details: " + << libvpx_->codec_error(encoder_) << "\n" + << libvpx_->codec_error_detail(encoder_); + return WEBRTC_VIDEO_CODEC_ERROR; + } + timestamp_ += duration; + + return WEBRTC_VIDEO_CODEC_OK; +} + +int LibvpxVp9Encoder::UpdateCodecFrameSize( + const VideoFrame& input_image) { + RTC_LOG(LS_INFO) << "Reconfiging VP from " << + codec_.width << "x" << codec_.height << " to " << + input_image.width() << "x" << input_image.height(); + // Preserve latest bitrate/framerate setting + // TODO: Mozilla - see below, we need to save more state here. + //uint32_t old_bitrate_kbit = config_->rc_target_bitrate; + //uint32_t old_framerate = codec_.maxFramerate; + + codec_.width = input_image.width(); + codec_.height = input_image.height(); + + vpx_img_free(raw_); + raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, + 1, NULL); + // Update encoder context for new frame size. + config_->g_w = codec_.width; + config_->g_h = codec_.height; + + // Determine number of threads based on the image size and #cores. + config_->g_threads = NumberOfThreads(codec_.width, codec_.height, + num_cores_); + + // NOTE: We would like to do this the same way vp8 does it + // (with vpx_codec_enc_config_set()), but that causes asserts + // in AQ 3 (cyclic); and in AQ 0 it works, but on a resize to smaller + // than 1/2 x 1/2 original it asserts in convolve(). Given these + // bugs in trying to do it the "right" way, we basically re-do + // the initialization. + vpx_codec_destroy(encoder_); // clean up old state + int result = InitAndSetControlSettings(&codec_); + if (result == WEBRTC_VIDEO_CODEC_OK) { + // TODO: Mozilla rates have become much more complicated, we need to store + // more state or find another way of doing this. + //return SetRates(old_bitrate_kbit, old_framerate); + RTC_CHECK(false); + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + return result; +} + +bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + absl::optional<int>* spatial_idx, + absl::optional<int>* temporal_idx, + const vpx_codec_cx_pkt& pkt) { + RTC_CHECK(codec_specific != nullptr); + codec_specific->codecType = kVideoCodecVP9; + CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9); + + vp9_info->first_frame_in_picture = first_frame_in_picture_; + vp9_info->flexible_mode = is_flexible_mode_; + + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + pics_since_key_ = 0; + } else if (first_frame_in_picture_) { + ++pics_since_key_; + } + + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + // Can't have keyframe with non-zero temporal layer. 
+ RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0); + + RTC_CHECK_GT(num_temporal_layers_, 0); + RTC_CHECK_GT(num_active_spatial_layers_, 0); + if (num_temporal_layers_ == 1) { + RTC_CHECK_EQ(layer_id.temporal_layer_id, 0); + vp9_info->temporal_idx = kNoTemporalIdx; + *temporal_idx = absl::nullopt; + } else { + vp9_info->temporal_idx = layer_id.temporal_layer_id; + *temporal_idx = layer_id.temporal_layer_id; + } + if (num_active_spatial_layers_ == 1) { + RTC_CHECK_EQ(layer_id.spatial_layer_id, 0); + *spatial_idx = absl::nullopt; + } else { + *spatial_idx = layer_id.spatial_layer_id; + } + + const bool is_key_pic = (pics_since_key_ == 0); + const bool is_inter_layer_pred_allowed = + (inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic)); + + // Always set inter_layer_predicted to true on high layer frame if inter-layer + // prediction (ILP) is allowed even if encoder didn't actually use it. + // Setting inter_layer_predicted to false would allow receiver to decode high + // layer frame without decoding low layer frame. If that would happen (e.g. + // if low layer frame is lost) then receiver won't be able to decode next high + // layer frame which uses ILP. + vp9_info->inter_layer_predicted = + first_frame_in_picture_ ? false : is_inter_layer_pred_allowed; + + // Mark all low spatial layer frames as references (not just frames of + // active low spatial layers) if inter-layer prediction is enabled since + // these frames are indirect references of high spatial layer, which can + // later be enabled without key frame. + vp9_info->non_ref_for_inter_layer_pred = + !is_inter_layer_pred_allowed || + layer_id.spatial_layer_id + 1 == num_spatial_layers_; + + // Always populate this, so that the packetizer can properly set the marker + // bit. + vp9_info->num_spatial_layers = num_active_spatial_layers_; + vp9_info->first_active_layer = first_active_layer_; + + vp9_info->num_ref_pics = 0; + FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted, + vp9_info); + if (vp9_info->flexible_mode) { + vp9_info->gof_idx = kNoGofIdx; + if (!svc_controller_) { + if (num_temporal_layers_ == 1) { + vp9_info->temporal_up_switch = true; + } else { + // In flexible mode with > 1 temporal layer but no SVC controller we + // can't techincally determine if a frame is an upswitch point, use + // gof-based data as proxy for now. + // TODO(sprang): Remove once SVC controller is the only choice. + vp9_info->gof_idx = + static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); + vp9_info->temporal_up_switch = + gof_.temporal_up_switch[vp9_info->gof_idx]; + } + } + } else { + vp9_info->gof_idx = + static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); + vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx]; + RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] || + vp9_info->num_ref_pics == 0); + } + + vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0); + + // Write SS on key frame of independently coded spatial layers and on base + // temporal/spatial layer frame if number of layers changed without issuing + // of key picture (inter-layer prediction is enabled). 
+ const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted; + if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 && + layer_id.spatial_layer_id == first_active_layer_)) { + vp9_info->ss_data_available = true; + vp9_info->spatial_layer_resolution_present = true; + // Signal disabled layers. + for (size_t i = 0; i < first_active_layer_; ++i) { + vp9_info->width[i] = 0; + vp9_info->height[i] = 0; + } + for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) { + vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] / + svc_params_.scaling_factor_den[i]; + vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] / + svc_params_.scaling_factor_den[i]; + } + if (vp9_info->flexible_mode) { + vp9_info->gof.num_frames_in_gof = 0; + } else { + vp9_info->gof.CopyGofInfoVP9(gof_); + } + + ss_info_needed_ = false; + } else { + vp9_info->ss_data_available = false; + } + + first_frame_in_picture_ = false; + + // Populate codec-agnostic section in the codec specific structure. + if (svc_controller_) { + auto it = absl::c_find_if( + layer_frames_, + [&](const ScalableVideoController::LayerFrameConfig& config) { + return config.SpatialId() == layer_id.spatial_layer_id; + }); + if (it == layer_frames_.end()) { + RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S" + << layer_id.spatial_layer_id << "T" + << layer_id.temporal_layer_id + << " that wasn't requested."; + return false; + } + codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it); + if (is_key_frame) { + codec_specific->template_structure = + svc_controller_->DependencyStructure(); + auto& resolutions = codec_specific->template_structure->resolutions; + resolutions.resize(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + resolutions[sid] = RenderResolution( + /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid], + /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] / + svc_params_.scaling_factor_den[sid]); + } + } + if (is_flexible_mode_) { + // Populate data for legacy temporal-upswitch state. + // We can switch up to a higher temporal layer only if all temporal layers + // higher than this (within the current spatial layer) are switch points. + vp9_info->temporal_up_switch = true; + for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_; + ++i) { + // Assumes decode targets are always ordered first by spatial then by + // temporal id. + size_t dti_index = + (layer_id.spatial_layer_id * num_temporal_layers_) + i; + vp9_info->temporal_up_switch &= + (codec_specific->generic_frame_info + ->decode_target_indications[dti_index] == + DecodeTargetIndication::kSwitch); + } + } + } + codec_specific->scalability_mode = scalability_mode_; + return true; +} + +void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, + const size_t pic_num, + const bool inter_layer_predicted, + CodecSpecificInfoVP9* vp9_info) { + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + const bool is_key_frame = + (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + + std::vector<RefFrameBuffer> ref_buf_list; + + if (is_svc_) { + vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, + &enc_layer_conf); + char ref_buf_flags[] = "00000000"; + // There should be one character per buffer + 1 termination '\0'. 
+ static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1); + + if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags[fb_idx] = '1'; + } + } + + if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags[fb_idx] = '1'; + } + } + + if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) { + const size_t fb_idx = + enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]; + RTC_DCHECK_LT(fb_idx, ref_buf_.size()); + if (std::find(ref_buf_list.begin(), ref_buf_list.end(), + ref_buf_[fb_idx]) == ref_buf_list.end()) { + ref_buf_list.push_back(ref_buf_[fb_idx]); + ref_buf_flags[fb_idx] = '1'; + } + } + + RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl " + << layer_id.spatial_layer_id << " tl " + << layer_id.temporal_layer_id << " refered buffers " + << ref_buf_flags; + + } else if (!is_key_frame) { + RTC_DCHECK_EQ(num_spatial_layers_, 1); + RTC_DCHECK_EQ(num_temporal_layers_, 1); + // In non-SVC mode encoder doesn't provide reference list. Assume each frame + // refers previous one, which is stored in buffer 0. + ref_buf_list.push_back(ref_buf_[0]); + } + + std::vector<size_t> ref_pid_list; + + vp9_info->num_ref_pics = 0; + for (const RefFrameBuffer& ref_buf : ref_buf_list) { + RTC_DCHECK_LE(ref_buf.pic_num, pic_num); + if (ref_buf.pic_num < pic_num) { + if (inter_layer_pred_ != InterLayerPredMode::kOn) { + // RTP spec limits temporal prediction to the same spatial layer. + // It is safe to ignore this requirement if inter-layer prediction is + // enabled for all frames when all base frames are relayed to receiver. + RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id); + } else { + RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id); + } + RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id); + + // Encoder may reference several spatial layers on the same previous + // frame in case if some spatial layers are skipped on the current frame. + // We shouldn't put duplicate references as it may break some old + // clients and isn't RTP compatible. + if (std::find(ref_pid_list.begin(), ref_pid_list.end(), + ref_buf.pic_num) != ref_pid_list.end()) { + continue; + } + ref_pid_list.push_back(ref_buf.pic_num); + + const size_t p_diff = pic_num - ref_buf.pic_num; + RTC_DCHECK_LE(p_diff, 127UL); + + vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff); + ++vp9_info->num_ref_pics; + } else { + RTC_DCHECK(inter_layer_predicted); + // RTP spec only allows to use previous spatial layer for inter-layer + // prediction. 
+ RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id); + } + } +} + +void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, + const size_t pic_num) { + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + RefFrameBuffer frame_buf = {.pic_num = pic_num, + .spatial_layer_id = layer_id.spatial_layer_id, + .temporal_layer_id = layer_id.temporal_layer_id}; + + if (is_svc_) { + vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, + &enc_layer_conf); + const int update_buffer_slot = + enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id]; + + for (size_t i = 0; i < ref_buf_.size(); ++i) { + if (update_buffer_slot & (1 << i)) { + ref_buf_[i] = frame_buf; + } + } + + RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl " + << layer_id.spatial_layer_id << " tl " + << layer_id.temporal_layer_id << " updated buffers " + << (update_buffer_slot & (1 << 0) ? 1 : 0) + << (update_buffer_slot & (1 << 1) ? 1 : 0) + << (update_buffer_slot & (1 << 2) ? 1 : 0) + << (update_buffer_slot & (1 << 3) ? 1 : 0) + << (update_buffer_slot & (1 << 4) ? 1 : 0) + << (update_buffer_slot & (1 << 5) ? 1 : 0) + << (update_buffer_slot & (1 << 6) ? 1 : 0) + << (update_buffer_slot & (1 << 7) ? 1 : 0); + } else { + RTC_DCHECK_EQ(num_spatial_layers_, 1); + RTC_DCHECK_EQ(num_temporal_layers_, 1); + // In non-svc mode encoder doesn't provide reference list. Assume each frame + // is reference and stored in buffer 0. + ref_buf_[0] = frame_buf; + } +} + +vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences( + bool is_key_pic, + int first_active_spatial_layer_id) { + // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs. + RTC_DCHECK_LE(gof_.num_frames_in_gof, 4); + + vpx_svc_ref_frame_config_t ref_config; + memset(&ref_config, 0, sizeof(ref_config)); + + const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1); + const bool is_inter_layer_pred_allowed = + inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic); + absl::optional<int> last_updated_buf_idx; + + // Put temporal reference to LAST and spatial reference to GOLDEN. Update + // frame buffer (i.e. store encoded frame) if current frame is a temporal + // reference (i.e. it belongs to a low temporal layer) or it is a spatial + // reference. In later case, always store spatial reference in the last + // reference frame buffer. + // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers + // for temporal references plus 1 buffer for spatial reference. 7 buffers + // in total. + + for (int sl_idx = first_active_spatial_layer_id; + sl_idx < num_active_spatial_layers_; ++sl_idx) { + const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1; + const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof; + + if (!is_key_pic) { + // Set up temporal reference. + const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx]; + + // Last reference frame buffer is reserved for spatial reference. It is + // not supposed to be used for temporal prediction. + RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1); + + const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num; + // Incorrect spatial layer may be in the buffer due to a key-frame. 
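+ // E.g. with 3 spatial and 3 temporal layers, SL0 keeps its temporal
+ // references in buffers 0-1, SL1 in 2-3 and SL2 in 4-5, while the last
+ // buffer holds the spatial reference. The checks below verify that the
+ // buffer really contains the frame the GOF expects.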
+ const bool same_spatial_layer = + ref_buf_[buf_idx].spatial_layer_id == sl_idx; + bool correct_pid = false; + if (is_flexible_mode_) { + correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff; + } else { + // Below code assumes single temporal referecence. + RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1); + correct_pid = pid_diff == gof_.pid_diff[gof_idx][0]; + } + + if (same_spatial_layer && correct_pid) { + ref_config.lst_fb_idx[sl_idx] = buf_idx; + ref_config.reference_last[sl_idx] = 1; + } else { + // This reference doesn't match with one specified by GOF. This can + // only happen if spatial layer is enabled dynamically without key + // frame. Spatial prediction is supposed to be enabled in this case. + RTC_DCHECK(is_inter_layer_pred_allowed && + sl_idx > first_active_spatial_layer_id); + } + } + + if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) { + // Set up spatial reference. + RTC_DCHECK(last_updated_buf_idx); + ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx; + ref_config.reference_golden[sl_idx] = 1; + } else { + RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || + sl_idx == first_active_spatial_layer_id || + inter_layer_pred_ == InterLayerPredMode::kOff); + } + + last_updated_buf_idx.reset(); + + if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 || + num_temporal_layers_ == 1) { + last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx]; + + // Ensure last frame buffer is not used for temporal prediction (it is + // reserved for spatial reference). + RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1); + } else if (is_inter_layer_pred_allowed) { + last_updated_buf_idx = kNumVp9Buffers - 1; + } + + if (last_updated_buf_idx) { + ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx; + } + } + + return ref_config; +} + +void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { + RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT); + + if (pkt->data.frame.sz == 0) { + // Ignore dropped frame. + return; + } + + vpx_svc_layer_id_t layer_id = {0}; + libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + + encoded_image_.SetEncodedData(EncodedImageBuffer::Create( + static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz)); + + codec_specific_ = {}; + absl::optional<int> spatial_index; + absl::optional<int> temporal_index; + if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index, + *pkt)) { + // Drop the frame. + encoded_image_.set_size(0); + return; + } + encoded_image_.SetSpatialIndex(spatial_index); + encoded_image_.SetTemporalIndex(temporal_index); + + const bool is_key_frame = + ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) && + !codec_specific_.codecSpecific.VP9.inter_layer_predicted; + + // Ensure encoder issued key frame on request. + RTC_DCHECK(is_key_frame || !force_key_frame_); + + // Check if encoded frame is a key frame. 
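+ // In SVC, only the lowest active spatial layer of a key picture is a true
+ // key frame; upper layers of the same picture are inter-layer predicted
+ // and are therefore marked as delta frames below.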
+ encoded_image_._frameType = VideoFrameType::kVideoFrameDelta; + if (is_key_frame) { + encoded_image_._frameType = VideoFrameType::kVideoFrameKey; + force_key_frame_ = false; + } + + UpdateReferenceBuffers(*pkt, pics_since_key_); + + TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size()); + encoded_image_.SetRtpTimestamp(input_image_->timestamp()); + encoded_image_.SetCaptureTimeIdentifier( + input_image_->capture_time_identifier()); + encoded_image_.SetColorSpace(input_image_->color_space()); + encoded_image_._encodedHeight = + pkt->data.frame.height[layer_id.spatial_layer_id]; + encoded_image_._encodedWidth = + pkt->data.frame.width[layer_id.spatial_layer_id]; + int qp = -1; + libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp); + encoded_image_.qp_ = qp; + + const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 == + num_active_spatial_layers_; + DeliverBufferedFrame(end_of_picture); +} + +void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) { + if (encoded_image_.size() > 0) { + if (num_spatial_layers_ > 1) { + // Restore frame dropping settings, as dropping may be temporary forbidden + // due to dynamically enabled layers. + for (size_t i = 0; i < num_spatial_layers_; ++i) { + svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh; + } + } + + codec_specific_.end_of_picture = end_of_picture; + + encoded_complete_callback_->OnEncodedImage(encoded_image_, + &codec_specific_); + + if (codec_.mode == VideoCodecMode::kScreensharing) { + const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0); + const uint32_t frame_timestamp_ms = + 1000 * encoded_image_.RtpTimestamp() / kVideoPayloadTypeFrequency; + framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms); + + const size_t steady_state_size = SteadyStateSize( + spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx); + + // Only frames on spatial layers, which may be limited in a steady state + // are considered for steady state detection. + if (framerate_controller_[spatial_idx].GetTargetRate() > + variable_framerate_experiment_.framerate_limit + 1e-9) { + if (encoded_image_.qp_ <= + variable_framerate_experiment_.steady_state_qp && + encoded_image_.size() <= steady_state_size) { + ++num_steady_state_frames_; + } else { + num_steady_state_frames_ = 0; + } + } + } + encoded_image_.set_size(0); + } +} + +int LibvpxVp9Encoder::RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) { + encoded_complete_callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const { + EncoderInfo info; + info.supports_native_handle = false; + info.implementation_name = "libvpx"; + if (quality_scaler_experiment_.enabled && inited_ && + codec_.VP9().automaticResizeOn) { + info.scaling_settings = VideoEncoder::ScalingSettings( + quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp); + } else { + info.scaling_settings = VideoEncoder::ScalingSettings::kOff; + } + info.has_trusted_rate_controller = trusted_rate_controller_; + info.is_hardware_accelerated = false; + if (inited_) { + // Find the max configured fps of any active spatial layer. 
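+ // E.g. if SL0 is configured for 15 fps and SL1 for 30 fps, max_fps becomes
+ // 30 and SL0's fps_allocation entries below are scaled by a factor of 0.5.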
+ float max_fps = 0.0; + for (size_t si = 0; si < num_spatial_layers_; ++si) { + if (codec_.spatialLayers[si].active && + codec_.spatialLayers[si].maxFramerate > max_fps) { + max_fps = codec_.spatialLayers[si].maxFramerate; + } + } + + for (size_t si = 0; si < num_spatial_layers_; ++si) { + info.fps_allocation[si].clear(); + if (!codec_.spatialLayers[si].active) { + continue; + } + + // This spatial layer may already use a fraction of the total frame rate. + const float sl_fps_fraction = + codec_.spatialLayers[si].maxFramerate / max_fps; + for (size_t ti = 0; ti < num_temporal_layers_; ++ti) { + const uint32_t decimator = + num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti]; + RTC_DCHECK_GT(decimator, 0); + info.fps_allocation[si].push_back( + rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction * + (sl_fps_fraction / decimator))); + } + } + if (profile_ == VP9Profile::kProfile0) { + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + } + } + if (!encoder_info_override_.resolution_bitrate_limits().empty()) { + info.resolution_bitrate_limits = + encoder_info_override_.resolution_bitrate_limits(); + } + return info; +} + +size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) { + const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate( + sid, tid == kNoTemporalIdx ? 0 : tid); + const float fps = (codec_.mode == VideoCodecMode::kScreensharing) + ? std::min(static_cast<float>(codec_.maxFramerate), + framerate_controller_[sid].GetTargetRate()) + : codec_.maxFramerate; + return static_cast<size_t>( + bitrate_bps / (8 * fps) * + (100 - + variable_framerate_experiment_.steady_state_undershoot_percentage) / + 100 + + 0.5); +} + +// static +LibvpxVp9Encoder::VariableFramerateExperiment +LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) { + FieldTrialFlag enabled = FieldTrialFlag("Enabled"); + FieldTrialParameter<double> framerate_limit("min_fps", 5.0); + FieldTrialParameter<int> qp("min_qp", 32); + FieldTrialParameter<int> undershoot_percentage("undershoot", 30); + FieldTrialParameter<int> frames_before_steady_state( + "frames_before_steady_state", 5); + ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage, + &frames_before_steady_state}, + trials.Lookup("WebRTC-VP9VariableFramerateScreenshare")); + VariableFramerateExperiment config; + config.enabled = enabled.Get(); + config.framerate_limit = framerate_limit.Get(); + config.steady_state_qp = qp.Get(); + config.steady_state_undershoot_percentage = undershoot_percentage.Get(); + config.frames_before_steady_state = frames_before_steady_state.Get(); + + return config; +} + +// static +LibvpxVp9Encoder::QualityScalerExperiment +LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) { + FieldTrialFlag disabled = FieldTrialFlag("Disabled"); + FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold); + FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold); + ParseFieldTrial({&disabled, &low_qp, &high_qp}, + trials.Lookup("WebRTC-VP9QualityScaler")); + QualityScalerExperiment config; + config.enabled = !disabled.Get(); + RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is " + << (config.enabled ? "enabled." 
: "disabled"); + config.low_qp = low_qp.Get(); + config.high_qp = high_qp.Get(); + + return config; +} + +void LibvpxVp9Encoder::UpdatePerformanceFlags() { + flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution; + if (codec_.GetVideoEncoderComplexity() == + VideoCodecComplexity::kComplexityLow) { + // For low tier devices, always use speed 9. Only disable upper + // layer deblocking below QCIF. + params_by_resolution[0] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 1, + .allow_denoising = true}; + params_by_resolution[352 * 288] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = true}; + } else { + params_by_resolution = performance_flags_.settings_by_resolution; + } + + const auto find_speed = [&](int min_pixel_count) { + RTC_DCHECK(!params_by_resolution.empty()); + auto it = params_by_resolution.upper_bound(min_pixel_count); + return std::prev(it)->second; + }; + performance_flags_by_spatial_index_.clear(); + + if (is_svc_) { + for (int si = 0; si < num_spatial_layers_; ++si) { + performance_flags_by_spatial_index_.push_back(find_speed( + codec_.spatialLayers[si].width * codec_.spatialLayers[si].height)); + } + } else { + performance_flags_by_spatial_index_.push_back( + find_speed(codec_.width * codec_.height)); + } +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials( + const FieldTrialsView& trials) { + struct Params : public PerformanceFlags::ParameterSet { + int min_pixel_count = 0; + }; + + FieldTrialStructList<Params> trials_list( + {FieldTrialStructMember("min_pixel_count", + [](Params* p) { return &p->min_pixel_count; }), + FieldTrialStructMember("high_layer_speed", + [](Params* p) { return &p->high_layer_speed; }), + FieldTrialStructMember("base_layer_speed", + [](Params* p) { return &p->base_layer_speed; }), + FieldTrialStructMember("deblock_mode", + [](Params* p) { return &p->deblock_mode; }), + FieldTrialStructMember("denoiser", + [](Params* p) { return &p->allow_denoising; })}, + {}); + + FieldTrialFlag per_layer_speed("use_per_layer_speed"); + + ParseFieldTrial({&trials_list, &per_layer_speed}, + trials.Lookup("WebRTC-VP9-PerformanceFlags")); + + PerformanceFlags flags; + flags.use_per_layer_speed = per_layer_speed.Get(); + + constexpr int kMinSpeed = 1; + constexpr int kMaxSpeed = 9; + for (auto& f : trials_list.Get()) { + if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed || + f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed || + f.deblock_mode < 0 || f.deblock_mode > 2) { + RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: " + << "min_pixel_count = " << f.min_pixel_count + << ", high_layer_speed = " << f.high_layer_speed + << ", base_layer_speed = " << f.base_layer_speed + << ", deblock_mode = " << f.deblock_mode; + continue; + } + flags.settings_by_resolution[f.min_pixel_count] = f; + } + + if (flags.settings_by_resolution.empty()) { + return GetDefaultPerformanceFlags(); + } + + return flags; +} + +// static +LibvpxVp9Encoder::PerformanceFlags +LibvpxVp9Encoder::GetDefaultPerformanceFlags() { + PerformanceFlags flags; + flags.use_per_layer_speed = true; +#ifdef MOBILE_ARM + // Speed 8 on all layers for all resolutions. 
+ flags.settings_by_resolution[0] = {.base_layer_speed = 8, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; +#else + + // For smaller resolutions, use lower speed setting for the temporal base + // layer (get some coding gain at the cost of increased encoding complexity). + // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and + // disable deblocking for upper-most temporal layers. + flags.settings_by_resolution[0] = {.base_layer_speed = 5, + .high_layer_speed = 8, + .deblock_mode = 1, + .allow_denoising = true}; + + // Use speed 7 for QCIF and above. + // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and + // enable deblocking for all temporal layers. + flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7, + .high_layer_speed = 8, + .deblock_mode = 0, + .allow_denoising = true}; + + // For very high resolution (1080p and up), turn the speed all the way up + // since this is very CPU intensive. Also disable denoising to save CPU, at + // these resolutions denoising appear less effective and hopefully you also + // have a less noisy video source at this point. + flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9, + .high_layer_speed = 9, + .deblock_mode = 0, + .allow_denoising = false}; + +#endif + return flags; +} + +void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) { + if (!raw_) { + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } else if (raw_->fmt != fmt) { + RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to " + << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420"); + libvpx_->img_free(raw_); + raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1, + nullptr); + } + // else no-op since the image is already in the right format. +} + +rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0( + rtc::scoped_refptr<VideoFrameBuffer> buffer) { + absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> + supported_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + + rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer; + if (buffer->type() != VideoFrameBuffer::Type::kNative) { + // `buffer` is already mapped. + mapped_buffer = buffer; + } else { + // Attempt to map to one of the supported formats. + mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats); + } + if (!mapped_buffer || + (absl::c_find(supported_formats, mapped_buffer->type()) == + supported_formats.end() && + mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) { + // Unknown pixel format or unable to map, convert to I420 and prepare that + // buffer instead to ensure Scale() is safe to use. + auto converted_buffer = buffer->ToI420(); + if (!converted_buffer) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString(buffer->type()) + << " image to I420. Can't encode frame."; + return {}; + } + RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 || + converted_buffer->type() == VideoFrameBuffer::Type::kI420A); + + // Because `buffer` had to be converted, use `converted_buffer` instead. + buffer = mapped_buffer = converted_buffer; + } + + // Prepare `raw_` from `mapped_buffer`. 
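+ // `raw_` only borrows the plane pointers assigned below, so the returned
+ // `mapped_buffer` must stay alive for as long as libvpx reads from `raw_`.
+ // For NV12, the V plane pointer is simply U + 1 with a shared stride,
+ // because the chroma samples are interleaved.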
+ switch (mapped_buffer->type()) { + case VideoFrameBuffer::Type::kI420: + case VideoFrameBuffer::Type::kI420A: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420); + const I420BufferInterface* i420_buffer = mapped_buffer->GetI420(); + RTC_DCHECK(i420_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU()); + raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV()); + raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU(); + raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV(); + break; + } + case VideoFrameBuffer::Type::kNV12: { + MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12); + const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12(); + RTC_DCHECK(nv12_buffer); + raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY()); + raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV()); + raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1; + raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY(); + raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV(); + raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV(); + break; + } + default: + RTC_DCHECK_NOTREACHED(); + } + return mapped_buffer; +} + +} // namespace webrtc + +#endif // RTC_ENABLE_VP9 |